//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/DivisionByConstantInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cctype>
#include <deque>
using namespace llvm;
using namespace llvm::SDPatternMatch;

/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}

// Define the virtual destructor out-of-line for build efficiency.
TargetLowering::~TargetLowering() = default;

const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}

bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}
/// Check whether a given call node is in tail position within its function. If
/// so, set Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                                          SDValue &Chain) const {
  const Function &F = DAG.getMachineFunction().getFunction();

  // First, check if tail calls have been disabled in this function.
  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
    return false;

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore the following attributes because they don't affect
  // the call sequence.
  AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
  for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
                           Attribute::DereferenceableOrNull, Attribute::NoAlias,
                           Attribute::NonNull, Attribute::NoUndef,
                           Attribute::Range, Attribute::NoFPClass})
    CallerAttrs.removeAttribute(Attr);

  if (CallerAttrs.hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.contains(Attribute::ZExt) ||
      CallerAttrs.contains(Attribute::SExt))
    return false;

  // Check if the only use is a function return node.
  return isUsedByReturnOnly(Node, Chain);
}

bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
    const uint32_t *CallerPreservedMask,
    const SmallVectorImpl<CCValAssign> &ArgLocs,
    const SmallVectorImpl<SDValue> &OutVals) const {
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    const CCValAssign &ArgLoc = ArgLocs[I];
    if (!ArgLoc.isRegLoc())
      continue;
    MCRegister Reg = ArgLoc.getLocReg();
    // Only look at callee-saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
      continue;
    // Check that the value we would pass is the same one the caller received
    // in Reg. (We look for a CopyFromReg reading a virtual register that is
    // used for the function live-in value of register Reg.)
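    // Illustrative DAG shape (a sketch, not taken from this file): the
    // matching outgoing value would look like
    //   t2: i64 = CopyFromReg t0, Register:i64 %vreg
    // where %vreg is the live-in virtual register assigned to Reg.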
    SDValue Value = OutVals[I];
    if (Value->getOpcode() == ISD::AssertZext)
      Value = Value.getOperand(0);
    if (Value->getOpcode() != ISD::CopyFromReg)
      return false;
    Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
      return false;
  }
  return true;
}

/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgIdx);
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgIdx);
}

/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
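/// Illustrative use (a sketch, not from this file; Result/OutChain are
/// hypothetical names):
///   SDValue Ops[2] = {LHS, RHS};
///   MakeLibCallOptions CallOptions;
///   std::tie(Result, OutChain) =
///       makeLibCall(DAG, RTLIB::REM_F32, MVT::f32, Ops, CallOptions, dl);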
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
                            ArrayRef<SDValue> Ops,
                            MakeLibCallOptions CallOptions,
                            const SDLoc &dl,
                            SDValue InChain) const {
  if (!InChain)
    InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  Args.reserve(Ops.size());

  TargetLowering::ArgListEntry Entry;
  ArrayRef<Type *> OpsTypeOverrides = CallOptions.OpsTypeOverrides;
  for (unsigned i = 0; i < Ops.size(); ++i) {
    SDValue NewOp = Ops[i];
    Entry.Node = NewOp;
    Entry.Ty = i < OpsTypeOverrides.size() && OpsTypeOverrides[i]
                   ? OpsTypeOverrides[i]
                   : Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
    Entry.IsSExt =
        shouldSignExtendTypeInLibCall(Entry.Ty, CallOptions.IsSigned);
    Entry.IsZExt = !Entry.IsSExt;

    if (CallOptions.IsSoften &&
        !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
      Entry.IsSExt = Entry.IsZExt = false;
    }
    Args.push_back(Entry);
  }

  const char *LibcallName = getLibcallName(LC);
  if (LC == RTLIB::UNKNOWN_LIBCALL || !LibcallName)
    reportFatalInternalError("unsupported library call operation");

  SDValue Callee =
      DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  TargetLowering::CallLoweringInfo CLI(DAG);
  bool signExtend = shouldSignExtendTypeInLibCall(RetTy, CallOptions.IsSigned);
  bool zeroExtend = !signExtend;

  if (CallOptions.IsSoften &&
      !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
    signExtend = zeroExtend = false;
  }

  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
      .setNoReturn(CallOptions.DoesNotReturn)
      .setDiscardResult(!CallOptions.IsReturnValueUsed)
      .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
      .setSExtResult(signExtend)
      .setZExtResult(zeroExtend);
  return LowerCallTo(CLI);
}

bool TargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  EVT VT = getOptimalMemOpType(Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater than
    // or equal to DstAlign (or zero).
    VT = MVT::LAST_INTEGER_VALUETYPE;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type,
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
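  // Illustrative decomposition (a sketch, not fixed behavior): a 15-byte
  // memcpy with i64 legal typically yields MemOps == {i64, i32, i16, i8}, or
  // {i64, i64} when overlapping accesses are allowed and fast.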
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector loads / stores for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}

/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
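/// For example (illustrative): on a soft-float target an f32 SETOLT becomes a
/// call to the OLT_F32 libcall (__ltsf2 in libgcc/compiler-rt), followed by an
/// integer comparison of the call's result against zero.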
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS) const {
  SDValue Chain;
  return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
                             OldRHS, Chain);
}

void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to
  // libgcc not supporting them. We can update this code when libgcc provides
  // such functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 ||
          VT == MVT::ppcf128) &&
         "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons.
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target-specific return value for comparison libcalls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  CCCode = getICmpCondCode(getSoftFloatCmpLibcallPredicate(LC1));
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    assert(CCCode == (ShouldInvertCC ? ISD::SETEQ : ISD::SETNE) &&
           "unordered call should be simple boolean");

    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    if (getBooleanContents(RetVT) == ZeroOrOneBooleanContent) {
      NewLHS = DAG.getNode(ISD::AssertZext, dl, RetVT, Call.first,
                           DAG.getValueType(MVT::i1));
    }

    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getICmpCondCode(getSoftFloatCmpLibcallPredicate(LC2));
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    NewRHS = SDValue();
  }
}

/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
  // In non-PIC modes, just use the address of a block.
  if (!isPositionIndependent())
    return MachineJumpTableInfo::EK_BlockAddress;

  // Otherwise, use a label difference.
  return MachineJumpTableInfo::EK_LabelDifference32;
}

SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  return Table;
}

/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI,
                                             MCContext &Ctx) const {
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}

SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
                                               SDValue Addr, int JTI,
                                               SelectionDAG &DAG) const {
  SDValue Chain = Value;
  // Jump table debug info is only needed if CodeView is enabled.
  if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
    Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
  }
  return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
}

bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  const TargetMachine &TM = getTargetMachine();
  const GlobalValue *GV = GA->getGlobal();

  // If the address is not even local to this DSO, we will have to load it
  // from the GOT and then add the offset.
  if (!TM.shouldAssumeDSOLocal(GV))
    return false;

  // If the code is position independent, we will have to add a base register.
  if (isPositionIndependent())
    return false;

  // Otherwise we can do it.
  return true;
}

//===----------------------------------------------------------------------===//
// Optimization Methods
//===----------------------------------------------------------------------===//

/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
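/// For example (illustrative): (and X, 0xFF00) with DemandedBits == 0x0F00
/// has its mask shrunk to 0x0F00, since the undemanded mask bits can never
/// be observed.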
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Early-out if we've ended up being called on an undemanded node; leave
  // this to constant folding.
  if (DemandedBits.isZero() || DemandedElts.isZero())
    return false;

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
      return false;

    if (!C.isSubsetOf(DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
                                      Op->getFlags());
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}

bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            TargetLoweringOpt &TLO) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}

/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
/// but it could be generalized for targets with other types of implicit
/// widening casts.
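/// For example (illustrative): an i32 'add' whose only user demands just the
/// low 8 bits can become
///   (i32 any_extend (i8 add (i8 trunc x), (i8 trunc y)))
/// provided the i32->i8 truncate and the widening cast are free on the target.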
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &DemandedBits,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  EVT VT = Op.getValueType();
  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (VT.isVector())
    return false;

  assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
         Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
         "ShrinkDemandedOp only supports operands that have the same size!");

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  unsigned DemandedSize = DemandedBits.getActiveBits();
  for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
       SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT)) {
      // We found a type with free casts.

      // If the operation has the 'disjoint' flag, then the
      // operands on the new node are also disjoint.
      SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
                                                     : SDNodeFlags::None);
      SDValue X = DAG.getNode(
          Op.getOpcode(), dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags);
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          const APInt &DemandedElts,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified =
      SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          KnownBits &Known,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();

  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
                              AssumeSingleUse);
}

// TODO: Under what circumstances can we create nodes? Constant folding?
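// For example (illustrative): given Op == (or X, Y) where every demanded bit
// of Y is known zero, the OR case below returns X directly rather than
// creating any new node.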
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  EVT VT = Op.getValueType();

  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Ignore UNDEFs.
  if (Op.isUndef())
    return SDValue();

  // Not demanding any bits/elts from Op.
  if (DemandedBits == 0 || DemandedElts == 0)
    return DAG.getUNDEF(VT);

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = DemandedElts.getBitWidth();
  unsigned BitWidth = DemandedBits.getBitWidth();
  KnownBits LHSKnown, RHSKnown;
  switch (Op.getOpcode()) {
  case ISD::BITCAST: {
    if (VT.isScalableVector())
      return SDValue();

    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (SrcVT == DstVT)
      return Src;

    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
    if (NumSrcEltBits == NumDstEltBits)
      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedBits, DemandedElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);

    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
      unsigned Scale = NumDstEltBits / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
        unsigned BitOffset = EltOffset * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
        if (!Sub.isZero()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    // TODO - big-endian once we have test coverage.
    if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
      unsigned Scale = NumSrcEltBits / NumDstEltBits;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * NumDstEltBits;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    break;
  }
  case ISD::FREEZE: {
    SDValue N0 = Op.getOperand(0);
    if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
                                             /*PoisonOnly=*/false))
      return N0;
    break;
  }
  case ISD::AND: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return Op.getOperand(1);
    break;
  }
  case ISD::OR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::XOR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::ADD: {
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    if (RHSKnown.isZero())
      return Op.getOperand(0);

    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    if (LHSKnown.isZero())
      return Op.getOperand(1);
    break;
  }
  case ISD::SHL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (std::optional<uint64_t> MaxSA =
            DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = *MaxSA;
      unsigned NumSignBits =
          DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return Op0;
    }
    break;
  }
  case ISD::SRL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (std::optional<uint64_t> MaxSA =
            DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = *MaxSA;
      // Must already be signbits in DemandedBits bounds, and can't demand any
      // shifted in zeroes.
      if (DemandedBits.countl_zero() >= ShAmt) {
        unsigned NumSignBits =
            DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
        if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
          return Op0;
      }
    }
    break;
  }
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return Op0;
    }
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    // If none of the extended bits are demanded, eliminate the sextinreg.
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExBits = ExVT.getScalarSizeInBits();
    if (DemandedBits.getActiveBits() <= ExBits &&
        shouldRemoveRedundantExtend(Op))
      return Op0;
    // If the input is already sign extended, just drop the extension.
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
    if (NumSignBits >= (BitWidth - ExBits + 1))
      return Op0;
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    if (VT.isScalableVector())
      return SDValue();

    // If we only want the lowest element and none of the extended bits, then
    // we can return the bitcasted source vector.
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (IsLE && DemandedElts == 1 &&
        DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
        DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
      return DAG.getBitcast(DstVT, Src);
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (VT.isScalableVector())
      return SDValue();

    // If we don't demand the inserted element, return the base vector.
    SDValue Vec = Op.getOperand(0);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    if (VT.isScalableVector())
      return SDValue();

    SDValue Vec = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    // If we don't demand the inserted subvector, return the base vector.
    if (DemandedSubElts == 0)
      return Vec;
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    assert(!VT.isScalableVector());
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // If all the demanded elts are from one operand and are inline,
    // then we can use the operand directly.
    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      AllUndef = false;
      IdentityLHS &= (M == (int)i);
      IdentityRHS &= ((M - NumElts) == i);
    }

    if (AllUndef)
      return DAG.getUNDEF(Op.getValueType());
    if (IdentityLHS)
      return Op.getOperand(0);
    if (IdentityRHS)
      return Op.getOperand(1);
    break;
  }
  default:
    // TODO: Probably okay to remove after audit; here to reduce change size
    // in initial enablement patch for scalable vectors
    if (VT.isScalableVector())
      return SDValue();

    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
      if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
              Op, DemandedBits, DemandedElts, DAG, Depth))
        return V;
    break;
  }
  return SDValue();
}

SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
    unsigned Depth) const {
  EVT VT = Op.getValueType();
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
    SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
    unsigned Depth) const {
  APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1),
// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
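// For example (illustrative):
//   (i16 srl (add (zext i8 A), (zext i8 B)), 1)
//     --> (i16 zext (avgflooru A, B))
//   (i16 srl (add (add (zext i8 A), (zext i8 B)), 1), 1)
//     --> (i16 zext (avgceilu A, B))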
static SDValue combineShiftToAVG(SDValue Op,
                                 TargetLowering::TargetLoweringOpt &TLO,
                                 const TargetLowering &TLI,
                                 const APInt &DemandedBits,
                                 const APInt &DemandedElts, unsigned Depth) {
  assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  // Is the right shift using an immediate value of 1?
  ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
  if (!N1C || !N1C->isOne())
    return SDValue();

  // We are looking for an avgfloor
  // add(ext, ext)
  // or one of these as an avgceil
  // add(add(ext, ext), 1)
  // add(add(ext, 1), ext)
  // add(ext, add(ext, 1))
  SDValue Add = Op.getOperand(0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  SDValue ExtOpA = Add.getOperand(0);
  SDValue ExtOpB = Add.getOperand(1);
  SDValue Add2;
  auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
    ConstantSDNode *ConstOp;
    if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op3;
      Add2 = A;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op2;
      Add2 = A;
      return true;
    }
    return false;
  };
  bool IsCeil =
      (ExtOpA.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB,
                     ExtOpA)) ||
      (ExtOpB.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA,
                     ExtOpB));

  // If the shift is signed (sra):
  //  - Needs >= 2 sign bits for both operands.
  //  - Needs >= 2 zero bits.
  // If the shift is unsigned (srl):
  //  - Needs >= 1 zero bit for both operands.
  //  - Needs 1 demanded bit zero and >= 2 sign bits.
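  // For example (illustrative): for an i16 srl whose add operands are both
  // known to have 8 leading zero bits, NumZero below is 8, so the average can
  // be narrowed all the way to an i8 AVGFLOORU/AVGCEILU.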
  SelectionDAG &DAG = TLO.DAG;
  unsigned ShiftOpc = Op.getOpcode();
  bool IsSigned = false;
  unsigned KnownBits;
  unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
  unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
  unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
  unsigned NumZeroA =
      DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZeroB =
      DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZero = std::min(NumZeroA, NumZeroB);

  switch (ShiftOpc) {
  default:
    llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
  case ISD::SRA: {
    if (NumZero >= 2 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  case ISD::SRL: {
    if (NumZero >= 1 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  }

  unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
                           : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);

  // Find the smallest power-of-2 type that is legal for this vector size and
  // operation, given the original type size and the number of known sign/zero
  // bits.
  EVT VT = Op.getValueType();
  unsigned MinWidth =
      std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
  if (NVT.getScalarSizeInBits() > VT.getScalarSizeInBits())
    return SDValue();
  if (VT.isVector())
    NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
  if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
    // If we could not transform, and (both) adds are nuw/nsw, we can use the
    // larger type size to do the transform.
    if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
      return SDValue();
    if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
                               Add.getOperand(1)) &&
        (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
                                         Add2.getOperand(1))))
      NVT = VT;
    else
      return SDValue();
  }

  // Don't create an AVGFLOOR node with a scalar constant unless it's legal, as
  // this is likely to stop other folds (reassociation, value tracking etc.)
  if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
      (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
    return SDValue();

  SDLoc DL(Op);
  SDValue ResultAVG =
      DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
                  DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
  return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
}

/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
/// original and new nodes in Old and New. Otherwise, analyze the expression and
/// return a mask of Known bits for the expression (used to simplify the
/// caller). The Known bits may only be accurate for those bits in the
/// OriginalDemandedBits and OriginalDemandedElts.
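/// For example (illustrative): with Op == (and X, 0xFF) and only bit 3
/// demanded, the mask is shrunk to 0x08; if X's bit 3 is additionally known
/// zero, the whole node folds to the constant 0.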
bool TargetLowering::SimplifyDemandedBits(
    SDValue Op, const APInt &OriginalDemandedBits,
    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
    unsigned Depth, bool AssumeSingleUse) const {
  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
  assert(Op.getScalarValueSizeInBits() == BitWidth &&
         "Mask size mismatches value type size!");

  // Don't know anything.
  Known = KnownBits(BitWidth);

  EVT VT = Op.getValueType();
  bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = OriginalDemandedElts.getBitWidth();
  assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
         "Unexpected vector size");

  APInt DemandedBits = OriginalDemandedBits;
  APInt DemandedElts = OriginalDemandedElts;
  SDLoc dl(Op);

  // Undef operand.
  if (Op.isUndef())
    return false;

  // We can't simplify target constants.
  if (Op.getOpcode() == ISD::TargetConstant)
    return false;

  if (Op.getOpcode() == ISD::Constant) {
    // We know all of the bits for a constant!
    Known = KnownBits::makeConstant(Op->getAsAPIntVal());
    return false;
  }

  if (Op.getOpcode() == ISD::ConstantFP) {
    // We know all of the bits for a floating point constant!
    Known = KnownBits::makeConstant(
        cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
    return false;
  }

  // Other users may use these bits.
  bool HasMultiUse = false;
  if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
    if (Depth >= SelectionDAG::MaxRecursionDepth) {
      // Limit search depth.
      return false;
    }
    // Allow multiple uses, just set the DemandedBits/Elts to all bits.
    DemandedBits = APInt::getAllOnes(BitWidth);
    DemandedElts = APInt::getAllOnes(NumElts);
    HasMultiUse = true;
  } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
    // Not demanding any bits/elts from Op.
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
    // Limit search depth.
    return false;
  }

  KnownBits Known2;
  switch (Op.getOpcode()) {
  case ISD::SCALAR_TO_VECTOR: {
    if (VT.isScalableVector())
      return false;
    if (!DemandedElts[0])
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));

    KnownBits SrcKnown;
    SDValue Src = Op.getOperand(0);
    unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
    APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
    if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
      return true;

    // Upper elements are undef, so only get the knownbits if we just demand
    // the bottom element.
    if (DemandedElts == 1)
      Known = SrcKnown.anyextOrTrunc(BitWidth);
    break;
  }
  case ISD::BUILD_VECTOR:
    // Collect the known bits that are shared by every demanded element.
    // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    return false; // Don't fall through, will infinitely loop.
  case ISD::SPLAT_VECTOR: {
    SDValue Scl = Op.getOperand(0);
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
    KnownBits KnownScl;
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    // Implicitly truncate the bits to match the official semantics of
    // SPLAT_VECTOR.
    Known = KnownScl.trunc(BitWidth);
    break;
  }
  case ISD::LOAD: {
    auto *LD = cast<LoadSDNode>(Op);
    if (getTargetConstantFromLoad(LD)) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false; // Don't fall through, will infinitely loop.
    }
    if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
      // If this is a ZEXTLoad and we are looking at the loaded value.
      EVT MemVT = LD->getMemoryVT();
      unsigned MemBits = MemVT.getScalarSizeInBits();
      Known.Zero.setBitsFrom(MemBits);
      return false; // Don't fall through, will infinitely loop.
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (VT.isScalableVector())
      return false;
    SDValue Vec = Op.getOperand(0);
    SDValue Scl = Op.getOperand(1);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();

    // If index isn't constant, assume we need all vector elements AND the
    // inserted element.
    APInt DemandedVecElts(DemandedElts);
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
      unsigned Idx = CIdx->getZExtValue();
      DemandedVecElts.clearBit(Idx);

      // Inserted element is not required.
      if (!DemandedElts[Idx])
        return TLO.CombineTo(Op, Vec);
    }

    KnownBits KnownScl;
    unsigned NumSclBits = Scl.getScalarValueSizeInBits();
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    Known = KnownScl.anyextOrTrunc(BitWidth);

    KnownBits KnownVec;
    if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
                             Depth + 1))
      return true;

    if (!!DemandedVecElts)
      Known = Known.intersectWith(KnownVec);

    return false;
  }
  case ISD::INSERT_SUBVECTOR: {
    if (VT.isScalableVector())
      return false;
    // Demand any elements from the subvector and the remainder from the source
    // it's inserted into.
    SDValue Src = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    APInt DemandedSrcElts = DemandedElts;
    DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);

    KnownBits KnownSub, KnownSrc;
    if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
                             Depth + 1))
      return true;

    Known.Zero.setAllBits();
    Known.One.setAllBits();
    if (!!DemandedSubElts)
      Known = Known.intersectWith(KnownSub);
    if (!!DemandedSrcElts)
      Known = Known.intersectWith(KnownSrc);

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
        !DemandedSrcElts.isAllOnes()) {
      SDValue NewSub = SimplifyMultipleUseDemandedBits(
          Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
      SDValue NewSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (NewSub || NewSrc) {
        NewSub = NewSub ? NewSub : Sub;
        NewSrc = NewSrc ? NewSrc : Src;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
                                        Op.getOperand(2));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    if (VT.isScalableVector())
      return false;
    // Offset the demanded elts by the subvector index.
    SDValue Src = Op.getOperand(0);
    if (Src.getValueType().isScalableVector())
      break;
    uint64_t Idx = Op.getConstantOperandVal(1);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);

    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
                             Depth + 1))
      return true;

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
      SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (DemandedSrc) {
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
                                        Op.getOperand(1));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    if (VT.isScalableVector())
      return false;
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      APInt DemandedSubElts =
          DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
                               Known2, TLO, Depth + 1))
        return true;
      // Known bits are shared by every demanded subvector element.
      if (!!DemandedSubElts)
        Known = Known.intersectWith(Known2);
    }
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    assert(!VT.isScalableVector());
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // Collect demanded elements from shuffle operands.
    APInt DemandedLHS, DemandedRHS;
    if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
                                DemandedRHS))
      break;

    if (!!DemandedLHS || !!DemandedRHS) {
      SDValue Op0 = Op.getOperand(0);
      SDValue Op1 = Op.getOperand(1);

      Known.Zero.setAllBits();
      Known.One.setAllBits();
      if (!!DemandedLHS) {
        if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = Known.intersectWith(Known2);
      }
      if (!!DemandedRHS) {
        if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = Known.intersectWith(Known2);
      }

      // Attempt to avoid multi-use ops if we don't need anything from them.
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
1430 case ISD::AND: {
1431 SDValue Op0 = Op.getOperand(i: 0);
1432 SDValue Op1 = Op.getOperand(i: 1);
1433
1434 // If the RHS is a constant, check to see if the LHS would be zero without
1435 // using the bits from the RHS. Below, we use knowledge about the RHS to
1436 // simplify the LHS, here we're using information from the LHS to simplify
1437 // the RHS.
1438 if (ConstantSDNode *RHSC = isConstOrConstSplat(N: Op1, DemandedElts)) {
1439 // Do not increment Depth here; that can cause an infinite loop.
1440 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op: Op0, DemandedElts, Depth);
1441 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1442 if ((LHSKnown.Zero & DemandedBits) ==
1443 (~RHSC->getAPIntValue() & DemandedBits))
1444 return TLO.CombineTo(O: Op, N: Op0);
1445
1446 // If any of the set bits in the RHS are known zero on the LHS, shrink
1447 // the constant.
1448 if (ShrinkDemandedConstant(Op, DemandedBits: ~LHSKnown.Zero & DemandedBits,
1449 DemandedElts, TLO))
1450 return true;
1451
1452 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1453 // constant, but if this 'and' is only clearing bits that were just set by
1454 // the xor, then this 'and' can be eliminated by shrinking the mask of
1455 // the xor. For example, for a 32-bit X:
1456 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1457 if (isBitwiseNot(V: Op0) && Op0.hasOneUse() &&
1458 LHSKnown.One == ~RHSC->getAPIntValue()) {
1459 SDValue Xor = TLO.DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Op0.getOperand(i: 0), N2: Op1);
1460 return TLO.CombineTo(O: Op, N: Xor);
1461 }
1462 }
1463
1464 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1465 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
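    // For instance, with VT = v8i32, X a v4i32 subvector inserted at index 4,
    // and a mask M whose demanded bits are known all-ones across elements
    // 4..7: the AND cannot change X's lanes, so it only needs to apply to the
    // constant base C, where it can usually be constant folded.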
1466 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1467 (Op0.getOperand(i: 0).isUndef() ||
1468 ISD::isBuildVectorOfConstantSDNodes(N: Op0.getOperand(i: 0).getNode())) &&
1469 Op0->hasOneUse()) {
1470 unsigned NumSubElts =
1471 Op0.getOperand(i: 1).getValueType().getVectorNumElements();
1472 unsigned SubIdx = Op0.getConstantOperandVal(i: 2);
1473 APInt DemandedSub =
1474 APInt::getBitsSet(numBits: NumElts, loBit: SubIdx, hiBit: SubIdx + NumSubElts);
1475 KnownBits KnownSubMask =
1476 TLO.DAG.computeKnownBits(Op: Op1, DemandedElts: DemandedSub & DemandedElts, Depth: Depth + 1);
1477 if (DemandedBits.isSubsetOf(RHS: KnownSubMask.One)) {
1478 SDValue NewAnd =
1479 TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op0.getOperand(i: 0), N2: Op1);
1480 SDValue NewInsert =
1481 TLO.DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL: dl, VT, N1: NewAnd,
1482 N2: Op0.getOperand(i: 1), N3: Op0.getOperand(i: 2));
1483 return TLO.CombineTo(O: Op, N: NewInsert);
1484 }
1485 }
1486
1487 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1488 Depth: Depth + 1))
1489 return true;
1490 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: ~Known.Zero & DemandedBits, OriginalDemandedElts: DemandedElts,
1491 Known&: Known2, TLO, Depth: Depth + 1))
1492 return true;
1493
1494 // If all of the demanded bits are known one on one side, return the other.
1495 // These bits cannot contribute to the result of the 'and'.
1496 if (DemandedBits.isSubsetOf(RHS: Known2.Zero | Known.One))
1497 return TLO.CombineTo(O: Op, N: Op0);
1498 if (DemandedBits.isSubsetOf(RHS: Known.Zero | Known2.One))
1499 return TLO.CombineTo(O: Op, N: Op1);
1500    // If each demanded bit is known zero in at least one input, return zero.
1501 if (DemandedBits.isSubsetOf(RHS: Known.Zero | Known2.Zero))
1502 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: dl, VT));
1503 // If the RHS is a constant, see if we can simplify it.
1504 if (ShrinkDemandedConstant(Op, DemandedBits: ~Known2.Zero & DemandedBits, DemandedElts,
1505 TLO))
1506 return true;
1507 // If the operation can be done in a smaller type, do so.
1508 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1509 return true;
1510
1511 // Attempt to avoid multi-use ops if we don't need anything from them.
1512 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1513 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1514 Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1515 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1516 Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1517 if (DemandedOp0 || DemandedOp1) {
1518 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1519 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1520 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1521 return TLO.CombineTo(O: Op, N: NewOp);
1522 }
1523 }
1524
1525 Known &= Known2;
1526 break;
1527 }
1528 case ISD::OR: {
1529 SDValue Op0 = Op.getOperand(i: 0);
1530 SDValue Op1 = Op.getOperand(i: 1);
1531 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1532 Depth: Depth + 1)) {
1533 Op->dropFlags(Mask: SDNodeFlags::Disjoint);
1534 return true;
1535 }
1536
1537 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: ~Known.One & DemandedBits, OriginalDemandedElts: DemandedElts,
1538 Known&: Known2, TLO, Depth: Depth + 1)) {
1539 Op->dropFlags(Mask: SDNodeFlags::Disjoint);
1540 return true;
1541 }
1542
1543 // If all of the demanded bits are known zero on one side, return the other.
1544 // These bits cannot contribute to the result of the 'or'.
1545 if (DemandedBits.isSubsetOf(RHS: Known2.One | Known.Zero))
1546 return TLO.CombineTo(O: Op, N: Op0);
1547 if (DemandedBits.isSubsetOf(RHS: Known.One | Known2.Zero))
1548 return TLO.CombineTo(O: Op, N: Op1);
1549 // If the RHS is a constant, see if we can simplify it.
1550 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1551 return true;
1552 // If the operation can be done in a smaller type, do so.
1553 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1554 return true;
1555
1556 // Attempt to avoid multi-use ops if we don't need anything from them.
1557 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1558 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1559 Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1560 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1561 Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1562 if (DemandedOp0 || DemandedOp1) {
1563 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1564 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1565 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1566 return TLO.CombineTo(O: Op, N: NewOp);
1567 }
1568 }
1569
1570 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1571 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
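    // As an illustration, with C1 = 0xF0 and C2 = 0x0F:
    //   (X & 0xF0) | ((X | Y) & 0x0F)
    //     = (X & 0xF0) | (X & 0x0F) | (Y & 0x0F)
    //     = (X & (C1|C2)) | (Y & C2)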
1572 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1573 Op0->hasOneUse() && Op1->hasOneUse()) {
1574 // Attempt to match all commutations - m_c_Or would've been useful!
1575 for (int I = 0; I != 2; ++I) {
1576 SDValue X = Op.getOperand(i: I).getOperand(i: 0);
1577 SDValue C1 = Op.getOperand(i: I).getOperand(i: 1);
1578 SDValue Alt = Op.getOperand(i: 1 - I).getOperand(i: 0);
1579 SDValue C2 = Op.getOperand(i: 1 - I).getOperand(i: 1);
1580 if (Alt.getOpcode() == ISD::OR) {
1581 for (int J = 0; J != 2; ++J) {
1582 if (X == Alt.getOperand(i: J)) {
1583 SDValue Y = Alt.getOperand(i: 1 - J);
1584 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(Opcode: ISD::OR, DL: dl, VT,
1585 Ops: {C1, C2})) {
1586 SDValue MaskX = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: X, N2: C12);
1587 SDValue MaskY = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Y, N2: C2);
1588 return TLO.CombineTo(
1589 O: Op, N: TLO.DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: MaskX, N2: MaskY));
1590 }
1591 }
1592 }
1593 }
1594 }
1595 }
1596
1597 Known |= Known2;
1598 break;
1599 }
1600 case ISD::XOR: {
1601 SDValue Op0 = Op.getOperand(i: 0);
1602 SDValue Op1 = Op.getOperand(i: 1);
1603
1604 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1605 Depth: Depth + 1))
1606 return true;
1607 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
1608 Depth: Depth + 1))
1609 return true;
1610
1611 // If all of the demanded bits are known zero on one side, return the other.
1612 // These bits cannot contribute to the result of the 'xor'.
1613 if (DemandedBits.isSubsetOf(RHS: Known.Zero))
1614 return TLO.CombineTo(O: Op, N: Op0);
1615 if (DemandedBits.isSubsetOf(RHS: Known2.Zero))
1616 return TLO.CombineTo(O: Op, N: Op1);
1617 // If the operation can be done in a smaller type, do so.
1618 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1619 return true;
1620
1621    // If all of the unknown bits are known to be zero on one side or the
1622    // other, turn this into an *inclusive* or.
1623 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1624 if (DemandedBits.isSubsetOf(RHS: Known.Zero | Known2.Zero))
1625 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Op0, N2: Op1));
1626
1627 ConstantSDNode *C = isConstOrConstSplat(N: Op1, DemandedElts);
1628 if (C) {
1629 // If one side is a constant, and all of the set bits in the constant are
1630 // also known set on the other side, turn this into an AND, as we know
1631 // the bits will be cleared.
1632 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1633 // NB: it is okay if more bits are known than are requested
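      // For example, with Op0 = (X | 3) and C2 = 3: bits 0 and 1 of Op0 are
      // known one, so the xor always clears exactly those bits, which is
      // (X | 3) & ~3. The new constant is additionally masked by DemandedBits
      // below to expose further simplifications.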
1634 if (C->getAPIntValue() == Known2.One) {
1635 SDValue ANDC =
1636 TLO.DAG.getConstant(Val: ~C->getAPIntValue() & DemandedBits, DL: dl, VT);
1637 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op0, N2: ANDC));
1638 }
1639
1640 // If the RHS is a constant, see if we can change it. Don't alter a -1
1641 // constant because that's a 'not' op, and that is better for combining
1642 // and codegen.
1643 if (!C->isAllOnes() && DemandedBits.isSubsetOf(RHS: C->getAPIntValue())) {
1644 // We're flipping all demanded bits. Flip the undemanded bits too.
1645 SDValue New = TLO.DAG.getNOT(DL: dl, Val: Op0, VT);
1646 return TLO.CombineTo(O: Op, N: New);
1647 }
1648
1649 unsigned Op0Opcode = Op0.getOpcode();
1650 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1651 if (ConstantSDNode *ShiftC =
1652 isConstOrConstSplat(N: Op0.getOperand(i: 1), DemandedElts)) {
1653          // Don't crash on an oversized shift. We cannot guarantee that a
1654          // bogus shift has been simplified to undef.
1655 if (ShiftC->getAPIntValue().ult(RHS: BitWidth)) {
1656 uint64_t ShiftAmt = ShiftC->getZExtValue();
1657 APInt Ones = APInt::getAllOnes(numBits: BitWidth);
1658 Ones = Op0Opcode == ISD::SHL ? Ones.shl(shiftAmt: ShiftAmt)
1659 : Ones.lshr(shiftAmt: ShiftAmt);
1660 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1661 isDesirableToCommuteXorWithShift(N: Op.getNode())) {
1662 // If the xor constant is a demanded mask, do a 'not' before the
1663 // shift:
1664 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1665 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1666 SDValue Not = TLO.DAG.getNOT(DL: dl, Val: Op0.getOperand(i: 0), VT);
1667 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op0Opcode, DL: dl, VT, N1: Not,
1668 N2: Op0.getOperand(i: 1)));
1669 }
1670 }
1671 }
1672 }
1673 }
1674
1675 // If we can't turn this into a 'not', try to shrink the constant.
1676 if (!C || !C->isAllOnes())
1677 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1678 return true;
1679
1680 // Attempt to avoid multi-use ops if we don't need anything from them.
1681 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1682 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1683 Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1684 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1685 Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1686 if (DemandedOp0 || DemandedOp1) {
1687 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1688 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1689 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1690 return TLO.CombineTo(O: Op, N: NewOp);
1691 }
1692 }
1693
1694 Known ^= Known2;
1695 break;
1696 }
1697 case ISD::SELECT:
1698 if (SimplifyDemandedBits(Op: Op.getOperand(i: 2), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1699 Known, TLO, Depth: Depth + 1))
1700 return true;
1701 if (SimplifyDemandedBits(Op: Op.getOperand(i: 1), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1702 Known&: Known2, TLO, Depth: Depth + 1))
1703 return true;
1704
1705 // If the operands are constants, see if we can simplify them.
1706 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1707 return true;
1708
1709 // Only known if known in both the LHS and RHS.
1710 Known = Known.intersectWith(RHS: Known2);
1711 break;
1712 case ISD::VSELECT:
1713 if (SimplifyDemandedBits(Op: Op.getOperand(i: 2), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1714 Known, TLO, Depth: Depth + 1))
1715 return true;
1716 if (SimplifyDemandedBits(Op: Op.getOperand(i: 1), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1717 Known&: Known2, TLO, Depth: Depth + 1))
1718 return true;
1719
1720 // Only known if known in both the LHS and RHS.
1721 Known = Known.intersectWith(RHS: Known2);
1722 break;
1723 case ISD::SELECT_CC:
1724 if (SimplifyDemandedBits(Op: Op.getOperand(i: 3), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1725 Known, TLO, Depth: Depth + 1))
1726 return true;
1727 if (SimplifyDemandedBits(Op: Op.getOperand(i: 2), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1728 Known&: Known2, TLO, Depth: Depth + 1))
1729 return true;
1730
1731 // If the operands are constants, see if we can simplify them.
1732 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1733 return true;
1734
1735 // Only known if known in both the LHS and RHS.
1736 Known = Known.intersectWith(RHS: Known2);
1737 break;
1738 case ISD::SETCC: {
1739 SDValue Op0 = Op.getOperand(i: 0);
1740 SDValue Op1 = Op.getOperand(i: 1);
1741 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get();
1742 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1743 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1744 // -1, we may be able to bypass the setcc.
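    // For example, on a target with ZeroOrNegativeOne booleans, each lane of
    // (setlt X, 0) is already X's sign bit replicated across the lane, so if
    // only the sign bit is demanded the setcc adds nothing and X can be used
    // directly.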
1745 if (DemandedBits.isSignMask() &&
1746 Op0.getScalarValueSizeInBits() == BitWidth &&
1747 getBooleanContents(Type: Op0.getValueType()) ==
1748 BooleanContent::ZeroOrNegativeOneBooleanContent) {
1749 // If we're testing X < 0, then this compare isn't needed - just use X!
1750 // FIXME: We're limiting to integer types here, but this should also work
1751 // if we don't care about FP signed-zero. The use of SETLT with FP means
1752 // that we don't care about NaNs.
1753 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1754 (isNullConstant(V: Op1) || ISD::isBuildVectorAllZeros(N: Op1.getNode())))
1755 return TLO.CombineTo(O: Op, N: Op0);
1756
1757 // TODO: Should we check for other forms of sign-bit comparisons?
1758 // Examples: X <= -1, X >= 0
1759 }
1760 if (getBooleanContents(Type: Op0.getValueType()) ==
1761 TargetLowering::ZeroOrOneBooleanContent &&
1762 BitWidth > 1)
1763 Known.Zero.setBitsFrom(1);
1764 break;
1765 }
1766 case ISD::SHL: {
1767 SDValue Op0 = Op.getOperand(i: 0);
1768 SDValue Op1 = Op.getOperand(i: 1);
1769 EVT ShiftVT = Op1.getValueType();
1770
1771 if (std::optional<uint64_t> KnownSA =
1772 TLO.DAG.getValidShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
1773 unsigned ShAmt = *KnownSA;
1774 if (ShAmt == 0)
1775 return TLO.CombineTo(O: Op, N: Op0);
1776
1777 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1778 // single shift. We can do this if the bottom bits (which are shifted
1779 // out) are never demanded.
1780 // TODO - support non-uniform vector amounts.
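        // For instance, (shl (srl X, 5), 3) with the low 3 bits not demanded:
        // every demanded bit comes from X shifted right by 2, so Diff = 3 - 5
        // = -2 flips the opcode and the whole thing becomes (srl X, 2).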
1781 if (Op0.getOpcode() == ISD::SRL) {
1782 if (!DemandedBits.intersects(RHS: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ShAmt))) {
1783 if (std::optional<uint64_t> InnerSA =
1784 TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + 2)) {
1785 unsigned C1 = *InnerSA;
1786 unsigned Opc = ISD::SHL;
1787 int Diff = ShAmt - C1;
1788 if (Diff < 0) {
1789 Diff = -Diff;
1790 Opc = ISD::SRL;
1791 }
1792 SDValue NewSA = TLO.DAG.getConstant(Val: Diff, DL: dl, VT: ShiftVT);
1793 return TLO.CombineTo(
1794 O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, N1: Op0.getOperand(i: 0), N2: NewSA));
1795 }
1796 }
1797 }
1798
1799 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1800 // are not demanded. This will likely allow the anyext to be folded away.
1801 // TODO - support non-uniform vector amounts.
1802 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1803 SDValue InnerOp = Op0.getOperand(i: 0);
1804 EVT InnerVT = InnerOp.getValueType();
1805 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1806 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1807 isTypeDesirableForOp(ISD::SHL, VT: InnerVT)) {
1808 SDValue NarrowShl = TLO.DAG.getNode(
1809 Opcode: ISD::SHL, DL: dl, VT: InnerVT, N1: InnerOp,
1810 N2: TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: InnerVT, DL: dl));
1811 return TLO.CombineTo(
1812 O: Op, N: TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: NarrowShl));
1813 }
1814
1815 // Repeat the SHL optimization above in cases where an extension
1816        // intervenes: (shl (anyext (shr x, c1)), c2) -->
1817        // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1818 // aren't demanded (as above) and that the shifted upper c1 bits of
1819 // x aren't demanded.
1820 // TODO - support non-uniform vector amounts.
1821 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1822 InnerOp.hasOneUse()) {
1823 if (std::optional<uint64_t> SA2 = TLO.DAG.getValidShiftAmount(
1824 V: InnerOp, DemandedElts, Depth: Depth + 2)) {
1825 unsigned InnerShAmt = *SA2;
1826 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1827 DemandedBits.getActiveBits() <=
1828 (InnerBits - InnerShAmt + ShAmt) &&
1829 DemandedBits.countr_zero() >= ShAmt) {
1830 SDValue NewSA =
1831 TLO.DAG.getConstant(Val: ShAmt - InnerShAmt, DL: dl, VT: ShiftVT);
1832 SDValue NewExt = TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT,
1833 Operand: InnerOp.getOperand(i: 0));
1834 return TLO.CombineTo(
1835 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: NewExt, N2: NewSA));
1836 }
1837 }
1838 }
1839 }
1840
1841 APInt InDemandedMask = DemandedBits.lshr(shiftAmt: ShAmt);
1842 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
1843 Depth: Depth + 1)) {
1844 // Disable the nsw and nuw flags. We can no longer guarantee that we
1845 // won't wrap after simplification.
1846 Op->dropFlags(Mask: SDNodeFlags::NoWrap);
1847 return true;
1848 }
1849 Known.Zero <<= ShAmt;
1850 Known.One <<= ShAmt;
1851      // Low bits known zero.
1852 Known.Zero.setLowBits(ShAmt);
1853
1854 // Attempt to avoid multi-use ops if we don't need anything from them.
1855 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1856 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1857 Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1858 if (DemandedOp0) {
1859 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: DemandedOp0, N2: Op1);
1860 return TLO.CombineTo(O: Op, N: NewOp);
1861 }
1862 }
1863
1864 // TODO: Can we merge this fold with the one below?
1865 // Try shrinking the operation as long as the shift amount will still be
1866 // in range.
1867 if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1868 Op.getNode()->hasOneUse()) {
1869 // Search for the smallest integer type with free casts to and from
1870 // Op's type. For expedience, just check power-of-2 integer types.
1871 unsigned DemandedSize = DemandedBits.getActiveBits();
1872 for (unsigned SmallVTBits = llvm::bit_ceil(Value: DemandedSize);
1873 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(A: SmallVTBits)) {
1874 EVT SmallVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: SmallVTBits);
1875 if (isNarrowingProfitable(N: Op.getNode(), SrcVT: VT, DestVT: SmallVT) &&
1876 isTypeDesirableForOp(ISD::SHL, VT: SmallVT) &&
1877 isTruncateFree(FromVT: VT, ToVT: SmallVT) && isZExtFree(FromTy: SmallVT, ToTy: VT) &&
1878 (!TLO.LegalOperations() || isOperationLegal(Op: ISD::SHL, VT: SmallVT))) {
1879 assert(DemandedSize <= SmallVTBits &&
1880 "Narrowed below demanded bits?");
1881 // We found a type with free casts.
1882 SDValue NarrowShl = TLO.DAG.getNode(
1883 Opcode: ISD::SHL, DL: dl, VT: SmallVT,
1884 N1: TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SmallVT, Operand: Op.getOperand(i: 0)),
1885 N2: TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: SmallVT, DL: dl));
1886 return TLO.CombineTo(
1887 O: Op, N: TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: NarrowShl));
1888 }
1889 }
1890 }
1891
1892 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1893 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1894 // Only do this if we demand the upper half so the knownbits are correct.
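      // For example, (shl i64 %x, 7) where the upper 32 result bits are all
      // demanded and known zero becomes
      //   (zero_extend (shl nuw (trunc i32 %x), 7))
      // since truncation commutes with shl and the zero_extend recreates
      // exactly those known-zero upper bits.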
1895 unsigned HalfWidth = BitWidth / 2;
1896 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1897 DemandedBits.countLeadingOnes() >= HalfWidth) {
1898 EVT HalfVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: HalfWidth);
1899 if (isNarrowingProfitable(N: Op.getNode(), SrcVT: VT, DestVT: HalfVT) &&
1900 isTypeDesirableForOp(ISD::SHL, VT: HalfVT) &&
1901 isTruncateFree(FromVT: VT, ToVT: HalfVT) && isZExtFree(FromTy: HalfVT, ToTy: VT) &&
1902 (!TLO.LegalOperations() || isOperationLegal(Op: ISD::SHL, VT: HalfVT))) {
1903 // If we're demanding the upper bits at all, we must ensure
1904 // that the upper bits of the shift result are known to be zero,
1905 // which is equivalent to the narrow shift being NUW.
1906 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1907 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1908 SDNodeFlags Flags;
1909 Flags.setNoSignedWrap(IsNSW);
1910 Flags.setNoUnsignedWrap(IsNUW);
1911 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HalfVT, Operand: Op0);
1912 SDValue NewShiftAmt =
1913 TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: HalfVT, DL: dl);
1914 SDValue NewShift = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HalfVT, N1: NewOp,
1915 N2: NewShiftAmt, Flags);
1916 SDValue NewExt =
1917 TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: NewShift);
1918 return TLO.CombineTo(O: Op, N: NewExt);
1919 }
1920 }
1921 }
1922 } else {
1923 // This is a variable shift, so we can't shift the demand mask by a known
1924 // amount. But if we are not demanding high bits, then we are not
1925 // demanding those bits from the pre-shifted operand either.
1926 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1927 APInt DemandedFromOp(APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - CTLZ));
1928 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedFromOp, OriginalDemandedElts: DemandedElts, Known, TLO,
1929 Depth: Depth + 1)) {
1930 // Disable the nsw and nuw flags. We can no longer guarantee that we
1931 // won't wrap after simplification.
1932 Op->dropFlags(Mask: SDNodeFlags::NoWrap);
1933 return true;
1934 }
1935 Known.resetAll();
1936 }
1937 }
1938
1939 // If we are only demanding sign bits then we can use the shift source
1940 // directly.
1941 if (std::optional<uint64_t> MaxSA =
1942 TLO.DAG.getValidMaximumShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
1943 unsigned ShAmt = *MaxSA;
1944 unsigned NumSignBits =
1945 TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1);
1946 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1947 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1948 return TLO.CombineTo(O: Op, N: Op0);
1949 }
1950 break;
1951 }
1952 case ISD::SRL: {
1953 SDValue Op0 = Op.getOperand(i: 0);
1954 SDValue Op1 = Op.getOperand(i: 1);
1955 EVT ShiftVT = Op1.getValueType();
1956
1957 if (std::optional<uint64_t> KnownSA =
1958 TLO.DAG.getValidShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
1959 unsigned ShAmt = *KnownSA;
1960 if (ShAmt == 0)
1961 return TLO.CombineTo(O: Op, N: Op0);
1962
1963 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1964 // single shift. We can do this if the top bits (which are shifted out)
1965 // are never demanded.
1966 // TODO - support non-uniform vector amounts.
1967 if (Op0.getOpcode() == ISD::SHL) {
1968 if (!DemandedBits.intersects(RHS: APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: ShAmt))) {
1969 if (std::optional<uint64_t> InnerSA =
1970 TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + 2)) {
1971 unsigned C1 = *InnerSA;
1972 unsigned Opc = ISD::SRL;
1973 int Diff = ShAmt - C1;
1974 if (Diff < 0) {
1975 Diff = -Diff;
1976 Opc = ISD::SHL;
1977 }
1978 SDValue NewSA = TLO.DAG.getConstant(Val: Diff, DL: dl, VT: ShiftVT);
1979 return TLO.CombineTo(
1980 O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, N1: Op0.getOperand(i: 0), N2: NewSA));
1981 }
1982 }
1983 }
1984
1985 // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
1986 // single sra. We can do this if the top bits are never demanded.
1987 if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
1988 if (!DemandedBits.intersects(RHS: APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: ShAmt))) {
1989 if (std::optional<uint64_t> InnerSA =
1990 TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + 2)) {
1991 unsigned C1 = *InnerSA;
1992 // Clamp the combined shift amount if it exceeds the bit width.
1993 unsigned Combined = std::min(a: C1 + ShAmt, b: BitWidth - 1);
1994 SDValue NewSA = TLO.DAG.getConstant(Val: Combined, DL: dl, VT: ShiftVT);
1995 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRA, DL: dl, VT,
1996 N1: Op0.getOperand(i: 0), N2: NewSA));
1997 }
1998 }
1999 }
2000
2001 APInt InDemandedMask = (DemandedBits << ShAmt);
2002
2003 // If the shift is exact, then it does demand the low bits (and knows that
2004 // they are zero).
2005 if (Op->getFlags().hasExact())
2006 InDemandedMask.setLowBits(ShAmt);
2007
2008 // Narrow shift to lower half - similar to ShrinkDemandedOp.
2009 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
2010 if ((BitWidth % 2) == 0 && !VT.isVector()) {
2011 APInt HiBits = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth / 2);
2012 EVT HalfVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: BitWidth / 2);
2013 if (isNarrowingProfitable(N: Op.getNode(), SrcVT: VT, DestVT: HalfVT) &&
2014 isTypeDesirableForOp(ISD::SRL, VT: HalfVT) &&
2015 isTruncateFree(FromVT: VT, ToVT: HalfVT) && isZExtFree(FromTy: HalfVT, ToTy: VT) &&
2016 (!TLO.LegalOperations() || isOperationLegal(Op: ISD::SRL, VT: HalfVT)) &&
2017 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
2018 TLO.DAG.MaskedValueIsZero(Op: Op0, Mask: HiBits))) {
2019 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HalfVT, Operand: Op0);
2020 SDValue NewShiftAmt =
2021 TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: HalfVT, DL: dl);
2022 SDValue NewShift =
2023 TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HalfVT, N1: NewOp, N2: NewShiftAmt);
2024 return TLO.CombineTo(
2025 O: Op, N: TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: NewShift));
2026 }
2027 }
2028
2029 // Compute the new bits that are at the top now.
2030 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
2031 Depth: Depth + 1))
2032 return true;
2033 Known.Zero.lshrInPlace(ShiftAmt: ShAmt);
2034 Known.One.lshrInPlace(ShiftAmt: ShAmt);
2035 // High bits known zero.
2036 Known.Zero.setHighBits(ShAmt);
2037
2038 // Attempt to avoid multi-use ops if we don't need anything from them.
2039 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2040 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2041 Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2042 if (DemandedOp0) {
2043 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: DemandedOp0, N2: Op1);
2044 return TLO.CombineTo(O: Op, N: NewOp);
2045 }
2046 }
2047 } else {
2048 // Use generic knownbits computation as it has support for non-uniform
2049 // shift amounts.
2050 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2051 }
2052
2053 // If we are only demanding sign bits then we can use the shift source
2054 // directly.
2055 if (std::optional<uint64_t> MaxSA =
2056 TLO.DAG.getValidMaximumShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
2057 unsigned ShAmt = *MaxSA;
2058      // The demanded bits must already be sign bits, and we must not demand
2059      // any of the zeroes shifted in.
2060 if (DemandedBits.countl_zero() >= ShAmt) {
2061 unsigned NumSignBits =
2062 TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1);
2063 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
2064 return TLO.CombineTo(O: Op, N: Op0);
2065 }
2066 }
2067
2068 // Try to match AVG patterns (after shift simplification).
2069 if (SDValue AVG = combineShiftToAVG(Op, TLO, TLI: *this, DemandedBits,
2070 DemandedElts, Depth: Depth + 1))
2071 return TLO.CombineTo(O: Op, N: AVG);
2072
2073 break;
2074 }
2075 case ISD::SRA: {
2076 SDValue Op0 = Op.getOperand(i: 0);
2077 SDValue Op1 = Op.getOperand(i: 1);
2078 EVT ShiftVT = Op1.getValueType();
2079
2080 // If we only want bits that already match the signbit then we don't need
2081 // to shift.
2082 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2083 if (TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1) >=
2084 NumHiDemandedBits)
2085 return TLO.CombineTo(O: Op, N: Op0);
2086
2087    // If this is an arithmetic shift right and only the low bit is demanded,
2088    // we can always convert this into a logical shr, even if the shift amount
2089    // is variable. The low bit of the shift cannot be an input sign bit unless
2090    // the shift amount is >= the size of the datatype, which is undefined.
2091 if (DemandedBits.isOne())
2092 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1));
2093
2094 if (std::optional<uint64_t> KnownSA =
2095 TLO.DAG.getValidShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
2096 unsigned ShAmt = *KnownSA;
2097 if (ShAmt == 0)
2098 return TLO.CombineTo(O: Op, N: Op0);
2099
2100 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2101 // supports sext_inreg.
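      // For example, (sra (shl X, 24), 24) on i32 copies bit 7 of X into bits
      // 31..8 of the result, which is precisely (sign_extend_inreg X, i8).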
2102 if (Op0.getOpcode() == ISD::SHL) {
2103 if (std::optional<uint64_t> InnerSA =
2104 TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + 2)) {
2105 unsigned LowBits = BitWidth - ShAmt;
2106 EVT ExtVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: LowBits);
2107 if (VT.isVector())
2108 ExtVT = EVT::getVectorVT(Context&: *TLO.DAG.getContext(), VT: ExtVT,
2109 EC: VT.getVectorElementCount());
2110
2111 if (*InnerSA == ShAmt) {
2112 if (!TLO.LegalOperations() ||
2113 getOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: ExtVT) == Legal)
2114 return TLO.CombineTo(
2115 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT,
2116 N1: Op0.getOperand(i: 0),
2117 N2: TLO.DAG.getValueType(ExtVT)));
2118
2119 // Even if we can't convert to sext_inreg, we might be able to
2120 // remove this shift pair if the input is already sign extended.
2121 unsigned NumSignBits =
2122 TLO.DAG.ComputeNumSignBits(Op: Op0.getOperand(i: 0), DemandedElts);
2123 if (NumSignBits > ShAmt)
2124 return TLO.CombineTo(O: Op, N: Op0.getOperand(i: 0));
2125 }
2126 }
2127 }
2128
2129 APInt InDemandedMask = (DemandedBits << ShAmt);
2130
2131 // If the shift is exact, then it does demand the low bits (and knows that
2132 // they are zero).
2133 if (Op->getFlags().hasExact())
2134 InDemandedMask.setLowBits(ShAmt);
2135
2136 // If any of the demanded bits are produced by the sign extension, we also
2137 // demand the input sign bit.
2138 if (DemandedBits.countl_zero() < ShAmt)
2139 InDemandedMask.setSignBit();
2140
2141 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
2142 Depth: Depth + 1))
2143 return true;
2144 Known.Zero.lshrInPlace(ShiftAmt: ShAmt);
2145 Known.One.lshrInPlace(ShiftAmt: ShAmt);
2146
2147 // If the input sign bit is known to be zero, or if none of the top bits
2148 // are demanded, turn this into an unsigned shift right.
2149 if (Known.Zero[BitWidth - ShAmt - 1] ||
2150 DemandedBits.countl_zero() >= ShAmt) {
2151 SDNodeFlags Flags;
2152 Flags.setExact(Op->getFlags().hasExact());
2153 return TLO.CombineTo(
2154 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1, Flags));
2155 }
2156
2157 int Log2 = DemandedBits.exactLogBase2();
2158 if (Log2 >= 0) {
2159 // The bit must come from the sign.
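        // For example, on i32 with ShAmt >= 25 and only bit 7 demanded, the
        // demanded bit is a copy of X's sign bit, and (srl X, 31 - 7) moves
        // the sign bit to exactly bit 7.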
2160 SDValue NewSA = TLO.DAG.getConstant(Val: BitWidth - 1 - Log2, DL: dl, VT: ShiftVT);
2161 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: NewSA));
2162 }
2163
2164 if (Known.One[BitWidth - ShAmt - 1])
2165 // New bits are known one.
2166 Known.One.setHighBits(ShAmt);
2167
2168 // Attempt to avoid multi-use ops if we don't need anything from them.
2169 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2170 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2171 Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2172 if (DemandedOp0) {
2173 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: DemandedOp0, N2: Op1);
2174 return TLO.CombineTo(O: Op, N: NewOp);
2175 }
2176 }
2177 }
2178
2179 // Try to match AVG patterns (after shift simplification).
2180 if (SDValue AVG = combineShiftToAVG(Op, TLO, TLI: *this, DemandedBits,
2181 DemandedElts, Depth: Depth + 1))
2182 return TLO.CombineTo(O: Op, N: AVG);
2183
2184 break;
2185 }
2186 case ISD::FSHL:
2187 case ISD::FSHR: {
2188 SDValue Op0 = Op.getOperand(i: 0);
2189 SDValue Op1 = Op.getOperand(i: 1);
2190 SDValue Op2 = Op.getOperand(i: 2);
2191 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2192
2193 if (ConstantSDNode *SA = isConstOrConstSplat(N: Op2, DemandedElts)) {
2194 unsigned Amt = SA->getAPIntValue().urem(RHS: BitWidth);
2195
2196 // For fshl, 0-shift returns the 1st arg.
2197 // For fshr, 0-shift returns the 2nd arg.
2198 if (Amt == 0) {
2199 if (SimplifyDemandedBits(Op: IsFSHL ? Op0 : Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
2200 Known, TLO, Depth: Depth + 1))
2201 return true;
2202 break;
2203 }
2204
2205 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2206 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
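      // For example, fshl i8 with Amt = 3 computes (Op0 << 3) | (Op1 >> 5):
      // result bit i comes from Op0 bit i-3 when i >= 3 and from Op1 bit i+5
      // otherwise, so Demanded0 = DemandedBits >> 3 and Demanded1 =
      // DemandedBits << 5.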
2207 APInt Demanded0 = DemandedBits.lshr(shiftAmt: IsFSHL ? Amt : (BitWidth - Amt));
2208 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2209 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: Demanded0, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2210 Depth: Depth + 1))
2211 return true;
2212 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: Demanded1, OriginalDemandedElts: DemandedElts, Known, TLO,
2213 Depth: Depth + 1))
2214 return true;
2215
2216 Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2217 Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2218 Known.One.lshrInPlace(ShiftAmt: IsFSHL ? (BitWidth - Amt) : Amt);
2219 Known.Zero.lshrInPlace(ShiftAmt: IsFSHL ? (BitWidth - Amt) : Amt);
2220 Known = Known.unionWith(RHS: Known2);
2221
2222 // Attempt to avoid multi-use ops if we don't need anything from them.
2223 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2224 !DemandedElts.isAllOnes()) {
2225 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2226 Op: Op0, DemandedBits: Demanded0, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2227 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2228 Op: Op1, DemandedBits: Demanded1, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2229 if (DemandedOp0 || DemandedOp1) {
2230 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2231 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2232 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedOp0,
2233 N2: DemandedOp1, N3: Op2);
2234 return TLO.CombineTo(O: Op, N: NewOp);
2235 }
2236 }
2237 }
2238
2239    // For pow-2 bitwidths we only demand the low log2(BitWidth) bits of the amount.
2240 if (isPowerOf2_32(Value: BitWidth)) {
2241 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2242 if (SimplifyDemandedBits(Op: Op2, OriginalDemandedBits: DemandedAmtBits, OriginalDemandedElts: DemandedElts,
2243 Known&: Known2, TLO, Depth: Depth + 1))
2244 return true;
2245 }
2246 break;
2247 }
2248 case ISD::ROTL:
2249 case ISD::ROTR: {
2250 SDValue Op0 = Op.getOperand(i: 0);
2251 SDValue Op1 = Op.getOperand(i: 1);
2252 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2253
2254    // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
2255 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1))
2256 return TLO.CombineTo(O: Op, N: Op0);
2257
2258 if (ConstantSDNode *SA = isConstOrConstSplat(N: Op1, DemandedElts)) {
2259 unsigned Amt = SA->getAPIntValue().urem(RHS: BitWidth);
2260 unsigned RevAmt = BitWidth - Amt;
2261
2262 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2263 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
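      // For example, rotl i8 X, 3 moves source bit i to bit (i + 3) mod 8, so
      // the bits demanded from X are simply DemandedBits rotated right by 3;
      // rotr is the same with the amount reversed.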
2264 APInt Demanded0 = DemandedBits.rotr(rotateAmt: IsROTL ? Amt : RevAmt);
2265 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: Demanded0, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2266 Depth: Depth + 1))
2267 return true;
2268
2269 // rot*(x, 0) --> x
2270 if (Amt == 0)
2271 return TLO.CombineTo(O: Op, N: Op0);
2272
2273      // If we demand no bits from one half of the rotation, a single shift suffices.
2274 if ((!TLO.LegalOperations() || isOperationLegal(Op: ISD::SHL, VT)) &&
2275 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2276 Op1 = TLO.DAG.getConstant(Val: IsROTL ? Amt : RevAmt, DL: dl, VT: Op1.getValueType());
2277 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op0, N2: Op1));
2278 }
2279 if ((!TLO.LegalOperations() || isOperationLegal(Op: ISD::SRL, VT)) &&
2280 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2281 Op1 = TLO.DAG.getConstant(Val: IsROTL ? RevAmt : Amt, DL: dl, VT: Op1.getValueType());
2282 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1));
2283 }
2284 }
2285
2286 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2287    // For pow-2 bitwidths we only demand the low log2(BitWidth) bits of the amount.
2288 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2289 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedAmtBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2290 Depth: Depth + 1))
2291 return true;
2292 }
2293 break;
2294 }
2295 case ISD::SMIN:
2296 case ISD::SMAX:
2297 case ISD::UMIN:
2298 case ISD::UMAX: {
2299 unsigned Opc = Op.getOpcode();
2300 SDValue Op0 = Op.getOperand(i: 0);
2301 SDValue Op1 = Op.getOperand(i: 1);
2302
2303    // If we're only demanding sign bits, we can simplify this to an OR/AND node.
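    // Within the demanded bits each operand then looks like all zeros or all
    // ones (copies of its sign bit). On such values smin/umax pick the
    // pattern with either sign bit set (bitwise OR), while smax/umin require
    // both (bitwise AND): e.g. smin(0, -1) = -1 but smax(0, -1) = 0.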
2304 unsigned BitOp =
2305 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2306 unsigned NumSignBits =
2307 std::min(a: TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1),
2308 b: TLO.DAG.ComputeNumSignBits(Op: Op1, DemandedElts, Depth: Depth + 1));
2309 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2310 if (NumSignBits >= NumDemandedUpperBits)
2311 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: BitOp, DL: SDLoc(Op), VT, N1: Op0, N2: Op1));
2312
2313 // Check if one arg is always less/greater than (or equal) to the other arg.
2314 KnownBits Known0 = TLO.DAG.computeKnownBits(Op: Op0, DemandedElts, Depth: Depth + 1);
2315 KnownBits Known1 = TLO.DAG.computeKnownBits(Op: Op1, DemandedElts, Depth: Depth + 1);
2316 switch (Opc) {
2317 case ISD::SMIN:
2318 if (std::optional<bool> IsSLE = KnownBits::sle(LHS: Known0, RHS: Known1))
2319 return TLO.CombineTo(O: Op, N: *IsSLE ? Op0 : Op1);
2320 if (std::optional<bool> IsSLT = KnownBits::slt(LHS: Known0, RHS: Known1))
2321 return TLO.CombineTo(O: Op, N: *IsSLT ? Op0 : Op1);
2322 Known = KnownBits::smin(LHS: Known0, RHS: Known1);
2323 break;
2324 case ISD::SMAX:
2325 if (std::optional<bool> IsSGE = KnownBits::sge(LHS: Known0, RHS: Known1))
2326 return TLO.CombineTo(O: Op, N: *IsSGE ? Op0 : Op1);
2327 if (std::optional<bool> IsSGT = KnownBits::sgt(LHS: Known0, RHS: Known1))
2328 return TLO.CombineTo(O: Op, N: *IsSGT ? Op0 : Op1);
2329 Known = KnownBits::smax(LHS: Known0, RHS: Known1);
2330 break;
2331 case ISD::UMIN:
2332 if (std::optional<bool> IsULE = KnownBits::ule(LHS: Known0, RHS: Known1))
2333 return TLO.CombineTo(O: Op, N: *IsULE ? Op0 : Op1);
2334 if (std::optional<bool> IsULT = KnownBits::ult(LHS: Known0, RHS: Known1))
2335 return TLO.CombineTo(O: Op, N: *IsULT ? Op0 : Op1);
2336 Known = KnownBits::umin(LHS: Known0, RHS: Known1);
2337 break;
2338 case ISD::UMAX:
2339 if (std::optional<bool> IsUGE = KnownBits::uge(LHS: Known0, RHS: Known1))
2340 return TLO.CombineTo(O: Op, N: *IsUGE ? Op0 : Op1);
2341 if (std::optional<bool> IsUGT = KnownBits::ugt(LHS: Known0, RHS: Known1))
2342 return TLO.CombineTo(O: Op, N: *IsUGT ? Op0 : Op1);
2343 Known = KnownBits::umax(LHS: Known0, RHS: Known1);
2344 break;
2345 }
2346 break;
2347 }
2348 case ISD::BITREVERSE: {
2349 SDValue Src = Op.getOperand(i: 0);
2350 APInt DemandedSrcBits = DemandedBits.reverseBits();
2351 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2352 Depth: Depth + 1))
2353 return true;
2354 Known.One = Known2.One.reverseBits();
2355 Known.Zero = Known2.Zero.reverseBits();
2356 break;
2357 }
2358 case ISD::BSWAP: {
2359 SDValue Src = Op.getOperand(i: 0);
2360
2361 // If the only bits demanded come from one byte of the bswap result,
2362 // just shift the input byte into position to eliminate the bswap.
2363 unsigned NLZ = DemandedBits.countl_zero();
2364 unsigned NTZ = DemandedBits.countr_zero();
2365
2366 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2367 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2368 // have 14 leading zeros, round to 8.
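    // For example, bswap i32 with DemandedBits = 0x0000FF00 gives NLZ = 16
    // and NTZ = 8 after rounding, so exactly one byte survives; that byte is
    // the source's bits 16..23, and since NLZ > NTZ an SRL by NLZ - NTZ = 8
    // moves it into bits 8..15.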
2369 NLZ = alignDown(Value: NLZ, Align: 8);
2370 NTZ = alignDown(Value: NTZ, Align: 8);
2371 // If we need exactly one byte, we can do this transformation.
2372 if (BitWidth - NLZ - NTZ == 8) {
2373 // Replace this with either a left or right shift to get the byte into
2374 // the right place.
2375 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2376 if (!TLO.LegalOperations() || isOperationLegal(Op: ShiftOpcode, VT)) {
2377 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2378 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(Val: ShiftAmount, VT, DL: dl);
2379 SDValue NewOp = TLO.DAG.getNode(Opcode: ShiftOpcode, DL: dl, VT, N1: Src, N2: ShAmt);
2380 return TLO.CombineTo(O: Op, N: NewOp);
2381 }
2382 }
2383
2384 APInt DemandedSrcBits = DemandedBits.byteSwap();
2385 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2386 Depth: Depth + 1))
2387 return true;
2388 Known.One = Known2.One.byteSwap();
2389 Known.Zero = Known2.Zero.byteSwap();
2390 break;
2391 }
2392 case ISD::CTPOP: {
2393 // If only 1 bit is demanded, replace with PARITY as long as we're before
2394 // op legalization.
2395 // FIXME: Limit to scalars for now.
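    // The low bit of a population count is exactly the XOR of all the input
    // bits, i.e. the parity.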
2396 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2397 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::PARITY, DL: dl, VT,
2398 Operand: Op.getOperand(i: 0)));
2399
2400 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2401 break;
2402 }
2403 case ISD::SIGN_EXTEND_INREG: {
2404 SDValue Op0 = Op.getOperand(i: 0);
2405 EVT ExVT = cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT();
2406 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2407
2408 // If we only care about the highest bit, don't bother shifting right.
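    // For example, (sext_inreg X, i8) on i32 with only bit 31 demanded: the
    // result's sign bit is bit 7 of X, and (shl X, 24) places that bit at
    // position 31 without the implied arithmetic shift back down.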
2409 if (DemandedBits.isSignMask()) {
2410 unsigned MinSignedBits =
2411 TLO.DAG.ComputeMaxSignificantBits(Op: Op0, DemandedElts, Depth: Depth + 1);
2412 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2413 // However if the input is already sign extended we expect the sign
2414 // extension to be dropped altogether later and do not simplify.
2415 if (!AlreadySignExtended) {
2416 // Compute the correct shift amount type, which must be getShiftAmountTy
2417 // for scalar types after legalization.
2418 SDValue ShiftAmt =
2419 TLO.DAG.getShiftAmountConstant(Val: BitWidth - ExVTBits, VT, DL: dl);
2420 return TLO.CombineTo(O: Op,
2421 N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op0, N2: ShiftAmt));
2422 }
2423 }
2424
2425 // If none of the extended bits are demanded, eliminate the sextinreg.
2426 if (DemandedBits.getActiveBits() <= ExVTBits)
2427 return TLO.CombineTo(O: Op, N: Op0);
2428
2429 APInt InputDemandedBits = DemandedBits.getLoBits(numBits: ExVTBits);
2430
2431 // Since the sign extended bits are demanded, we know that the sign
2432 // bit is demanded.
2433 InputDemandedBits.setBit(ExVTBits - 1);
2434
2435 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InputDemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
2436 Depth: Depth + 1))
2437 return true;
2438
2439 // If the sign bit of the input is known set or clear, then we know the
2440 // top bits of the result.
2441
2442 // If the input sign bit is known zero, convert this into a zero extension.
2443 if (Known.Zero[ExVTBits - 1])
2444 return TLO.CombineTo(O: Op, N: TLO.DAG.getZeroExtendInReg(Op: Op0, DL: dl, VT: ExVT));
2445
2446 APInt Mask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ExVTBits);
2447 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2448 Known.One.setBitsFrom(ExVTBits);
2449 Known.Zero &= Mask;
2450 } else { // Input sign bit unknown
2451 Known.Zero &= Mask;
2452 Known.One &= Mask;
2453 }
2454 break;
2455 }
2456 case ISD::BUILD_PAIR: {
2457 EVT HalfVT = Op.getOperand(i: 0).getValueType();
2458 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2459
2460 APInt MaskLo = DemandedBits.getLoBits(numBits: HalfBitWidth).trunc(width: HalfBitWidth);
2461 APInt MaskHi = DemandedBits.getHiBits(numBits: HalfBitWidth).trunc(width: HalfBitWidth);
2462
2463 KnownBits KnownLo, KnownHi;
2464
2465 if (SimplifyDemandedBits(Op: Op.getOperand(i: 0), DemandedBits: MaskLo, Known&: KnownLo, TLO, Depth: Depth + 1))
2466 return true;
2467
2468 if (SimplifyDemandedBits(Op: Op.getOperand(i: 1), DemandedBits: MaskHi, Known&: KnownHi, TLO, Depth: Depth + 1))
2469 return true;
2470
2471 Known = KnownHi.concat(Lo: KnownLo);
2472 break;
2473 }
2474 case ISD::ZERO_EXTEND_VECTOR_INREG:
2475 if (VT.isScalableVector())
2476 return false;
2477 [[fallthrough]];
2478 case ISD::ZERO_EXTEND: {
2479 SDValue Src = Op.getOperand(i: 0);
2480 EVT SrcVT = Src.getValueType();
2481 unsigned InBits = SrcVT.getScalarSizeInBits();
2482 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2483 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2484
2485 // If none of the top bits are demanded, convert this into an any_extend.
2486 if (DemandedBits.getActiveBits() <= InBits) {
2487 // If we only need the non-extended bits of the bottom element
2488 // then we can just bitcast to the result.
2489 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2490 VT.getSizeInBits() == SrcVT.getSizeInBits())
2491 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2492
2493 unsigned Opc =
2494 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2495 if (!TLO.LegalOperations() || isOperationLegal(Op: Opc, VT))
2496 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src));
2497 }
2498
2499 APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2500 APInt InDemandedElts = DemandedElts.zext(width: InElts);
2501 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2502 Depth: Depth + 1)) {
2503 Op->dropFlags(Mask: SDNodeFlags::NonNeg);
2504 return true;
2505 }
2506 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2507 Known = Known.zext(BitWidth);
2508
2509 // Attempt to avoid multi-use ops if we don't need anything from them.
2510 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2511 Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
2512 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2513 break;
2514 }
2515 case ISD::SIGN_EXTEND_VECTOR_INREG:
2516 if (VT.isScalableVector())
2517 return false;
2518 [[fallthrough]];
2519 case ISD::SIGN_EXTEND: {
2520 SDValue Src = Op.getOperand(i: 0);
2521 EVT SrcVT = Src.getValueType();
2522 unsigned InBits = SrcVT.getScalarSizeInBits();
2523 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2524 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2525
2526 APInt InDemandedElts = DemandedElts.zext(width: InElts);
2527 APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2528
2529 // Since some of the sign extended bits are demanded, we know that the sign
2530 // bit is demanded.
2531 InDemandedBits.setBit(InBits - 1);
2532
2533 // If none of the top bits are demanded, convert this into an any_extend.
2534 if (DemandedBits.getActiveBits() <= InBits) {
2535 // If we only need the non-extended bits of the bottom element
2536 // then we can just bitcast to the result.
2537 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2538 VT.getSizeInBits() == SrcVT.getSizeInBits())
2539 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2540
2541 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2542 if (getBooleanContents(Type: VT) != ZeroOrNegativeOneBooleanContent ||
2543 TLO.DAG.ComputeNumSignBits(Op: Src, DemandedElts: InDemandedElts, Depth: Depth + 1) !=
2544 InBits) {
2545 unsigned Opc =
2546 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2547 if (!TLO.LegalOperations() || isOperationLegal(Op: Opc, VT))
2548 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src));
2549 }
2550 }
2551
2552 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2553 Depth: Depth + 1))
2554 return true;
2555 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2556
2557 // If the sign bit is known one, the top bits match.
2558 Known = Known.sext(BitWidth);
2559
2560 // If the sign bit is known zero, convert this to a zero extend.
2561 if (Known.isNonNegative()) {
2562 unsigned Opc =
2563 IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2564 if (!TLO.LegalOperations() || isOperationLegal(Op: Opc, VT)) {
2565 SDNodeFlags Flags;
2566 if (!IsVecInReg)
2567 Flags |= SDNodeFlags::NonNeg;
2568 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src, Flags));
2569 }
2570 }
2571
2572 // Attempt to avoid multi-use ops if we don't need anything from them.
2573 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2574 Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
2575 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2576 break;
2577 }
2578 case ISD::ANY_EXTEND_VECTOR_INREG:
2579 if (VT.isScalableVector())
2580 return false;
2581 [[fallthrough]];
2582 case ISD::ANY_EXTEND: {
2583 SDValue Src = Op.getOperand(i: 0);
2584 EVT SrcVT = Src.getValueType();
2585 unsigned InBits = SrcVT.getScalarSizeInBits();
2586 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2587 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2588
2589 // If we only need the bottom element then we can just bitcast.
2590 // TODO: Handle ANY_EXTEND?
2591 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2592 VT.getSizeInBits() == SrcVT.getSizeInBits())
2593 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2594
2595 APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2596 APInt InDemandedElts = DemandedElts.zext(width: InElts);
2597 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2598 Depth: Depth + 1))
2599 return true;
2600 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2601 Known = Known.anyext(BitWidth);
2602
2603 // Attempt to avoid multi-use ops if we don't need anything from them.
2604 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2605 Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
2606 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2607 break;
2608 }
2609 case ISD::TRUNCATE: {
2610 SDValue Src = Op.getOperand(i: 0);
2611
2612 // Simplify the input, using demanded bit information, and compute the known
2613 // zero/one bits live out.
2614 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2615 APInt TruncMask = DemandedBits.zext(width: OperandBitWidth);
2616 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: TruncMask, OriginalDemandedElts: DemandedElts, Known, TLO,
2617 Depth: Depth + 1)) {
2618 // Disable the nsw and nuw flags. We can no longer guarantee that we
2619 // won't wrap after simplification.
2620 Op->dropFlags(Mask: SDNodeFlags::NoWrap);
2621 return true;
2622 }
2623 Known = Known.trunc(BitWidth);
2624
2625 // Attempt to avoid multi-use ops if we don't need anything from them.
2626 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2627 Op: Src, DemandedBits: TruncMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
2628 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: NewSrc));
2629
2630 // If the input is only used by this truncate, see if we can shrink it based
2631 // on the known demanded bits.
2632 switch (Src.getOpcode()) {
2633 default:
2634 break;
2635 case ISD::SRL:
2636 // Shrink SRL by a constant if none of the high bits shifted in are
2637 // demanded.
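      // For example, (trunc i32 (srl i64 X, 4)) sees X's bits 35..32 in its
      // top positions; if those HighBits are not demanded, the result equals
      // (srl (trunc i32 X), 4).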
2638 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2639 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2640 // undesirable.
2641 break;
2642
2643 if (Src.getNode()->hasOneUse()) {
2644 if (isTruncateFree(Val: Src, VT2: VT) &&
2645 !isTruncateFree(FromVT: Src.getValueType(), ToVT: VT)) {
2646            // If the truncate is only free at trunc(srl), do not turn it into
2647            // srl(trunc). First check that the truncate is free at Src's
2648            // opcode (srl), then check that the truncation is not done by
2649            // referencing a sub-register. In testing, if both trunc(srl) and
2650            // srl(trunc)'s truncates are free, srl(trunc) performs better; if
2651            // only trunc(srl)'s truncate is free, trunc(srl) is better.
2652 break;
2653 }
2654
2655 std::optional<uint64_t> ShAmtC =
2656 TLO.DAG.getValidShiftAmount(V: Src, DemandedElts, Depth: Depth + 2);
2657 if (!ShAmtC || *ShAmtC >= BitWidth)
2658 break;
2659 uint64_t ShVal = *ShAmtC;
2660
2661 APInt HighBits =
2662 APInt::getHighBitsSet(numBits: OperandBitWidth, hiBitsSet: OperandBitWidth - BitWidth);
2663 HighBits.lshrInPlace(ShiftAmt: ShVal);
2664 HighBits = HighBits.trunc(width: BitWidth);
2665 if (!(HighBits & DemandedBits)) {
2666 // None of the shifted in bits are needed. Add a truncate of the
2667 // shift input, then shift it.
2668 SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(Val: ShVal, VT, DL: dl);
2669 SDValue NewTrunc =
2670 TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Src.getOperand(i: 0));
2671 return TLO.CombineTo(
2672 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: NewTrunc, N2: NewShAmt));
2673 }
2674 }
2675 break;
2676 }
2677
2678 break;
2679 }
2680 case ISD::AssertZext: {
2681 // AssertZext demands all of the high bits, plus any of the low bits
2682 // demanded by its users.
2683 EVT ZVT = cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT();
2684 APInt InMask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ZVT.getSizeInBits());
2685 if (SimplifyDemandedBits(Op: Op.getOperand(i: 0), DemandedBits: ~InMask | DemandedBits, Known,
2686 TLO, Depth: Depth + 1))
2687 return true;
2688
2689 Known.Zero |= ~InMask;
2690 Known.One &= (~Known.Zero);
2691 break;
2692 }
2693 case ISD::EXTRACT_VECTOR_ELT: {
2694 SDValue Src = Op.getOperand(i: 0);
2695 SDValue Idx = Op.getOperand(i: 1);
2696 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2697 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2698
2699 if (SrcEltCnt.isScalable())
2700 return false;
2701
2702    // Without a constant index, demand the bits from every vector element.
2703 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2704 APInt DemandedSrcElts = APInt::getAllOnes(numBits: NumSrcElts);
2705 if (auto *CIdx = dyn_cast<ConstantSDNode>(Val&: Idx))
2706 if (CIdx->getAPIntValue().ult(RHS: NumSrcElts))
2707 DemandedSrcElts = APInt::getOneBitSet(numBits: NumSrcElts, BitNo: CIdx->getZExtValue());
2708
2709    // If BitWidth > EltBitWidth the value is any-extended, so we do not know
2710    // anything about the extended bits.
2711 APInt DemandedSrcBits = DemandedBits;
2712 if (BitWidth > EltBitWidth)
2713 DemandedSrcBits = DemandedSrcBits.trunc(width: EltBitWidth);
2714
2715 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts, Known&: Known2, TLO,
2716 Depth: Depth + 1))
2717 return true;
2718
2719 // Attempt to avoid multi-use ops if we don't need anything from them.
2720 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2721 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2722 Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1)) {
2723 SDValue NewOp =
2724 TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedSrc, N2: Idx);
2725 return TLO.CombineTo(O: Op, N: NewOp);
2726 }
2727 }
2728
2729 Known = Known2;
2730 if (BitWidth > EltBitWidth)
2731 Known = Known.anyext(BitWidth);
2732 break;
2733 }
2734 case ISD::BITCAST: {
2735 if (VT.isScalableVector())
2736 return false;
2737 SDValue Src = Op.getOperand(i: 0);
2738 EVT SrcVT = Src.getValueType();
2739 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2740
2741 // If this is an FP->Int bitcast and if the sign bit is the only
2742 // thing demanded, turn this into a FGETSIGN.
2743 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2744 DemandedBits == APInt::getSignMask(BitWidth: Op.getValueSizeInBits()) &&
2745 SrcVT.isFloatingPoint()) {
2746 bool OpVTLegal = isOperationLegalOrCustom(Op: ISD::FGETSIGN, VT);
2747 bool i32Legal = isOperationLegalOrCustom(Op: ISD::FGETSIGN, VT: MVT::i32);
2748 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2749 SrcVT != MVT::f128) {
2750 // Cannot eliminate/lower SHL for f128 yet.
2751 EVT Ty = OpVTLegal ? VT : MVT::i32;
2752 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2753 // place. We expect the SHL to be eliminated by other optimizations.
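        // A sketch of the expected shape (types are illustrative and depend
        // on which FGETSIGN form is legal): bitcasting f64->i64 with only the
        // sign bit demanded becomes
        //   (i64 shl (zext (FGETSIGN:i32 X)), 63)
        // when only the i32 form is legal; the zext/shl should fold away.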
2754 SDValue Sign = TLO.DAG.getNode(Opcode: ISD::FGETSIGN, DL: dl, VT: Ty, Operand: Src);
2755 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2756 if (!OpVTLegal && OpVTSizeInBits > 32)
2757 Sign = TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Sign);
2758 unsigned ShVal = Op.getValueSizeInBits() - 1;
2759 SDValue ShAmt = TLO.DAG.getConstant(Val: ShVal, DL: dl, VT);
2760 return TLO.CombineTo(O: Op,
2761 N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Sign, N2: ShAmt));
2762 }
2763 }
2764
2765    // Bitcast from a vector using SimplifyDemandedBits/VectorElts.
2766 // Demand the elt/bit if any of the original elts/bits are demanded.
2767 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2768 unsigned Scale = BitWidth / NumSrcEltBits;
2769 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2770 APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
2771 APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
2772 for (unsigned i = 0; i != Scale; ++i) {
2773 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2774 unsigned BitOffset = EltOffset * NumSrcEltBits;
2775 APInt Sub = DemandedBits.extractBits(numBits: NumSrcEltBits, bitPosition: BitOffset);
2776 if (!Sub.isZero()) {
2777 DemandedSrcBits |= Sub;
2778 for (unsigned j = 0; j != NumElts; ++j)
2779 if (DemandedElts[j])
2780 DemandedSrcElts.setBit((j * Scale) + i);
2781 }
2782 }
2783
2784 APInt KnownSrcUndef, KnownSrcZero;
2785 if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedSrcElts, KnownUndef&: KnownSrcUndef,
2786 KnownZero&: KnownSrcZero, TLO, Depth: Depth + 1))
2787 return true;
2788
2789 KnownBits KnownSrcBits;
2790 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts,
2791 Known&: KnownSrcBits, TLO, Depth: Depth + 1))
2792 return true;
2793 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2794 // TODO - bigendian once we have test coverage.
2795 unsigned Scale = NumSrcEltBits / BitWidth;
2796 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2797 APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
2798 APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
2799 for (unsigned i = 0; i != NumElts; ++i)
2800 if (DemandedElts[i]) {
2801 unsigned Offset = (i % Scale) * BitWidth;
2802 DemandedSrcBits.insertBits(SubBits: DemandedBits, bitPosition: Offset);
2803 DemandedSrcElts.setBit(i / Scale);
2804 }
2805
2806 if (SrcVT.isVector()) {
2807 APInt KnownSrcUndef, KnownSrcZero;
2808 if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedSrcElts, KnownUndef&: KnownSrcUndef,
2809 KnownZero&: KnownSrcZero, TLO, Depth: Depth + 1))
2810 return true;
2811 }
2812
2813 KnownBits KnownSrcBits;
2814 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts,
2815 Known&: KnownSrcBits, TLO, Depth: Depth + 1))
2816 return true;
2817
2818 // Attempt to avoid multi-use ops if we don't need anything from them.
2819 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2820 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2821 Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1)) {
2822 SDValue NewOp = TLO.DAG.getBitcast(VT, V: DemandedSrc);
2823 return TLO.CombineTo(O: Op, N: NewOp);
2824 }
2825 }
2826 }
2827
2828 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2829 // recursive call where Known may be useful to the caller.
2830 if (Depth > 0) {
2831 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2832 return false;
2833 }
2834 break;
2835 }
2836 case ISD::MUL:
2837 if (DemandedBits.isPowerOf2()) {
2838 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2839 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2840 // odd (has LSB set), then the left-shifted low bit of X is the answer.
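    // Hypothetical numbers to illustrate: DemandedBits = 0b1000 (CTZ = 3)
    // and C = 24 = 3 << 3 (odd C' = 3, so countr_zero(C) == CTZ) let X * 24
    // be rewritten as X << 3, since bit 3 of X * 24 is bit 0 of X * 3, which
    // is just bit 0 of X.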
2841 unsigned CTZ = DemandedBits.countr_zero();
2842 ConstantSDNode *C = isConstOrConstSplat(N: Op.getOperand(i: 1), DemandedElts);
2843 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2844 SDValue AmtC = TLO.DAG.getShiftAmountConstant(Val: CTZ, VT, DL: dl);
2845 SDValue Shl = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op.getOperand(i: 0), N2: AmtC);
2846 return TLO.CombineTo(O: Op, N: Shl);
2847 }
2848 }
2849 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2850 // X * X is odd iff X is odd.
2851 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
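    // A quick sanity check rather than a proof: 3 * 3 = 9 = 0b1001 (bit 0 =
    // X[0], bit 1 = 0) and 2 * 2 = 4 = 0b100 (bits [1:0] = 00), so when only
    // bits 0 and 1 are demanded the square reduces to X & 1.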
2852 if (Op.getOperand(i: 0) == Op.getOperand(i: 1) && DemandedBits.ult(RHS: 4)) {
2853 SDValue One = TLO.DAG.getConstant(Val: 1, DL: dl, VT);
2854 SDValue And1 = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op.getOperand(i: 0), N2: One);
2855 return TLO.CombineTo(O: Op, N: And1);
2856 }
2857 [[fallthrough]];
2858 case ISD::ADD:
2859 case ISD::SUB: {
2860 // Add, Sub, and Mul don't demand any bits in positions beyond that
2861 // of the highest bit demanded of them.
2862 SDValue Op0 = Op.getOperand(i: 0), Op1 = Op.getOperand(i: 1);
2863 SDNodeFlags Flags = Op.getNode()->getFlags();
2864 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2865 APInt LoMask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - DemandedBitsLZ);
2866 KnownBits KnownOp0, KnownOp1;
2867 auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2868 const KnownBits &KnownRHS) {
2869 if (Op.getOpcode() == ISD::MUL)
2870 Demanded.clearHighBits(hiBits: KnownRHS.countMinTrailingZeros());
2871 return Demanded;
2872 };
2873 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: LoMask, OriginalDemandedElts: DemandedElts, Known&: KnownOp1, TLO,
2874 Depth: Depth + 1) ||
2875 SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: GetDemandedBitsLHSMask(LoMask, KnownOp1),
2876 OriginalDemandedElts: DemandedElts, Known&: KnownOp0, TLO, Depth: Depth + 1) ||
2877 // See if the operation should be performed at a smaller bit width.
2878 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2879 // Disable the nsw and nuw flags. We can no longer guarantee that we
2880 // won't wrap after simplification.
2881 Op->dropFlags(Mask: SDNodeFlags::NoWrap);
2882 return true;
2883 }
2884
2885 // neg x with only low bit demanded is simply x.
2886 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2887 isNullConstant(V: Op0))
2888 return TLO.CombineTo(O: Op, N: Op1);
2889
2890 // Attempt to avoid multi-use ops if we don't need anything from them.
2891 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2892 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2893 Op: Op0, DemandedBits: LoMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2894 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2895 Op: Op1, DemandedBits: LoMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2896 if (DemandedOp0 || DemandedOp1) {
2897 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2898 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2899 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1,
2900 Flags: Flags & ~SDNodeFlags::NoWrap);
2901 return TLO.CombineTo(O: Op, N: NewOp);
2902 }
2903 }
2904
2905 // If we have a constant operand, we may be able to turn it into -1 if we
2906 // do not demand the high bits. This can make the constant smaller to
2907 // encode, allow more general folding, or match specialized instruction
2908    // patterns (e.g., 'blsr' on x86). Don't bother changing 1 to -1 because that
2909 // is probably not useful (and could be detrimental).
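    // For illustration (arbitrary values): with BitWidth = 32 and
    // DemandedBits = 0xFF, DemandedBitsLZ = 24 and HighMask = 0xFFFFFF00, so
    // C = 255 satisfies (C | HighMask).isAllOnes() and "add X, 255" can be
    // rewritten as "add X, -1" without changing the demanded low byte.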
2910 ConstantSDNode *C = isConstOrConstSplat(N: Op1);
2911 APInt HighMask = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: DemandedBitsLZ);
2912 if (C && !C->isAllOnes() && !C->isOne() &&
2913 (C->getAPIntValue() | HighMask).isAllOnes()) {
2914 SDValue Neg1 = TLO.DAG.getAllOnesConstant(DL: dl, VT);
2915 // Disable the nsw and nuw flags. We can no longer guarantee that we
2916 // won't wrap after simplification.
2917 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Neg1,
2918 Flags: Flags & ~SDNodeFlags::NoWrap);
2919 return TLO.CombineTo(O: Op, N: NewOp);
2920 }
2921
2922    // Match a multiply with a disguised negated-power-of-2 and convert to
2923    // an equivalent shift-left amount.
2924 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
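    // For instance, assuming MulC splats -4: UnmaskedC = -4 is a negated
    // power of 2, so getShiftLeftAmt returns log2(4) = 2 and (X * -4) + Op1
    // becomes Op1 - (X << 2).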
2925 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2926 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2927 return 0;
2928
2929 // Don't touch opaque constants. Also, ignore zero and power-of-2
2930 // multiplies. Those will get folded later.
2931 ConstantSDNode *MulC = isConstOrConstSplat(N: Mul.getOperand(i: 1));
2932 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2933 !MulC->getAPIntValue().isPowerOf2()) {
2934 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2935 if (UnmaskedC.isNegatedPowerOf2())
2936 return (-UnmaskedC).logBase2();
2937 }
2938 return 0;
2939 };
2940
2941 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2942 unsigned ShlAmt) {
2943 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(Val: ShlAmt, VT, DL: dl);
2944 SDValue Shl = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: X, N2: ShlAmtC);
2945 SDValue Res = TLO.DAG.getNode(Opcode: NT, DL: dl, VT, N1: Y, N2: Shl);
2946 return TLO.CombineTo(O: Op, N: Res);
2947 };
2948
2949 if (isOperationLegalOrCustom(Op: ISD::SHL, VT)) {
2950 if (Op.getOpcode() == ISD::ADD) {
2951 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2952 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2953 return foldMul(ISD::SUB, Op0.getOperand(i: 0), Op1, ShAmt);
2954 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2955 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2956 return foldMul(ISD::SUB, Op1.getOperand(i: 0), Op0, ShAmt);
2957 }
2958 if (Op.getOpcode() == ISD::SUB) {
2959 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2960 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2961 return foldMul(ISD::ADD, Op1.getOperand(i: 0), Op0, ShAmt);
2962 }
2963 }
2964
2965 if (Op.getOpcode() == ISD::MUL) {
2966 Known = KnownBits::mul(LHS: KnownOp0, RHS: KnownOp1);
2967 } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2968 Known = KnownBits::computeForAddSub(
2969 Add: Op.getOpcode() == ISD::ADD, NSW: Flags.hasNoSignedWrap(),
2970 NUW: Flags.hasNoUnsignedWrap(), LHS: KnownOp0, RHS: KnownOp1);
2971 }
2972 break;
2973 }
2974 case ISD::FABS: {
2975 SDValue Op0 = Op.getOperand(i: 0);
2976 APInt SignMask = APInt::getSignMask(BitWidth);
2977
2978 if (!DemandedBits.intersects(RHS: SignMask))
2979 return TLO.CombineTo(O: Op, N: Op0);
2980
2981 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
2982 Depth: Depth + 1))
2983 return true;
2984
2985 if (Known.isNonNegative())
2986 return TLO.CombineTo(O: Op, N: Op0);
2987 if (Known.isNegative())
2988 return TLO.CombineTo(
2989 O: Op, N: TLO.DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT, Operand: Op0, Flags: Op->getFlags()));
2990
2991 Known.Zero |= SignMask;
2992 Known.One &= ~SignMask;
2993
2994 break;
2995 }
2996 case ISD::FCOPYSIGN: {
2997 SDValue Op0 = Op.getOperand(i: 0);
2998 SDValue Op1 = Op.getOperand(i: 1);
2999
3000 unsigned BitWidth0 = Op0.getScalarValueSizeInBits();
3001 unsigned BitWidth1 = Op1.getScalarValueSizeInBits();
3002 APInt SignMask0 = APInt::getSignMask(BitWidth: BitWidth0);
3003 APInt SignMask1 = APInt::getSignMask(BitWidth: BitWidth1);
3004
3005 if (!DemandedBits.intersects(RHS: SignMask0))
3006 return TLO.CombineTo(O: Op, N: Op0);
3007
3008 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: ~SignMask0 & DemandedBits, OriginalDemandedElts: DemandedElts,
3009 Known, TLO, Depth: Depth + 1) ||
3010 SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: SignMask1, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
3011 Depth: Depth + 1))
3012 return true;
3013
3014 if (Known2.isNonNegative())
3015 return TLO.CombineTo(
3016 O: Op, N: TLO.DAG.getNode(Opcode: ISD::FABS, DL: dl, VT, Operand: Op0, Flags: Op->getFlags()));
3017
3018 if (Known2.isNegative())
3019 return TLO.CombineTo(
3020 O: Op, N: TLO.DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT,
3021 Operand: TLO.DAG.getNode(Opcode: ISD::FABS, DL: SDLoc(Op0), VT, Operand: Op0)));
3022
3023 Known.Zero &= ~SignMask0;
3024 Known.One &= ~SignMask0;
3025 break;
3026 }
3027 case ISD::FNEG: {
3028 SDValue Op0 = Op.getOperand(i: 0);
3029 APInt SignMask = APInt::getSignMask(BitWidth);
3030
3031 if (!DemandedBits.intersects(RHS: SignMask))
3032 return TLO.CombineTo(O: Op, N: Op0);
3033
3034 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
3035 Depth: Depth + 1))
3036 return true;
3037
3038 if (!Known.isSignUnknown()) {
3039 Known.Zero ^= SignMask;
3040 Known.One ^= SignMask;
3041 }
3042
3043 break;
3044 }
3045 default:
3046 // We also ask the target about intrinsics (which could be specific to it).
3047 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3048 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3049 // TODO: Probably okay to remove after audit; here to reduce change size
3050 // in initial enablement patch for scalable vectors
3051 if (Op.getValueType().isScalableVector())
3052 break;
3053 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
3054 Known, TLO, Depth))
3055 return true;
3056 break;
3057 }
3058
3059 // Just use computeKnownBits to compute output bits.
3060 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
3061 break;
3062 }
3063
3064 // If we know the value of all of the demanded bits, return this as a
3065 // constant.
3066 if (!isTargetCanonicalConstantNode(Op) &&
3067 DemandedBits.isSubsetOf(RHS: Known.Zero | Known.One)) {
3068 // Avoid folding to a constant if any OpaqueConstant is involved.
3069 if (llvm::any_of(Range: Op->ops(), P: [](SDValue V) {
3070 auto *C = dyn_cast<ConstantSDNode>(Val&: V);
3071 return C && C->isOpaque();
3072 }))
3073 return false;
3074 if (VT.isInteger())
3075 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: Known.One, DL: dl, VT));
3076 if (VT.isFloatingPoint())
3077 return TLO.CombineTo(
3078 O: Op, N: TLO.DAG.getConstantFP(Val: APFloat(VT.getFltSemantics(), Known.One),
3079 DL: dl, VT));
3080 }
3081
3082  // A multi-use 'all demanded elts' simplification failed to find any known
3083  // bits. Try again just for the original demanded elts.
3084  // Ensure we do this AFTER the constant folding above.
3085 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3086 Known = TLO.DAG.computeKnownBits(Op, DemandedElts: OriginalDemandedElts, Depth);
3087
3088 return false;
3089}
3090
3091bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
3092 const APInt &DemandedElts,
3093 DAGCombinerInfo &DCI) const {
3094 SelectionDAG &DAG = DCI.DAG;
3095 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3096 !DCI.isBeforeLegalizeOps());
3097
3098 APInt KnownUndef, KnownZero;
3099 bool Simplified =
3100 SimplifyDemandedVectorElts(Op, DemandedEltMask: DemandedElts, KnownUndef, KnownZero, TLO);
3101 if (Simplified) {
3102 DCI.AddToWorklist(N: Op.getNode());
3103 DCI.CommitTargetLoweringOpt(TLO);
3104 }
3105
3106 return Simplified;
3107}
3108
3109/// Given a vector binary operation and known undefined elements for each input
3110/// operand, compute whether each element of the output is undefined.
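/// For example (a sketch of the intent, not a guarantee for every opcode):
/// for (add <2 x i32> A, B) with UndefOp0 = 0b10 and UndefOp1 = 0b10, lane 1
/// constant-folds as add(undef, undef) -> undef, so bit 1 of the result is
/// set.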
3111static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
3112 const APInt &UndefOp0,
3113 const APInt &UndefOp1) {
3114 EVT VT = BO.getValueType();
3115 assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
3116 "Vector binop only");
3117
3118 EVT EltVT = VT.getVectorElementType();
3119 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3120 assert(UndefOp0.getBitWidth() == NumElts &&
3121 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3122
3123 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3124 const APInt &UndefVals) {
3125 if (UndefVals[Index])
3126 return DAG.getUNDEF(VT: EltVT);
3127
3128 if (auto *BV = dyn_cast<BuildVectorSDNode>(Val&: V)) {
3129 // Try hard to make sure that the getNode() call is not creating temporary
3130 // nodes. Ignore opaque integers because they do not constant fold.
3131 SDValue Elt = BV->getOperand(Num: Index);
3132 auto *C = dyn_cast<ConstantSDNode>(Val&: Elt);
3133 if (isa<ConstantFPSDNode>(Val: Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3134 return Elt;
3135 }
3136
3137 return SDValue();
3138 };
3139
3140 APInt KnownUndef = APInt::getZero(numBits: NumElts);
3141 for (unsigned i = 0; i != NumElts; ++i) {
3142 // If both inputs for this element are either constant or undef and match
3143 // the element type, compute the constant/undef result for this element of
3144 // the vector.
3145 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3146 // not handle FP constants. The code within getNode() should be refactored
3147 // to avoid the danger of creating a bogus temporary node here.
3148 SDValue C0 = getUndefOrConstantElt(BO.getOperand(i: 0), i, UndefOp0);
3149 SDValue C1 = getUndefOrConstantElt(BO.getOperand(i: 1), i, UndefOp1);
3150 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3151 if (DAG.getNode(Opcode: BO.getOpcode(), DL: SDLoc(BO), VT: EltVT, N1: C0, N2: C1).isUndef())
3152 KnownUndef.setBit(i);
3153 }
3154 return KnownUndef;
3155}
3156
3157bool TargetLowering::SimplifyDemandedVectorElts(
3158 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3159 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3160 bool AssumeSingleUse) const {
3161 EVT VT = Op.getValueType();
3162 unsigned Opcode = Op.getOpcode();
3163 APInt DemandedElts = OriginalDemandedElts;
3164 unsigned NumElts = DemandedElts.getBitWidth();
3165 assert(VT.isVector() && "Expected vector op");
3166
3167 KnownUndef = KnownZero = APInt::getZero(numBits: NumElts);
3168
3169 if (!shouldSimplifyDemandedVectorElts(Op, TLO))
3170 return false;
3171
3172 // TODO: For now we assume we know nothing about scalable vectors.
3173 if (VT.isScalableVector())
3174 return false;
3175
3176 assert(VT.getVectorNumElements() == NumElts &&
3177 "Mask size mismatches value type element count!");
3178
3179 // Undef operand.
3180 if (Op.isUndef()) {
3181 KnownUndef.setAllBits();
3182 return false;
3183 }
3184
3185 // If Op has other users, assume that all elements are needed.
3186 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3187 DemandedElts.setAllBits();
3188
3189 // Not demanding any elements from Op.
3190 if (DemandedElts == 0) {
3191 KnownUndef.setAllBits();
3192 return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3193 }
3194
3195 // Limit search depth.
3196 if (Depth >= SelectionDAG::MaxRecursionDepth)
3197 return false;
3198
3199 SDLoc DL(Op);
3200 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3201 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3202
3203 // Helper for demanding the specified elements and all the bits of both binary
3204 // operands.
3205 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3206 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op: Op0, DemandedElts,
3207 DAG&: TLO.DAG, Depth: Depth + 1);
3208 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op: Op1, DemandedElts,
3209 DAG&: TLO.DAG, Depth: Depth + 1);
3210 if (NewOp0 || NewOp1) {
3211 SDValue NewOp =
3212 TLO.DAG.getNode(Opcode, DL: SDLoc(Op), VT, N1: NewOp0 ? NewOp0 : Op0,
3213 N2: NewOp1 ? NewOp1 : Op1, Flags: Op->getFlags());
3214 return TLO.CombineTo(O: Op, N: NewOp);
3215 }
3216 return false;
3217 };
3218
3219 switch (Opcode) {
3220 case ISD::SCALAR_TO_VECTOR: {
3221 if (!DemandedElts[0]) {
3222 KnownUndef.setAllBits();
3223 return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3224 }
3225 SDValue ScalarSrc = Op.getOperand(i: 0);
3226 if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3227 SDValue Src = ScalarSrc.getOperand(i: 0);
3228 SDValue Idx = ScalarSrc.getOperand(i: 1);
3229 EVT SrcVT = Src.getValueType();
3230
3231 ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
3232
3233 if (SrcEltCnt.isScalable())
3234 return false;
3235
3236 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
3237 if (isNullConstant(V: Idx)) {
3238 APInt SrcDemandedElts = APInt::getOneBitSet(numBits: NumSrcElts, BitNo: 0);
3239 APInt SrcUndef = KnownUndef.zextOrTrunc(width: NumSrcElts);
3240 APInt SrcZero = KnownZero.zextOrTrunc(width: NumSrcElts);
3241 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
3242 TLO, Depth: Depth + 1))
3243 return true;
3244 }
3245 }
3246 KnownUndef.setHighBits(NumElts - 1);
3247 break;
3248 }
3249 case ISD::BITCAST: {
3250 SDValue Src = Op.getOperand(i: 0);
3251 EVT SrcVT = Src.getValueType();
3252
3253 if (!SrcVT.isVector()) {
3254 // TODO - bigendian once we have test coverage.
3255 if (IsLE) {
3256 APInt DemandedSrcBits = APInt::getZero(numBits: SrcVT.getSizeInBits());
3257 unsigned EltSize = VT.getScalarSizeInBits();
3258 for (unsigned I = 0; I != NumElts; ++I) {
3259 if (DemandedElts[I]) {
3260 unsigned Offset = I * EltSize;
3261 DemandedSrcBits.setBits(loBit: Offset, hiBit: Offset + EltSize);
3262 }
3263 }
3264 KnownBits Known;
3265 if (SimplifyDemandedBits(Op: Src, DemandedBits: DemandedSrcBits, Known, TLO, Depth: Depth + 1))
3266 return true;
3267 }
3268 break;
3269 }
3270
3271 // Fast handling of 'identity' bitcasts.
3272 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3273 if (NumSrcElts == NumElts)
3274 return SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedElts, KnownUndef,
3275 KnownZero, TLO, Depth: Depth + 1);
3276
3277 APInt SrcDemandedElts, SrcZero, SrcUndef;
3278
3279    // When bitcasting from a 'large element' src vector to a 'small element'
3280    // vector, we must demand a source element if any DemandedElt maps to it.
3281 if ((NumElts % NumSrcElts) == 0) {
3282 unsigned Scale = NumElts / NumSrcElts;
3283 SrcDemandedElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
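      // e.g. (assuming a v2i64 -> v4i32 bitcast): Scale = 2 and
      // DemandedElts = 0b0110 scale down to SrcDemandedElts = 0b11, since
      // each demanded i32 element touches one of the two i64 source
      // elements.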
3284 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
3285 TLO, Depth: Depth + 1))
3286 return true;
3287
3288 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3289 // of the large element.
3290 // TODO - bigendian once we have test coverage.
3291 if (IsLE) {
3292 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3293 APInt SrcDemandedBits = APInt::getZero(numBits: SrcEltSizeInBits);
3294 for (unsigned i = 0; i != NumElts; ++i)
3295 if (DemandedElts[i]) {
3296 unsigned Ofs = (i % Scale) * EltSizeInBits;
3297 SrcDemandedBits.setBits(loBit: Ofs, hiBit: Ofs + EltSizeInBits);
3298 }
3299
3300 KnownBits Known;
3301 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: SrcDemandedBits, OriginalDemandedElts: SrcDemandedElts, Known,
3302 TLO, Depth: Depth + 1))
3303 return true;
3304
3305 // The bitcast has split each wide element into a number of
3306 // narrow subelements. We have just computed the Known bits
3307 // for wide elements. See if element splitting results in
3308 // some subelements being zero. Only for demanded elements!
3309 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3310 if (!Known.Zero.extractBits(numBits: EltSizeInBits, bitPosition: SubElt * EltSizeInBits)
3311 .isAllOnes())
3312 continue;
3313 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3314 unsigned Elt = Scale * SrcElt + SubElt;
3315 if (DemandedElts[Elt])
3316 KnownZero.setBit(Elt);
3317 }
3318 }
3319 }
3320
3321      // If the src element is zero/undef then so are all the output elements
3322      // it covers - only demanded elements are guaranteed to be correct.
3323 for (unsigned i = 0; i != NumSrcElts; ++i) {
3324 if (SrcDemandedElts[i]) {
3325 if (SrcZero[i])
3326 KnownZero.setBits(loBit: i * Scale, hiBit: (i + 1) * Scale);
3327 if (SrcUndef[i])
3328 KnownUndef.setBits(loBit: i * Scale, hiBit: (i + 1) * Scale);
3329 }
3330 }
3331 }
3332
3333    // When bitcasting from a 'small element' src vector to a 'large element'
3334    // vector, we demand all the smaller source elements covered by the larger
3335    // demanded element of this vector.
3336 if ((NumSrcElts % NumElts) == 0) {
3337 unsigned Scale = NumSrcElts / NumElts;
3338 SrcDemandedElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
3339 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
3340 TLO, Depth: Depth + 1))
3341 return true;
3342
3343 // If all the src elements covering an output element are zero/undef, then
3344 // the output element will be as well, assuming it was demanded.
3345 for (unsigned i = 0; i != NumElts; ++i) {
3346 if (DemandedElts[i]) {
3347 if (SrcZero.extractBits(numBits: Scale, bitPosition: i * Scale).isAllOnes())
3348 KnownZero.setBit(i);
3349 if (SrcUndef.extractBits(numBits: Scale, bitPosition: i * Scale).isAllOnes())
3350 KnownUndef.setBit(i);
3351 }
3352 }
3353 }
3354 break;
3355 }
3356 case ISD::FREEZE: {
3357 SDValue N0 = Op.getOperand(i: 0);
3358 if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(Op: N0, DemandedElts,
3359 /*PoisonOnly=*/false))
3360 return TLO.CombineTo(O: Op, N: N0);
3361
3362 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3363 // freeze(op(x, ...)) -> op(freeze(x), ...).
3364 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3365 return TLO.CombineTo(
3366 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT,
3367 Operand: TLO.DAG.getFreeze(V: N0.getOperand(i: 0))));
3368 break;
3369 }
3370 case ISD::BUILD_VECTOR: {
3371 // Check all elements and simplify any unused elements with UNDEF.
3372 if (!DemandedElts.isAllOnes()) {
3373 // Don't simplify BROADCASTS.
3374 if (llvm::any_of(Range: Op->op_values(),
3375 P: [&](SDValue Elt) { return Op.getOperand(i: 0) != Elt; })) {
3376 SmallVector<SDValue, 32> Ops(Op->ops());
3377 bool Updated = false;
3378 for (unsigned i = 0; i != NumElts; ++i) {
3379 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3380 Ops[i] = TLO.DAG.getUNDEF(VT: Ops[0].getValueType());
3381 KnownUndef.setBit(i);
3382 Updated = true;
3383 }
3384 }
3385 if (Updated)
3386 return TLO.CombineTo(O: Op, N: TLO.DAG.getBuildVector(VT, DL, Ops));
3387 }
3388 }
3389 for (unsigned i = 0; i != NumElts; ++i) {
3390 SDValue SrcOp = Op.getOperand(i);
3391 if (SrcOp.isUndef()) {
3392 KnownUndef.setBit(i);
3393 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3394 (isNullConstant(V: SrcOp) || isNullFPConstant(V: SrcOp))) {
3395 KnownZero.setBit(i);
3396 }
3397 }
3398 break;
3399 }
3400 case ISD::CONCAT_VECTORS: {
3401 EVT SubVT = Op.getOperand(i: 0).getValueType();
3402 unsigned NumSubVecs = Op.getNumOperands();
3403 unsigned NumSubElts = SubVT.getVectorNumElements();
3404 for (unsigned i = 0; i != NumSubVecs; ++i) {
3405 SDValue SubOp = Op.getOperand(i);
3406 APInt SubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
3407 APInt SubUndef, SubZero;
3408 if (SimplifyDemandedVectorElts(Op: SubOp, OriginalDemandedElts: SubElts, KnownUndef&: SubUndef, KnownZero&: SubZero, TLO,
3409 Depth: Depth + 1))
3410 return true;
3411 KnownUndef.insertBits(SubBits: SubUndef, bitPosition: i * NumSubElts);
3412 KnownZero.insertBits(SubBits: SubZero, bitPosition: i * NumSubElts);
3413 }
3414
3415 // Attempt to avoid multi-use ops if we don't need anything from them.
3416 if (!DemandedElts.isAllOnes()) {
3417 bool FoundNewSub = false;
3418 SmallVector<SDValue, 2> DemandedSubOps;
3419 for (unsigned i = 0; i != NumSubVecs; ++i) {
3420 SDValue SubOp = Op.getOperand(i);
3421 APInt SubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
3422 SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3423 Op: SubOp, DemandedElts: SubElts, DAG&: TLO.DAG, Depth: Depth + 1);
3424 DemandedSubOps.push_back(Elt: NewSubOp ? NewSubOp : SubOp);
3425 FoundNewSub = NewSubOp ? true : FoundNewSub;
3426 }
3427 if (FoundNewSub) {
3428 SDValue NewOp =
3429 TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc(Op), VT, Ops: DemandedSubOps);
3430 return TLO.CombineTo(O: Op, N: NewOp);
3431 }
3432 }
3433 break;
3434 }
3435 case ISD::INSERT_SUBVECTOR: {
3436    // Demand any elements from the subvector and the remainder from the src
3437    // it's inserted into.
3438 SDValue Src = Op.getOperand(i: 0);
3439 SDValue Sub = Op.getOperand(i: 1);
3440 uint64_t Idx = Op.getConstantOperandVal(i: 2);
3441 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3442 APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx);
3443 APInt DemandedSrcElts = DemandedElts;
3444 DemandedSrcElts.clearBits(LoBit: Idx, HiBit: Idx + NumSubElts);
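    // To illustrate with made-up types: inserting a v2i32 Sub into a v4i32
    // Src at Idx = 2 with DemandedElts = 0b1100 gives DemandedSubElts = 0b11
    // and DemandedSrcElts = 0b0000, in which case Src is replaced by undef
    // below.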
3445
3446 APInt SubUndef, SubZero;
3447 if (SimplifyDemandedVectorElts(Op: Sub, OriginalDemandedElts: DemandedSubElts, KnownUndef&: SubUndef, KnownZero&: SubZero, TLO,
3448 Depth: Depth + 1))
3449 return true;
3450
3451 // If none of the src operand elements are demanded, replace it with undef.
3452 if (!DemandedSrcElts && !Src.isUndef())
3453 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT,
3454 N1: TLO.DAG.getUNDEF(VT), N2: Sub,
3455 N3: Op.getOperand(i: 2)));
3456
3457 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef, KnownZero,
3458 TLO, Depth: Depth + 1))
3459 return true;
3460 KnownUndef.insertBits(SubBits: SubUndef, bitPosition: Idx);
3461 KnownZero.insertBits(SubBits: SubZero, bitPosition: Idx);
3462
3463 // Attempt to avoid multi-use ops if we don't need anything from them.
3464 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3465 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3466 Op: Src, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1);
3467 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3468 Op: Sub, DemandedElts: DemandedSubElts, DAG&: TLO.DAG, Depth: Depth + 1);
3469 if (NewSrc || NewSub) {
3470 NewSrc = NewSrc ? NewSrc : Src;
3471 NewSub = NewSub ? NewSub : Sub;
3472 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc(Op), VT, N1: NewSrc,
3473 N2: NewSub, N3: Op.getOperand(i: 2));
3474 return TLO.CombineTo(O: Op, N: NewOp);
3475 }
3476 }
3477 break;
3478 }
3479 case ISD::EXTRACT_SUBVECTOR: {
3480 // Offset the demanded elts by the subvector index.
3481 SDValue Src = Op.getOperand(i: 0);
3482 if (Src.getValueType().isScalableVector())
3483 break;
3484 uint64_t Idx = Op.getConstantOperandVal(i: 1);
3485 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3486 APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts).shl(shiftAmt: Idx);
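    // e.g. (hypothetical types): extracting a v2i32 at Idx = 2 from a v4i32
    // source with DemandedElts = 0b01 gives DemandedSrcElts =
    // zext(0b01, 4 bits) << 2 = 0b0100, i.e. only source element 2 is
    // demanded.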
3487
3488 APInt SrcUndef, SrcZero;
3489 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3490 Depth: Depth + 1))
3491 return true;
3492 KnownUndef = SrcUndef.extractBits(numBits: NumElts, bitPosition: Idx);
3493 KnownZero = SrcZero.extractBits(numBits: NumElts, bitPosition: Idx);
3494
3495 // Attempt to avoid multi-use ops if we don't need anything from them.
3496 if (!DemandedElts.isAllOnes()) {
3497 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3498 Op: Src, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1);
3499 if (NewSrc) {
3500 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc(Op), VT, N1: NewSrc,
3501 N2: Op.getOperand(i: 1));
3502 return TLO.CombineTo(O: Op, N: NewOp);
3503 }
3504 }
3505 break;
3506 }
3507 case ISD::INSERT_VECTOR_ELT: {
3508 SDValue Vec = Op.getOperand(i: 0);
3509 SDValue Scl = Op.getOperand(i: 1);
3510 auto *CIdx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));
3511
3512 // For a legal, constant insertion index, if we don't need this insertion
3513 // then strip it, else remove it from the demanded elts.
3514 if (CIdx && CIdx->getAPIntValue().ult(RHS: NumElts)) {
3515 unsigned Idx = CIdx->getZExtValue();
3516 if (!DemandedElts[Idx])
3517 return TLO.CombineTo(O: Op, N: Vec);
3518
3519 APInt DemandedVecElts(DemandedElts);
3520 DemandedVecElts.clearBit(BitPosition: Idx);
3521 if (SimplifyDemandedVectorElts(Op: Vec, OriginalDemandedElts: DemandedVecElts, KnownUndef,
3522 KnownZero, TLO, Depth: Depth + 1))
3523 return true;
3524
3525 KnownUndef.setBitVal(BitPosition: Idx, BitValue: Scl.isUndef());
3526
3527 KnownZero.setBitVal(BitPosition: Idx, BitValue: isNullConstant(V: Scl) || isNullFPConstant(V: Scl));
3528 break;
3529 }
3530
3531 APInt VecUndef, VecZero;
3532 if (SimplifyDemandedVectorElts(Op: Vec, OriginalDemandedElts: DemandedElts, KnownUndef&: VecUndef, KnownZero&: VecZero, TLO,
3533 Depth: Depth + 1))
3534 return true;
3535 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3536 break;
3537 }
3538 case ISD::VSELECT: {
3539 SDValue Sel = Op.getOperand(i: 0);
3540 SDValue LHS = Op.getOperand(i: 1);
3541 SDValue RHS = Op.getOperand(i: 2);
3542
3543 // Try to transform the select condition based on the current demanded
3544 // elements.
3545 APInt UndefSel, ZeroSel;
3546 if (SimplifyDemandedVectorElts(Op: Sel, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefSel, KnownZero&: ZeroSel, TLO,
3547 Depth: Depth + 1))
3548 return true;
3549
3550 // See if we can simplify either vselect operand.
3551 APInt DemandedLHS(DemandedElts);
3552 APInt DemandedRHS(DemandedElts);
3553 APInt UndefLHS, ZeroLHS;
3554 APInt UndefRHS, ZeroRHS;
3555 if (SimplifyDemandedVectorElts(Op: LHS, OriginalDemandedElts: DemandedLHS, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3556 Depth: Depth + 1))
3557 return true;
3558 if (SimplifyDemandedVectorElts(Op: RHS, OriginalDemandedElts: DemandedRHS, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3559 Depth: Depth + 1))
3560 return true;
3561
3562 KnownUndef = UndefLHS & UndefRHS;
3563 KnownZero = ZeroLHS & ZeroRHS;
3564
3565 // If we know that the selected element is always zero, we don't need the
3566 // select value element.
3567 APInt DemandedSel = DemandedElts & ~KnownZero;
3568 if (DemandedSel != DemandedElts)
3569 if (SimplifyDemandedVectorElts(Op: Sel, OriginalDemandedElts: DemandedSel, KnownUndef&: UndefSel, KnownZero&: ZeroSel, TLO,
3570 Depth: Depth + 1))
3571 return true;
3572
3573 break;
3574 }
3575 case ISD::VECTOR_SHUFFLE: {
3576 SDValue LHS = Op.getOperand(i: 0);
3577 SDValue RHS = Op.getOperand(i: 1);
3578 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Val&: Op)->getMask();
3579
3580    // Collect demanded elements from the shuffle operands.
3581 APInt DemandedLHS(NumElts, 0);
3582 APInt DemandedRHS(NumElts, 0);
3583 for (unsigned i = 0; i != NumElts; ++i) {
3584 int M = ShuffleMask[i];
3585 if (M < 0 || !DemandedElts[i])
3586 continue;
3587 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3588 if (M < (int)NumElts)
3589 DemandedLHS.setBit(M);
3590 else
3591 DemandedRHS.setBit(M - NumElts);
3592 }
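    // As an illustration (arbitrary mask): a v4 shuffle mask <0, 5, 2, 7>
    // with all elements demanded selects LHS elements 0 and 2
    // (DemandedLHS = 0b0101) and RHS elements 1 and 3 (DemandedRHS = 0b1010).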
3593
3594    // If either side isn't demanded, replace it by UNDEF. We handle this
3595    // explicitly here to also simplify in the case of multiple uses (in
3596    // contrast to the SimplifyDemandedVectorElts calls below).
3597 bool FoldLHS = !DemandedLHS && !LHS.isUndef();
3598 bool FoldRHS = !DemandedRHS && !RHS.isUndef();
3599 if (FoldLHS || FoldRHS) {
3600 LHS = FoldLHS ? TLO.DAG.getUNDEF(VT: LHS.getValueType()) : LHS;
3601 RHS = FoldRHS ? TLO.DAG.getUNDEF(VT: RHS.getValueType()) : RHS;
3602 SDValue NewOp =
3603 TLO.DAG.getVectorShuffle(VT, dl: SDLoc(Op), N1: LHS, N2: RHS, Mask: ShuffleMask);
3604 return TLO.CombineTo(O: Op, N: NewOp);
3605 }
3606
3607 // See if we can simplify either shuffle operand.
3608 APInt UndefLHS, ZeroLHS;
3609 APInt UndefRHS, ZeroRHS;
3610 if (SimplifyDemandedVectorElts(Op: LHS, OriginalDemandedElts: DemandedLHS, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3611 Depth: Depth + 1))
3612 return true;
3613 if (SimplifyDemandedVectorElts(Op: RHS, OriginalDemandedElts: DemandedRHS, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3614 Depth: Depth + 1))
3615 return true;
3616
3617 // Simplify mask using undef elements from LHS/RHS.
3618 bool Updated = false;
3619 bool IdentityLHS = true, IdentityRHS = true;
3620 SmallVector<int, 32> NewMask(ShuffleMask);
3621 for (unsigned i = 0; i != NumElts; ++i) {
3622 int &M = NewMask[i];
3623 if (M < 0)
3624 continue;
3625 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3626 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3627 Updated = true;
3628 M = -1;
3629 }
3630 IdentityLHS &= (M < 0) || (M == (int)i);
3631 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3632 }
3633
3634    // Update legal shuffle masks based on demanded elements, unless doing so
3635    // reduces to an identity mask and prematurely removes the shuffle.
3636 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3637 SDValue LegalShuffle =
3638 buildLegalVectorShuffle(VT, DL, N0: LHS, N1: RHS, Mask: NewMask, DAG&: TLO.DAG);
3639 if (LegalShuffle)
3640 return TLO.CombineTo(O: Op, N: LegalShuffle);
3641 }
3642
3643 // Propagate undef/zero elements from LHS/RHS.
3644 for (unsigned i = 0; i != NumElts; ++i) {
3645 int M = ShuffleMask[i];
3646 if (M < 0) {
3647 KnownUndef.setBit(i);
3648 } else if (M < (int)NumElts) {
3649 if (UndefLHS[M])
3650 KnownUndef.setBit(i);
3651 if (ZeroLHS[M])
3652 KnownZero.setBit(i);
3653 } else {
3654 if (UndefRHS[M - NumElts])
3655 KnownUndef.setBit(i);
3656 if (ZeroRHS[M - NumElts])
3657 KnownZero.setBit(i);
3658 }
3659 }
3660 break;
3661 }
3662 case ISD::ANY_EXTEND_VECTOR_INREG:
3663 case ISD::SIGN_EXTEND_VECTOR_INREG:
3664 case ISD::ZERO_EXTEND_VECTOR_INREG: {
3665 APInt SrcUndef, SrcZero;
3666 SDValue Src = Op.getOperand(i: 0);
3667 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3668 APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts);
3669 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3670 Depth: Depth + 1))
3671 return true;
3672 KnownZero = SrcZero.zextOrTrunc(width: NumElts);
3673 KnownUndef = SrcUndef.zextOrTrunc(width: NumElts);
3674
3675 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3676 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3677 DemandedSrcElts == 1) {
3678 // aext - if we just need the bottom element then we can bitcast.
3679 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
3680 }
3681
3682 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3683 // zext(undef) upper bits are guaranteed to be zero.
3684 if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3685 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: SDLoc(Op), VT));
3686 KnownUndef.clearAllBits();
3687
3688 // zext - if we just need the bottom element then we can mask:
3689 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
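      // Sketch assuming a v4i32 source: c' = c & <-1, 0, 0, 0>; the build
      // vector constructed below keeps lane 0 of the original mask and
      // zeroes the rest, making the final bitcast equivalent to the zext.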
3690 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3691 Op->isOnlyUserOf(N: Src.getNode()) &&
3692 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3693 SDLoc DL(Op);
3694 EVT SrcVT = Src.getValueType();
3695 EVT SrcSVT = SrcVT.getScalarType();
3696 SmallVector<SDValue> MaskElts;
3697 MaskElts.push_back(Elt: TLO.DAG.getAllOnesConstant(DL, VT: SrcSVT));
3698 MaskElts.append(NumInputs: NumSrcElts - 1, Elt: TLO.DAG.getConstant(Val: 0, DL, VT: SrcSVT));
3699 SDValue Mask = TLO.DAG.getBuildVector(VT: SrcVT, DL, Ops: MaskElts);
3700 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3701 Opcode: ISD::AND, DL, VT: SrcVT, Ops: {Src.getOperand(i: 1), Mask})) {
3702 Fold = TLO.DAG.getNode(Opcode: ISD::AND, DL, VT: SrcVT, N1: Src.getOperand(i: 0), N2: Fold);
3703 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Fold));
3704 }
3705 }
3706 }
3707 break;
3708 }
3709
3710 // TODO: There are more binop opcodes that could be handled here - MIN,
3711 // MAX, saturated math, etc.
3712 case ISD::ADD: {
3713 SDValue Op0 = Op.getOperand(i: 0);
3714 SDValue Op1 = Op.getOperand(i: 1);
3715 if (Op0 == Op1 && Op->isOnlyUserOf(N: Op0.getNode())) {
3716 APInt UndefLHS, ZeroLHS;
3717 if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3718 Depth: Depth + 1, /*AssumeSingleUse*/ true))
3719 return true;
3720 }
3721 [[fallthrough]];
3722 }
3723 case ISD::AVGCEILS:
3724 case ISD::AVGCEILU:
3725 case ISD::AVGFLOORS:
3726 case ISD::AVGFLOORU:
3727 case ISD::OR:
3728 case ISD::XOR:
3729 case ISD::SUB:
3730 case ISD::FADD:
3731 case ISD::FSUB:
3732 case ISD::FMUL:
3733 case ISD::FDIV:
3734 case ISD::FREM: {
3735 SDValue Op0 = Op.getOperand(i: 0);
3736 SDValue Op1 = Op.getOperand(i: 1);
3737
3738 APInt UndefRHS, ZeroRHS;
3739 if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3740 Depth: Depth + 1))
3741 return true;
3742 APInt UndefLHS, ZeroLHS;
3743 if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3744 Depth: Depth + 1))
3745 return true;
3746
3747 KnownZero = ZeroLHS & ZeroRHS;
3748 KnownUndef = getKnownUndefForVectorBinop(BO: Op, DAG&: TLO.DAG, UndefOp0: UndefLHS, UndefOp1: UndefRHS);
3749
3750 // Attempt to avoid multi-use ops if we don't need anything from them.
3751 // TODO - use KnownUndef to relax the demandedelts?
3752 if (!DemandedElts.isAllOnes())
3753 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3754 return true;
3755 break;
3756 }
3757 case ISD::SHL:
3758 case ISD::SRL:
3759 case ISD::SRA:
3760 case ISD::ROTL:
3761 case ISD::ROTR: {
3762 SDValue Op0 = Op.getOperand(i: 0);
3763 SDValue Op1 = Op.getOperand(i: 1);
3764
3765 APInt UndefRHS, ZeroRHS;
3766 if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3767 Depth: Depth + 1))
3768 return true;
3769 APInt UndefLHS, ZeroLHS;
3770 if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3771 Depth: Depth + 1))
3772 return true;
3773
3774 KnownZero = ZeroLHS;
3775 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3776
3777 // Attempt to avoid multi-use ops if we don't need anything from them.
3778 // TODO - use KnownUndef to relax the demandedelts?
3779 if (!DemandedElts.isAllOnes())
3780 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3781 return true;
3782 break;
3783 }
3784 case ISD::MUL:
3785 case ISD::MULHU:
3786 case ISD::MULHS:
3787 case ISD::AND: {
3788 SDValue Op0 = Op.getOperand(i: 0);
3789 SDValue Op1 = Op.getOperand(i: 1);
3790
3791 APInt SrcUndef, SrcZero;
3792 if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3793 Depth: Depth + 1))
3794 return true;
3795    // If we know that a demanded element was zero in Op1, we don't need to
3796    // demand it in Op0 - it's guaranteed to be zero.
3797 APInt DemandedElts0 = DemandedElts & ~SrcZero;
3798 if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts0, KnownUndef, KnownZero,
3799 TLO, Depth: Depth + 1))
3800 return true;
3801
3802 KnownUndef &= DemandedElts0;
3803 KnownZero &= DemandedElts0;
3804
3805 // If every element pair has a zero/undef then just fold to zero.
3806 // fold (and x, undef) -> 0 / (and x, 0) -> 0
3807 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3808 if (DemandedElts.isSubsetOf(RHS: SrcZero | KnownZero | SrcUndef | KnownUndef))
3809 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: SDLoc(Op), VT));
3810
3811 // If either side has a zero element, then the result element is zero, even
3812 // if the other is an UNDEF.
3813 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3814 // and then handle 'and' nodes with the rest of the binop opcodes.
3815 KnownZero |= SrcZero;
3816 KnownUndef &= SrcUndef;
3817 KnownUndef &= ~KnownZero;
3818
3819 // Attempt to avoid multi-use ops if we don't need anything from them.
3820 if (!DemandedElts.isAllOnes())
3821 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3822 return true;
3823 break;
3824 }
3825 case ISD::TRUNCATE:
3826 case ISD::SIGN_EXTEND:
3827 case ISD::ZERO_EXTEND:
3828 if (SimplifyDemandedVectorElts(Op: Op.getOperand(i: 0), OriginalDemandedElts: DemandedElts, KnownUndef,
3829 KnownZero, TLO, Depth: Depth + 1))
3830 return true;
3831
3832 if (!DemandedElts.isAllOnes())
3833 if (SDValue NewOp = SimplifyMultipleUseDemandedVectorElts(
3834 Op: Op.getOperand(i: 0), DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
3835 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode, DL: SDLoc(Op), VT, Operand: NewOp));
3836
3837 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3838 // zext(undef) upper bits are guaranteed to be zero.
3839 if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3840 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: SDLoc(Op), VT));
3841 KnownUndef.clearAllBits();
3842 }
3843 break;
3844 case ISD::SINT_TO_FP:
3845 case ISD::UINT_TO_FP:
3846 case ISD::FP_TO_SINT:
3847 case ISD::FP_TO_UINT:
3848 if (SimplifyDemandedVectorElts(Op: Op.getOperand(i: 0), OriginalDemandedElts: DemandedElts, KnownUndef,
3849 KnownZero, TLO, Depth: Depth + 1))
3850 return true;
3851 // Don't fall through to generic undef -> undef handling.
3852 return false;
3853 default: {
3854 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3855 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3856 KnownZero, TLO, Depth))
3857 return true;
3858 } else {
3859 KnownBits Known;
3860 APInt DemandedBits = APInt::getAllOnes(numBits: EltSizeInBits);
3861 if (SimplifyDemandedBits(Op, OriginalDemandedBits: DemandedBits, OriginalDemandedElts, Known,
3862 TLO, Depth, AssumeSingleUse))
3863 return true;
3864 }
3865 break;
3866 }
3867 }
3868 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3869
3870 // Constant fold all undef cases.
3871 // TODO: Handle zero cases as well.
3872 if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3873 return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3874
3875 return false;
3876}
3877
3878/// Determine which of the bits specified in Mask are known to be either zero
3879/// or one and return them in Known.
3880void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3881 KnownBits &Known,
3882 const APInt &DemandedElts,
3883 const SelectionDAG &DAG,
3884 unsigned Depth) const {
3885 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3886 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3887 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3888 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3889 "Should use MaskedValueIsZero if you don't know whether Op"
3890 " is a target node!");
3891 Known.resetAll();
3892}
3893
3894void TargetLowering::computeKnownBitsForTargetInstr(
3895 GISelValueTracking &Analysis, Register R, KnownBits &Known,
3896 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3897 unsigned Depth) const {
3898 Known.resetAll();
3899}
3900
3901void TargetLowering::computeKnownFPClassForTargetInstr(
3902 GISelValueTracking &Analysis, Register R, KnownFPClass &Known,
3903 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3904 unsigned Depth) const {
3905 Known.resetAll();
3906}
3907
3908void TargetLowering::computeKnownBitsForFrameIndex(
3909 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3910 // The low bits are known zero if the pointer is aligned.
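  // e.g. (illustrative): an object with Align(16) has Log2 = 4, so the low 4
  // bits of its address are known to be zero.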
3911 Known.Zero.setLowBits(Log2(A: MF.getFrameInfo().getObjectAlign(ObjectIdx: FrameIdx)));
3912}
3913
3914Align TargetLowering::computeKnownAlignForTargetInstr(
3915 GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI,
3916 unsigned Depth) const {
3917 return Align(1);
3918}
3919
3920/// This method can be implemented by targets that want to expose additional
3921/// information about sign bits to the DAG Combiner.
3922unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3923 const APInt &,
3924 const SelectionDAG &,
3925 unsigned Depth) const {
3926 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3927 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3928 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3929 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3930 "Should use ComputeNumSignBits if you don't know whether Op"
3931 " is a target node!");
3932 return 1;
3933}
3934
3935unsigned TargetLowering::computeNumSignBitsForTargetInstr(
3936 GISelValueTracking &Analysis, Register R, const APInt &DemandedElts,
3937 const MachineRegisterInfo &MRI, unsigned Depth) const {
3938 return 1;
3939}
3940
3941bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3942 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3943 TargetLoweringOpt &TLO, unsigned Depth) const {
3944 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3945 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3946 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3947 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3948 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3949 " is a target node!");
3950 return false;
3951}
3952
3953bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3954 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3955 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3956 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3957 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3958 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3959 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3960 "Should use SimplifyDemandedBits if you don't know whether Op"
3961 " is a target node!");
3962 computeKnownBitsForTargetNode(Op, Known, DemandedElts, DAG: TLO.DAG, Depth);
3963 return false;
3964}
3965
3966SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3967 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3968 SelectionDAG &DAG, unsigned Depth) const {
3969 assert(
3970 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3971 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3972 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3973 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3974 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3975 " is a target node!");
3976 return SDValue();
3977}
3978
3979SDValue
3980TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3981 SDValue N1, MutableArrayRef<int> Mask,
3982 SelectionDAG &DAG) const {
3983 bool LegalMask = isShuffleMaskLegal(Mask, VT);
3984 if (!LegalMask) {
3985 std::swap(a&: N0, b&: N1);
3986 ShuffleVectorSDNode::commuteMask(Mask);
3987 LegalMask = isShuffleMaskLegal(Mask, VT);
3988 }
3989
3990 if (!LegalMask)
3991 return SDValue();
3992
3993 return DAG.getVectorShuffle(VT, dl: DL, N1: N0, N2: N1, Mask);
3994}
3995
3996const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
3997 return nullptr;
3998}
3999
4000bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
4001 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4002 bool PoisonOnly, unsigned Depth) const {
4003 assert(
4004 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4005 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4006 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4007 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4008 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
4009 " is a target node!");
4010
4011 // If Op can't create undef/poison and none of its operands are undef/poison
4012 // then Op is never undef/poison.
4013 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
4014 /*ConsiderFlags*/ true, Depth) &&
4015 all_of(Range: Op->ops(), P: [&](SDValue V) {
4016 return DAG.isGuaranteedNotToBeUndefOrPoison(Op: V, PoisonOnly,
4017 Depth: Depth + 1);
4018 });
4019}
4020
4021bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
4022 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4023 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
4024 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4025 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4026 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4027 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4028 "Should use canCreateUndefOrPoison if you don't know whether Op"
4029 " is a target node!");
4030 // Be conservative and return true.
4031 return true;
4032}
4033
4034bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
4035 const APInt &DemandedElts,
4036 const SelectionDAG &DAG,
4037 bool SNaN,
4038 unsigned Depth) const {
4039 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4040 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4041 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4042 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4043 "Should use isKnownNeverNaN if you don't know whether Op"
4044 " is a target node!");
4045 return false;
4046}
4047
4048bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
4049 const APInt &DemandedElts,
4050 APInt &UndefElts,
4051 const SelectionDAG &DAG,
4052 unsigned Depth) const {
4053 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4054 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4055 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4056 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4057 "Should use isSplatValue if you don't know whether Op"
4058 " is a target node!");
4059 return false;
4060}
4061
4062// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
4063// work with truncating build vectors and vectors with elements of less than
4064// 8 bits.
4065bool TargetLowering::isConstTrueVal(SDValue N) const {
4066 if (!N)
4067 return false;
4068
4069 unsigned EltWidth;
4070 APInt CVal;
4071 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
4072 /*AllowTruncation=*/true)) {
4073 CVal = CN->getAPIntValue();
4074 EltWidth = N.getValueType().getScalarSizeInBits();
4075 } else
4076 return false;
4077
4078 // If this is a truncating splat, truncate the splat value.
4079 // Otherwise, we may fail to match the expected values below.
4080 if (EltWidth < CVal.getBitWidth())
4081 CVal = CVal.trunc(width: EltWidth);
4082
4083 switch (getBooleanContents(Type: N.getValueType())) {
4084 case UndefinedBooleanContent:
4085 return CVal[0];
4086 case ZeroOrOneBooleanContent:
4087 return CVal.isOne();
4088 case ZeroOrNegativeOneBooleanContent:
4089 return CVal.isAllOnes();
4090 }
4091
4092 llvm_unreachable("Invalid boolean contents");
4093}
4094
4095bool TargetLowering::isConstFalseVal(SDValue N) const {
4096 if (!N)
4097 return false;
4098
4099 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: N);
4100 if (!CN) {
4101 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Val&: N);
4102 if (!BV)
4103 return false;
4104
4105    // We are only interested in constant splats; we don't care about undef
4106    // elements when identifying boolean constants, and getConstantSplatNode
4107    // returns null if all ops are undef.
4108 CN = BV->getConstantSplatNode();
4109 if (!CN)
4110 return false;
4111 }
4112
4113 if (getBooleanContents(Type: N->getValueType(ResNo: 0)) == UndefinedBooleanContent)
4114 return !CN->getAPIntValue()[0];
4115
4116 return CN->isZero();
4117}
4118
4119bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
4120 bool SExt) const {
4121 if (VT == MVT::i1)
4122 return N->isOne();
4123
4124 TargetLowering::BooleanContent Cnt = getBooleanContents(Type: VT);
4125 switch (Cnt) {
4126 case TargetLowering::ZeroOrOneBooleanContent:
4127 // An extended value of 1 is always true, unless its original type is i1,
4128 // in which case it will be sign extended to -1.
4129 return (N->isOne() && !SExt) || (SExt && (N->getValueType(ResNo: 0) != MVT::i1));
4130 case TargetLowering::UndefinedBooleanContent:
4131 case TargetLowering::ZeroOrNegativeOneBooleanContent:
4132 return N->isAllOnes() && SExt;
4133 }
4134 llvm_unreachable("Unexpected enumeration.");
4135}
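
// Illustrative only: with ZeroOrOneBooleanContent, the i32 constant 1
// produced by zero-extending an i8 boolean is an extended "true"
// (N->isOne() && !SExt). With ZeroOrNegativeOneBooleanContent, sign
// extension of a true boolean yields all-ones, so the i32 constant -1
// with SExt set is the extended "true".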

/// This helper function of SimplifySetCC tries to optimize the comparison when
/// either operand of the SetCC node is a bitwise-and instruction.
SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
                                         ISD::CondCode Cond, const SDLoc &DL,
                                         DAGCombinerInfo &DCI) const {
  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
    std::swap(N0, N1);

  SelectionDAG &DAG = DCI.DAG;
  EVT OpVT = N0.getValueType();
  if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
    return SDValue();

  // (X & Y) != 0 --> zextOrTrunc(X & Y)
  // iff everything but LSB is known zero:
  if (Cond == ISD::SETNE && isNullConstant(N1) &&
      (getBooleanContents(OpVT) == TargetLowering::UndefinedBooleanContent ||
       getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
    unsigned NumEltBits = OpVT.getScalarSizeInBits();
    APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
    if (DAG.MaskedValueIsZero(N0, UpperBits))
      return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
  }

  // Try to eliminate a power-of-2 mask constant by converting to a signbit
  // test in a narrow type that we can truncate to with no cost. Examples:
  // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
  // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
  // TODO: This conservatively checks for type legality on the source and
  //       destination types. That may inhibit optimizations, but it also
  //       allows setcc->shift transforms that may be more beneficial.
  auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
      isTypeLegal(OpVT) && N0.hasOneUse()) {
    EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
                                     AndC->getAPIntValue().getActiveBits());
    if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
      SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
      SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
      return DAG.getSetCC(DL, VT, Trunc, Zero,
                          Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
    }
  }

  // Match these patterns in any of their permutations:
  // (X & Y) == Y
  // (X & Y) != Y
  SDValue X, Y;
  if (N0.getOperand(0) == N1) {
    X = N0.getOperand(1);
    Y = N0.getOperand(0);
  } else if (N0.getOperand(1) == N1) {
    X = N0.getOperand(0);
    Y = N0.getOperand(1);
  } else {
    return SDValue();
  }

  // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
  // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
  // it's liable to create an infinite loop.
  SDValue Zero = DAG.getConstant(0, DL, OpVT);
  if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
      DAG.isKnownToBeAPowerOfTwo(Y)) {
    // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
    // Note that where Y is variable and is known to have at most one bit set
    // (for example, if it is Z & 1) we cannot do this; the expressions are not
    // equivalent when Y == 0.
    assert(OpVT.isInteger());
    Cond = ISD::getSetCCInverse(Cond, OpVT);
    if (DCI.isBeforeLegalizeOps() ||
        isCondCodeLegal(Cond, N0.getSimpleValueType()))
      return DAG.getSetCC(DL, VT, N0, Zero, Cond);
  } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
    // If the target supports an 'and-not' or 'and-complement' logic operation,
    // try to use that to make a comparison operation more efficient.
    // But don't do this transform if the mask is a single bit because there are
    // more efficient ways to deal with that case (for example, 'bt' on x86 or
    // 'rlwinm' on PPC).

    // Bail out if the compare operand that we want to turn into a zero is
    // already a zero (otherwise, infinite loop).
    if (isNullConstant(Y))
      return SDValue();

    // Transform this into: ~X & Y == 0.
    SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
    SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
    return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
  }

  return SDValue();
}
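
// Illustrative only: the and-not form works because (X & Y) == Y holds
// exactly when every set bit of Y is also set in X, i.e. (~X & Y) == 0.
// For example, with X = 0b1100 and Y = 0b0101: X & Y = 0b0100 != Y, and
// correspondingly ~X & Y = 0b0011 & 0b0101 = 0b0001 != 0.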

/// This helper function of SimplifySetCC tries to optimize the comparison when
/// either operand of the SetCC node is a bitwise-or instruction.
/// For now, this just transforms (X | Y) ==/!= Y into X & ~Y ==/!= 0.
SDValue TargetLowering::foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1,
                                        ISD::CondCode Cond, const SDLoc &DL,
                                        DAGCombinerInfo &DCI) const {
  if (N1.getOpcode() == ISD::OR && N0.getOpcode() != ISD::OR)
    std::swap(N0, N1);

  SelectionDAG &DAG = DCI.DAG;
  EVT OpVT = N0.getValueType();
  if (!N0.hasOneUse() || !OpVT.isInteger() ||
      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
    return SDValue();

  // (X | Y) == Y
  // (X | Y) != Y
  SDValue X;
  if (sd_match(N0, m_Or(m_Value(X), m_Specific(N1))) && hasAndNotCompare(N1)) {
    // If the target supports an 'and-not' or 'and-complement' logic operation,
    // try to use that to make a comparison operation more efficient.

    // Bail out if the compare operand that we want to turn into a zero is
    // already a zero (otherwise, infinite loop).
    if (isNullConstant(N1))
      return SDValue();

    // Transform this into: X & ~Y ==/!= 0.
    SDValue NotY = DAG.getNOT(SDLoc(N1), N1, OpVT);
    SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, X, NotY);
    return DAG.getSetCC(DL, VT, NewAnd, DAG.getConstant(0, DL, OpVT), Cond);
  }

  return SDValue();
}
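
// Illustrative only: (X | Y) == Y holds exactly when every set bit of X is
// already set in Y, i.e. (X & ~Y) == 0. For example, with X = 0b0010 and
// Y = 0b0110: X | Y = 0b0110 == Y, and X & ~Y = 0b0010 & 0b1001 = 0.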

/// There are multiple IR patterns that could be checking whether certain
/// truncation of a signed number would be lossy or not. The pattern which is
/// best at IR level, may not lower optimally. Thus, we want to unfold it.
/// We are looking for the following pattern: (KeptBits is a constant)
///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
/// We will unfold it into the natural trunc+sext pattern:
///   ((%x << C) a>> C) dstcond %x
/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
    EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
    const SDLoc &DL) const {
  // We must be comparing with a constant.
  ConstantSDNode *C1;
  if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
    return SDValue();

  // N0 should be:  add %x, (1 << (KeptBits-1))
  if (N0->getOpcode() != ISD::ADD)
    return SDValue();

  // And we must be 'add'ing a constant.
  ConstantSDNode *C01;
  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
    return SDValue();

  SDValue X = N0->getOperand(0);
  EVT XVT = X.getValueType();

  // Validate constants ...

  APInt I1 = C1->getAPIntValue();

  ISD::CondCode NewCond;
  if (Cond == ISD::CondCode::SETULT) {
    NewCond = ISD::CondCode::SETEQ;
  } else if (Cond == ISD::CondCode::SETULE) {
    NewCond = ISD::CondCode::SETEQ;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGT) {
    NewCond = ISD::CondCode::SETNE;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGE) {
    NewCond = ISD::CondCode::SETNE;
  } else
    return SDValue();

  APInt I01 = C01->getAPIntValue();

  auto checkConstants = [&I1, &I01]() -> bool {
    // Both of them must be power-of-two, and the constant from setcc is bigger.
    return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
  };

  if (checkConstants()) {
    // Great, e.g. got  icmp ult i16 (add i16 %x, 128), 256
  } else {
    // What if we invert constants? (and the target predicate)
    I1.negate();
    I01.negate();
    assert(XVT.isInteger());
    NewCond = getSetCCInverse(NewCond, XVT);
    if (!checkConstants())
      return SDValue();
    // Great, e.g. got  icmp uge i16 (add i16 %x, -128), -256
  }

  // They are power-of-two, so which bit is set?
  const unsigned KeptBits = I1.logBase2();
  const unsigned KeptBitsMinusOne = I01.logBase2();

  // Magic!
  if (KeptBits != (KeptBitsMinusOne + 1))
    return SDValue();
  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");

  // We don't want to do this in every single case.
  SelectionDAG &DAG = DCI.DAG;
  if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
    return SDValue();

  // Unfold into:  sext_inreg(%x) cond %x
  // Where 'cond' will be either 'eq' or 'ne'.
  SDValue SExtInReg = DAG.getNode(
      ISD::SIGN_EXTEND_INREG, DL, XVT, X,
      DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
  return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
}
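
// Illustrative only: for i16 %x with KeptBits = 8, the input pattern
//   icmp ult i16 (add i16 %x, 128), 256
// is true exactly when %x + 128 lands in [0, 255], i.e. %x is in
// [-128, 127] and truncation to i8 is lossless. That is precisely
//   sign_extend_inreg(%x, i8) == %x
// which is what the unfolded form tests with NewCond == SETEQ.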

// (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
    EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
    DAGCombinerInfo &DCI, const SDLoc &DL) const {
  assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() &&
         "Should be a comparison with 0.");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Valid only for [in]equality comparisons.");

  unsigned NewShiftOpcode;
  SDValue X, C, Y;

  SelectionDAG &DAG = DCI.DAG;

  // Look for '(C l>>/<< Y)'.
  auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
    // The shift should be one-use.
    if (!V.hasOneUse())
      return false;
    unsigned OldShiftOpcode = V.getOpcode();
    switch (OldShiftOpcode) {
    case ISD::SHL:
      NewShiftOpcode = ISD::SRL;
      break;
    case ISD::SRL:
      NewShiftOpcode = ISD::SHL;
      break;
    default:
      return false; // must be a logical shift.
    }
    // We should be shifting a constant.
    // FIXME: best to use isConstantOrConstantVector().
    C = V.getOperand(0);
    ConstantSDNode *CC =
        isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    if (!CC)
      return false;
    Y = V.getOperand(1);

    ConstantSDNode *XC =
        isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    return shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
  };

  // LHS of comparison should be a one-use 'and'.
  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
    return SDValue();

  X = N0.getOperand(0);
  SDValue Mask = N0.getOperand(1);

  // 'and' is commutative!
  if (!Match(Mask)) {
    std::swap(X, Mask);
    if (!Match(Mask))
      return SDValue();
  }

  EVT VT = X.getValueType();

  // Produce:
  // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
  SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
  SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
  return T2;
}
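
// Illustrative only: for i8, (X & (0x80 l>> Y)) != 0 tests bit (7 - Y) of X;
// the hoisted form ((X << Y) & 0x80) != 0 tests bit 7 of X << Y, which is
// the same bit. E.g. Y = 3: 0x80 l>> 3 = 0x10 selects bit 4 of X, and
// (X << 3) & 0x80 likewise inspects bit 4 of the original X.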

/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
/// handle the commuted versions of these patterns.
SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
                                           ISD::CondCode Cond, const SDLoc &DL,
                                           DAGCombinerInfo &DCI) const {
  unsigned BOpcode = N0.getOpcode();
  assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
         "Unexpected binop");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");

  // (X + Y) == X --> Y == 0
  // (X - Y) == X --> Y == 0
  // (X ^ Y) == X --> Y == 0
  SelectionDAG &DAG = DCI.DAG;
  EVT OpVT = N0.getValueType();
  SDValue X = N0.getOperand(0);
  SDValue Y = N0.getOperand(1);
  if (X == N1)
    return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);

  if (Y != N1)
    return SDValue();

  // (X + Y) == Y --> X == 0
  // (X ^ Y) == Y --> X == 0
  if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
    return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);

  // The shift would not be valid if the operands are boolean (i1).
  if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
    return SDValue();

  // (X - Y) == Y --> X == Y << 1
  SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
  SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
  if (!DCI.isCalledByLegalizer())
    DCI.AddToWorklist(YShl1.getNode());
  return DAG.getSetCC(DL, VT, X, YShl1, Cond);
}
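
// Illustrative only: (X - Y) == Y rearranges to X == 2*Y, i.e. X == Y << 1
// (e.g. X = 6, Y = 3: 6 - 3 == 3 and 6 == 3 << 1). The i1 bail-out above is
// needed because a shift amount of 1 is out of range for a 1-bit type.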

static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
                                      SDValue N0, const APInt &C1,
                                      ISD::CondCode Cond, const SDLoc &dl,
                                      SelectionDAG &DAG) {
  // Look through truncs that don't change the value of a ctpop.
  // FIXME: Add vector support? Need to be careful with setcc result type below.
  SDValue CTPOP = N0;
  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
      N0.getScalarValueSizeInBits() >
          Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
    CTPOP = N0.getOperand(0);

  if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
    return SDValue();

  EVT CTVT = CTPOP.getValueType();
  SDValue CTOp = CTPOP.getOperand(0);

  // Expand a power-of-2-or-zero comparison based on ctpop:
  // (ctpop x) u< 2 -> (x & x-1) == 0
  // (ctpop x) u> 1 -> (x & x-1) != 0
  if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
    // Keep the CTPOP if it is a cheap vector op.
    if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
      return SDValue();

    unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
    if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
      return SDValue();
    if (C1 == 0 && (Cond == ISD::SETULT))
      return SDValue(); // This is handled elsewhere.

    unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);

    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
    SDValue Result = CTOp;
    for (unsigned i = 0; i < Passes; i++) {
      SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
      Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
    }
    ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
    return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
  }

  // Expand a power-of-2 comparison based on ctpop
  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
    // Keep the CTPOP if it is cheap.
    if (TLI.isCtpopFast(CTVT))
      return SDValue();

    SDValue Zero = DAG.getConstant(0, dl, CTVT);
    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
    assert(CTVT.isInteger());
    SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);

    // It's not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1,
    // so check before emitting a potentially unnecessary op.
    if (DAG.isKnownNeverZero(CTOp)) {
      // (ctpop x) == 1 --> (x & x-1) == 0
      // (ctpop x) != 1 --> (x & x-1) != 0
      SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
      SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
      return RHS;
    }

    // (ctpop x) == 1 --> (x ^ x-1) >  x-1
    // (ctpop x) != 1 --> (x ^ x-1) <= x-1
    SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
    ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
    return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
  }

  return SDValue();
}
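
// Illustrative only: the xor form handles x == 0 correctly where the and
// form would not. For 8-bit x = 8: 8 ^ 7 = 15 u> 7, so ctpop == 1 is true.
// For x = 6: 6 ^ 5 = 3, which is not u> 5, so false (ctpop is 2). For
// x = 0: x-1 = 0xFF and 0 ^ 0xFF = 0xFF, which is not u> 0xFF, so false,
// as required even though (x & x-1) == 0 would have reported true.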

static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
                                   ISD::CondCode Cond, const SDLoc &dl,
                                   SelectionDAG &DAG) {
  if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
    return SDValue();

  auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
  if (!C1 || !(C1->isZero() || C1->isAllOnes()))
    return SDValue();

  auto getRotateSource = [](SDValue X) {
    if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
      return X.getOperand(0);
    return SDValue();
  };

  // Peek through a rotated value compared against 0 or -1:
  // (rot X, Y) == 0/-1 --> X == 0/-1
  // (rot X, Y) != 0/-1 --> X != 0/-1
  if (SDValue R = getRotateSource(N0))
    return DAG.getSetCC(dl, VT, R, N1, Cond);

  // Peek through an 'or' of a rotated value compared against 0:
  // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
  // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
  //
  // TODO: Add the 'and' with -1 sibling.
  // TODO: Recurse through a series of 'or' ops to find the rotate.
  EVT OpVT = N0.getValueType();
  if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
    if (SDValue R = getRotateSource(N0.getOperand(0))) {
      SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
      return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
    }
    if (SDValue R = getRotateSource(N0.getOperand(1))) {
      SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
      return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
    }
  }

  return SDValue();
}
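
// Illustrative only: a rotate permutes bits without discarding any, so the
// all-zeros and all-ones patterns are its only relevant fixed points here:
// rotl(X, Y) == 0 holds iff X == 0, and likewise for -1. The 'or' case
// extends this because (rot X, Y) | Z == 0 requires both operands to be
// zero, so the rotate can be dropped from the zero test of X.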

static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
                                        ISD::CondCode Cond, const SDLoc &dl,
                                        SelectionDAG &DAG) {
  // If we are testing for all-bits-clear, we might be able to do that with
  // less shifting since bit-order does not matter.
  if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
    return SDValue();

  auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
  if (!C1 || !C1->isZero())
    return SDValue();

  if (!N0.hasOneUse() ||
      (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
    return SDValue();

  unsigned BitWidth = N0.getScalarValueSizeInBits();
  auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
  if (!ShAmtC)
    return SDValue();

  uint64_t ShAmt = ShAmtC->getAPIntValue().urem(BitWidth);
  if (ShAmt == 0)
    return SDValue();

  // Canonicalize fshr as fshl to reduce pattern-matching.
  if (N0.getOpcode() == ISD::FSHR)
    ShAmt = BitWidth - ShAmt;

  // Match an 'or' with a specific operand 'Other' in either commuted variant.
  SDValue X, Y;
  auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
    if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
      return false;
    if (Or.getOperand(0) == Other) {
      X = Or.getOperand(0);
      Y = Or.getOperand(1);
      return true;
    }
    if (Or.getOperand(1) == Other) {
      X = Or.getOperand(1);
      Y = Or.getOperand(0);
      return true;
    }
    return false;
  };

  EVT OpVT = N0.getValueType();
  EVT ShAmtVT = N0.getOperand(2).getValueType();
  SDValue F0 = N0.getOperand(0);
  SDValue F1 = N0.getOperand(1);
  if (matchOr(F0, F1)) {
    // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
    SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
    SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
    SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
    return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
  }
  if (matchOr(F1, F0)) {
    // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
    SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
    SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
    SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
    return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
  }

  return SDValue();
}
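
// Illustrative only: fshl(A, B, C) is (A << C) | (B l>> (BW - C)), so it is
// zero iff the low BW-C bits of A and the high C bits of B are all zero.
// With A = (X | Y) and B = X that condition is: X == 0 and the low BW-C
// bits of Y are zero, which is exactly (shl Y, C) | X == 0 -- one plain
// shift instead of a funnel shift.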

/// Try to simplify a setcc built with the specified operands and cc. If it is
/// unable to simplify it, return a null SDValue.
SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                                      ISD::CondCode Cond, bool foldBooleans,
                                      DAGCombinerInfo &DCI,
                                      const SDLoc &dl) const {
  SelectionDAG &DAG = DCI.DAG;
  const DataLayout &Layout = DAG.getDataLayout();
  EVT OpVT = N0.getValueType();
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();

  // Constant fold or commute setcc.
  if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
    return Fold;

  bool N0ConstOrSplat =
      isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncation*/ true);
  bool N1ConstOrSplat =
      isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncation*/ true);

  // Canonicalize toward having the constant on the RHS.
  // TODO: Handle non-splat vector constants. All undef causes trouble.
  // FIXME: We can't yet fold constant scalable vector splats, so avoid an
  // infinite loop here when we encounter one.
  ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
  if (N0ConstOrSplat && !N1ConstOrSplat &&
      (DCI.isBeforeLegalizeOps() ||
       isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
    return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);

  // If we have a subtract with the same 2 non-constant operands as this setcc
  // -- but in reverse order -- then try to commute the operands of this setcc
  // to match. A matching pair of setcc (cmp) and sub may be combined into 1
  // instruction on some targets.
  if (!N0ConstOrSplat && !N1ConstOrSplat &&
      (DCI.isBeforeLegalizeOps() ||
       isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
      DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
      !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
    return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);

  if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
    return V;

  if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
    return V;

  if (auto *N1C = isConstOrConstSplat(N1)) {
    const APInt &C1 = N1C->getAPIntValue();

    // Optimize some CTPOP cases.
    if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
      return V;

    // For equality to 0 of a no-wrap multiply, decompose and test each op:
    // X * Y == 0 --> (X == 0) || (Y == 0)
    // X * Y != 0 --> (X != 0) && (Y != 0)
    // TODO: This bails out if minsize is set, but if the target doesn't have a
    //       single instruction multiply for this type, it would likely be
    //       smaller to decompose.
    if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
        N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
        (N0->getFlags().hasNoUnsignedWrap() ||
         N0->getFlags().hasNoSignedWrap()) &&
        !Attr.hasFnAttr(Attribute::MinSize)) {
      SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
      SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
      unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
      return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
    }

    // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
    // equality comparison, then we're just comparing whether X itself is
    // zero.
    if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
        N0.getOperand(0).getOpcode() == ISD::CTLZ &&
        llvm::has_single_bit<uint32_t>(N0.getScalarValueSizeInBits())) {
      if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
        if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
            ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
          if ((C1 == 0) == (Cond == ISD::SETEQ)) {
            // (srl (ctlz x), 5) == 0  -> X != 0
            // (srl (ctlz x), 5) != 1  -> X != 0
            Cond = ISD::SETNE;
          } else {
            // (srl (ctlz x), 5) != 0  -> X == 0
            // (srl (ctlz x), 5) == 1  -> X == 0
            Cond = ISD::SETEQ;
          }
          SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
          return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
                              Cond);
        }
      }
    }
  }
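
  // Illustrative only: for i32, ctlz(x) ranges over [0, 32] and equals 32
  // only when x == 0, so (ctlz x) >> 5 is 1 exactly when x == 0 and 0
  // otherwise. Comparing that shifted value against 0 or 1 therefore
  // collapses to a plain zero test of x with the condition fixed up above.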

  // FIXME: Support vectors.
  if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
    const APInt &C1 = N1C->getAPIntValue();

    // (zext x) == C --> x == (trunc C)
    // (sext x) == C --> x == (trunc C)
    if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
        DCI.isBeforeLegalize() && N0->hasOneUse()) {
      unsigned MinBits = N0.getValueSizeInBits();
      SDValue PreExt;
      bool Signed = false;
      if (N0->getOpcode() == ISD::ZERO_EXTEND) {
        // ZExt
        MinBits = N0->getOperand(0).getValueSizeInBits();
        PreExt = N0->getOperand(0);
      } else if (N0->getOpcode() == ISD::AND) {
        // DAGCombine turns costly ZExts into ANDs
        if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
          if ((C->getAPIntValue() + 1).isPowerOf2()) {
            MinBits = C->getAPIntValue().countr_one();
            PreExt = N0->getOperand(0);
          }
      } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
        // SExt
        MinBits = N0->getOperand(0).getValueSizeInBits();
        PreExt = N0->getOperand(0);
        Signed = true;
      } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
        // ZEXTLOAD / SEXTLOAD
        if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
          MinBits = LN0->getMemoryVT().getSizeInBits();
          PreExt = N0;
        } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
          Signed = true;
          MinBits = LN0->getMemoryVT().getSizeInBits();
          PreExt = N0;
        }
      }

      // Figure out how many bits we need to preserve this constant.
      unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();

      // Make sure we're not losing bits from the constant.
      if (MinBits > 0 && MinBits < C1.getBitWidth() && MinBits >= ReqdBits) {
        EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
        if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
          // Will get folded away.
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
          if (MinBits == 1 && C1 == 1)
            // Invert the condition.
            return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
                                Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
          SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
          return DAG.getSetCC(dl, VT, Trunc, C, Cond);
        }

        // If truncating the setcc operands is not desirable, we can still
        // simplify the expression in some cases:
        // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
        // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
        // setcc (zext (setcc x, y, cc)), 1, setne)    -> setcc (x, y, inv(cc))
        // setcc (zext (setcc x, y, cc)), 1, seteq)    -> setcc (x, y, cc)
        // setcc (sext (setcc x, y, cc)), -1, setne)   -> setcc (x, y, inv(cc))
        // setcc (sext (setcc x, y, cc)), -1, seteq)   -> setcc (x, y, cc)
        SDValue TopSetCC = N0->getOperand(0);
        unsigned N0Opc = N0->getOpcode();
        bool SExt = (N0Opc == ISD::SIGN_EXTEND);
        if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
            TopSetCC.getOpcode() == ISD::SETCC &&
            (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
            (isConstFalseVal(N1) ||
             isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {

          bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
                         (!N1C->isZero() && Cond == ISD::SETNE);

          if (!Inverse)
            return TopSetCC;

          ISD::CondCode InvCond = ISD::getSetCCInverse(
              cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
              TopSetCC.getOperand(0).getValueType());
          return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
                              TopSetCC.getOperand(1),
                              InvCond);
        }
      }
    }

    // If the LHS is '(and load, const)', the RHS is 0, the test is for
    // equality or unsigned, and all 1 bits of the const are in the same
    // partial word, see if we can shorten the load.
    if (DCI.isBeforeLegalize() && !ISD::isSignedIntSetCC(Cond) &&
        N0.getOpcode() == ISD::AND && C1 == 0 && N0.getNode()->hasOneUse() &&
        isa<LoadSDNode>(N0.getOperand(0)) &&
        N0.getOperand(0).getNode()->hasOneUse() &&
        isa<ConstantSDNode>(N0.getOperand(1))) {
      auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
      APInt bestMask;
      unsigned bestWidth = 0, bestOffset = 0;
      if (Lod->isSimple() && Lod->isUnindexed() &&
          (Lod->getMemoryVT().isByteSized() ||
           isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
        unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
        unsigned origWidth = N0.getValueSizeInBits();
        unsigned maskWidth = origWidth;
        // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
        // 8 bits, but have to be careful...
        if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
          origWidth = Lod->getMemoryVT().getSizeInBits();
        const APInt &Mask = N0.getConstantOperandAPInt(1);
        // Only consider power-of-2 widths (and at least one byte) as
        // candidates for the narrowed load.
        for (unsigned width = 8; width < origWidth; width *= 2) {
          EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
          APInt newMask = APInt::getLowBitsSet(maskWidth, width);
          // Avoid accessing any padding here for now (we could use memWidth
          // instead of origWidth here otherwise).
          unsigned maxOffset = origWidth - width;
          for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
            if (Mask.isSubsetOf(newMask)) {
              unsigned ptrOffset =
                  Layout.isLittleEndian() ? offset : memWidth - width - offset;
              unsigned IsFast = 0;
              assert((ptrOffset % 8) == 0 && "Non-Bytealigned pointer offset");
              Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
              if (shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT,
                                        ptrOffset / 8) &&
                  allowsMemoryAccess(
                      *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
                      NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
                  IsFast) {
                bestOffset = ptrOffset / 8;
                bestMask = Mask.lshr(offset);
                bestWidth = width;
                break;
              }
            }
            newMask <<= 8;
          }
          if (bestWidth)
            break;
        }
      }
      if (bestWidth) {
        EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
        SDValue Ptr = Lod->getBasePtr();
        if (bestOffset != 0)
          Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
        SDValue NewLoad =
            DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
                        Lod->getPointerInfo().getWithOffset(bestOffset),
                        Lod->getBaseAlign());
        SDValue And =
            DAG.getNode(ISD::AND, dl, newVT, NewLoad,
                        DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
        return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
      }
    }
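
    // Illustrative only: on a little-endian target, (i32 load [p] & 0xFF00)
    // == 0 only inspects byte 1 of the load, so the search above settles on
    // width = 8 at bit offset 8: load an i8 at p+1, mask it with
    // bestMask = 0xFF00 >> 8 = 0xFF, and compare that byte against zero.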

    // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
    if (N0.getOpcode() == ISD::ZERO_EXTEND) {
      unsigned InSize = N0.getOperand(0).getValueSizeInBits();

      // If the comparison constant has bits in the upper part, the
      // zero-extended value could never match.
      if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
                                              C1.getBitWidth() - InSize))) {
        switch (Cond) {
        case ISD::SETUGT:
        case ISD::SETUGE:
        case ISD::SETEQ:
          return DAG.getConstant(0, dl, VT);
        case ISD::SETULT:
        case ISD::SETULE:
        case ISD::SETNE:
          return DAG.getConstant(1, dl, VT);
        case ISD::SETGT:
        case ISD::SETGE:
          // True if the sign bit of C1 is set.
          return DAG.getConstant(C1.isNegative(), dl, VT);
        case ISD::SETLT:
        case ISD::SETLE:
          // True if the sign bit of C1 isn't set.
          return DAG.getConstant(C1.isNonNegative(), dl, VT);
        default:
          break;
        }
      }

      // Otherwise, we can perform the comparison with the low bits.
      switch (Cond) {
      case ISD::SETEQ:
      case ISD::SETNE:
      case ISD::SETUGT:
      case ISD::SETUGE:
      case ISD::SETULT:
      case ISD::SETULE: {
        EVT newVT = N0.getOperand(0).getValueType();
        // FIXME: Should use isNarrowingProfitable.
        if (DCI.isBeforeLegalizeOps() ||
            (isOperationLegal(ISD::SETCC, newVT) &&
             isCondCodeLegal(Cond, newVT.getSimpleVT()) &&
             isTypeDesirableForOp(ISD::SETCC, newVT))) {
          EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
          SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);

          SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
                                          NewConst, Cond);
          return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
        }
        break;
      }
      default:
        break; // todo, be more careful with signed comparisons
      }
    } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
               (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
               !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
                                      OpVT)) {
      EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
      unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
      EVT ExtDstTy = N0.getValueType();
      unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();

      // If the constant doesn't fit into the number of bits for the source of
      // the sign extension, it is impossible for both sides to be equal.
      if (C1.getSignificantBits() > ExtSrcTyBits)
        return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);

      assert(ExtDstTy == N0.getOperand(0).getValueType() &&
             ExtDstTy != ExtSrcTy && "Unexpected types!");
      APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
      SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
                                   DAG.getConstant(Imm, dl, ExtDstTy));
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(ZextOp.getNode());
      // Otherwise, make this a use of a zext.
      return DAG.getSetCC(dl, VT, ZextOp,
                          DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
    } else if ((N1C->isZero() || N1C->isOne()) &&
               (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
      // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
      // excluded as they are handled below whilst checking for foldBooleans.
      if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
          isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
          (N0.getValueType() == MVT::i1 ||
           getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
          DAG.MaskedValueIsZero(
              N0, APInt::getBitsSetFrom(N0.getValueSizeInBits(), 1))) {
        bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
        if (TrueWhenTrue)
          return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
        // Invert the condition.
        if (N0.getOpcode() == ISD::SETCC) {
          ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
          CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
          if (DCI.isBeforeLegalizeOps() ||
              isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
            return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
        }
      }

      if ((N0.getOpcode() == ISD::XOR ||
           (N0.getOpcode() == ISD::AND &&
            N0.getOperand(0).getOpcode() == ISD::XOR &&
            N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
          isOneConstant(N0.getOperand(1))) {
        // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
        // can only do this if the top bits are known zero.
        unsigned BitWidth = N0.getValueSizeInBits();
        if (DAG.MaskedValueIsZero(N0,
                                  APInt::getHighBitsSet(BitWidth,
                                                        BitWidth - 1))) {
          // Okay, get the un-inverted input value.
          SDValue Val;
          if (N0.getOpcode() == ISD::XOR) {
            Val = N0.getOperand(0);
          } else {
            assert(N0.getOpcode() == ISD::AND &&
                   N0.getOperand(0).getOpcode() == ISD::XOR);
            // ((X^1)&1)^1 -> X & 1
            Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
                              N0.getOperand(0).getOperand(0),
                              N0.getOperand(1));
          }

          return DAG.getSetCC(dl, VT, Val, N1,
                              Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
        }
      } else if (N1C->isOne()) {
        SDValue Op0 = N0;
        if (Op0.getOpcode() == ISD::TRUNCATE)
          Op0 = Op0.getOperand(0);

        if ((Op0.getOpcode() == ISD::XOR) &&
            Op0.getOperand(0).getOpcode() == ISD::SETCC &&
            Op0.getOperand(1).getOpcode() == ISD::SETCC) {
          SDValue XorLHS = Op0.getOperand(0);
          SDValue XorRHS = Op0.getOperand(1);
          // Ensure that the input setccs return an i1 type or 0/1 value.
          if (Op0.getValueType() == MVT::i1 ||
              (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
                   ZeroOrOneBooleanContent &&
               getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
                   ZeroOrOneBooleanContent)) {
            // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
            Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
            return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
          }
        }
        if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
          // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
          if (Op0.getValueType().bitsGT(VT))
            Op0 = DAG.getNode(ISD::AND, dl, VT,
                              DAG.getNode(ISD::TRUNCATE, dl, VT,
                                          Op0.getOperand(0)),
                              DAG.getConstant(1, dl, VT));
          else if (Op0.getValueType().bitsLT(VT))
            Op0 = DAG.getNode(ISD::AND, dl, VT,
                              DAG.getNode(ISD::ANY_EXTEND, dl, VT,
                                          Op0.getOperand(0)),
                              DAG.getConstant(1, dl, VT));

          return DAG.getSetCC(dl, VT, Op0,
                              DAG.getConstant(0, dl, Op0.getValueType()),
                              Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
        }
        if (Op0.getOpcode() == ISD::AssertZext &&
            cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
          return DAG.getSetCC(dl, VT, Op0,
                              DAG.getConstant(0, dl, Op0.getValueType()),
                              Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
      }
    }

    // Given:
    //   icmp eq/ne (urem %x, %y), 0
    // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
    //   icmp eq/ne %x, 0
    if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
        (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
      KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
      KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
      if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
        return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
    }

    // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
    // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
    if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
        N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
        N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
        N1C->isAllOnes()) {
      return DAG.getSetCC(dl, VT, N0.getOperand(0),
                          DAG.getConstant(0, dl, OpVT),
                          Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
    }

    if (SDValue V =
            optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
      return V;
  }

  // These simplifications apply to splat vectors as well.
  // TODO: Handle more splat vector cases.
  if (auto *N1C = isConstOrConstSplat(N1)) {
    const APInt &C1 = N1C->getAPIntValue();

    APInt MinVal, MaxVal;
    unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
    if (ISD::isSignedIntSetCC(Cond)) {
      MinVal = APInt::getSignedMinValue(OperandBitSize);
      MaxVal = APInt::getSignedMaxValue(OperandBitSize);
    } else {
      MinVal = APInt::getMinValue(OperandBitSize);
      MaxVal = APInt::getMaxValue(OperandBitSize);
    }

    // Canonicalize GE/LE comparisons to use GT/LT comparisons.
    if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
      // X >= MIN --> true
      if (C1 == MinVal)
        return DAG.getBoolConstant(true, dl, VT, OpVT);

      if (!VT.isVector()) { // TODO: Support this for vectors.
        // X >= C0 --> X > (C0 - 1)
        APInt C = C1 - 1;
        ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
        if ((DCI.isBeforeLegalizeOps() ||
             isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
            (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
                                  isLegalICmpImmediate(C.getSExtValue())))) {
          return DAG.getSetCC(dl, VT, N0,
                              DAG.getConstant(C, dl, N1.getValueType()),
                              NewCC);
        }
      }
    }

    if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
      // X <= MAX --> true
      if (C1 == MaxVal)
        return DAG.getBoolConstant(true, dl, VT, OpVT);

      // X <= C0 --> X < (C0 + 1)
      if (!VT.isVector()) { // TODO: Support this for vectors.
        APInt C = C1 + 1;
        ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
        if ((DCI.isBeforeLegalizeOps() ||
             isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
            (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
                                  isLegalICmpImmediate(C.getSExtValue())))) {
          return DAG.getSetCC(dl, VT, N0,
                              DAG.getConstant(C, dl, N1.getValueType()),
                              NewCC);
        }
      }
    }

    if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
      if (C1 == MinVal)
        return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false

      // TODO: Support this for vectors after legalize ops.
      if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
        // Canonicalize setlt X, Max --> setne X, Max
        if (C1 == MaxVal)
          return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);

        // If we have setult X, 1, turn it into seteq X, 0
        if (C1 == MinVal + 1)
          return DAG.getSetCC(dl, VT, N0,
                              DAG.getConstant(MinVal, dl, N0.getValueType()),
                              ISD::SETEQ);
      }
    }

    if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
      if (C1 == MaxVal)
        return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false

      // TODO: Support this for vectors after legalize ops.
      if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
        // Canonicalize setgt X, Min --> setne X, Min
        if (C1 == MinVal)
          return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);

        // If we have setugt X, Max-1, turn it into seteq X, Max
        if (C1 == MaxVal - 1)
          return DAG.getSetCC(dl, VT, N0,
                              DAG.getConstant(MaxVal, dl, N0.getValueType()),
                              ISD::SETEQ);
      }
    }

    if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
      // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
      if (C1.isZero())
        if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
                VT, N0, N1, Cond, DCI, dl))
          return CC;

      // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
      // For example, when high 32-bits of i64 X are known clear:
      // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
      // all bits set:   (X | (Y<<32)) == -1 --> (X & Y) == -1
      bool CmpZero = N1C->isZero();
      bool CmpNegOne = N1C->isAllOnes();
      if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
        // Match or(lo,shl(hi,bw/2)) pattern.
        auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
          unsigned EltBits = V.getScalarValueSizeInBits();
          if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
            return false;
          SDValue LHS = V.getOperand(0);
          SDValue RHS = V.getOperand(1);
          APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
          // Unshifted element must have zero upperbits.
          if (RHS.getOpcode() == ISD::SHL &&
              isa<ConstantSDNode>(RHS.getOperand(1)) &&
              RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
              DAG.MaskedValueIsZero(LHS, HiBits)) {
            Lo = LHS;
            Hi = RHS.getOperand(0);
            return true;
          }
          if (LHS.getOpcode() == ISD::SHL &&
              isa<ConstantSDNode>(LHS.getOperand(1)) &&
              LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
              DAG.MaskedValueIsZero(RHS, HiBits)) {
            Lo = RHS;
            Hi = LHS.getOperand(0);
            return true;
          }
          return false;
        };

        auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
          unsigned EltBits = N0.getScalarValueSizeInBits();
          unsigned HalfBits = EltBits / 2;
          APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
          SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
          SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
          SDValue NewN0 =
              DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
          SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
          return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
        };

        SDValue Lo, Hi;
        if (IsConcat(N0, Lo, Hi))
          return MergeConcat(Lo, Hi);

        if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
          SDValue Lo0, Lo1, Hi0, Hi1;
          if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
              IsConcat(N0.getOperand(1), Lo1, Hi1)) {
            return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
                               DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
          }
        }
      }
    }

    // If we have "setcc X, C0", check to see if we can shrink the immediate
    // by changing cc.
    // TODO: Support this for vectors after legalize ops.
    if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
      // SETUGT X, SINTMAX  -> SETLT X, 0
      // SETUGE X, SINTMIN  -> SETLT X, 0
      if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
          (Cond == ISD::SETUGE && C1.isMinSignedValue()))
        return DAG.getSetCC(dl, VT, N0,
                            DAG.getConstant(0, dl, N1.getValueType()),
                            ISD::SETLT);

      // SETULT X, SINTMIN  -> SETGT X, -1
      // SETULE X, SINTMAX  -> SETGT X, -1
      if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
          (Cond == ISD::SETULE && C1.isMaxSignedValue()))
        return DAG.getSetCC(dl, VT, N0,
                            DAG.getAllOnesConstant(dl, N1.getValueType()),
                            ISD::SETGT);
    }
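
    // Illustrative only: for i32, SETUGT X, 0x7FFFFFFF holds exactly when X
    // is outside [0, INT32_MAX], i.e. its sign bit is set, which is
    // SETLT X, 0. The large unsigned immediate becomes a zero, which nearly
    // every target can compare against for free.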
5300 }
5301
5302 // Back to non-vector simplifications.
5303 // TODO: Can we do these for vector splats?
5304 if (auto *N1C = dyn_cast<ConstantSDNode>(Val: N1.getNode())) {
5305 const APInt &C1 = N1C->getAPIntValue();
5306 EVT ShValTy = N0.getValueType();
5307
5308 // Fold bit comparisons when we can. This will result in an
5309 // incorrect value when boolean false is negative one, unless
5310 // the bitsize is 1 in which case the false value is the same
5311 // in practice regardless of the representation.
5312 if ((VT.getSizeInBits() == 1 ||
5313 getBooleanContents(Type: N0.getValueType()) == ZeroOrOneBooleanContent) &&
5314 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5315 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(VT: ShValTy))) &&
5316 N0.getOpcode() == ISD::AND) {
5317 if (auto *AndRHS = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
5318 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5319 // Perform the xform if the AND RHS is a single bit.
5320 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5321 if (AndRHS->getAPIntValue().isPowerOf2() &&
5322 !shouldAvoidTransformToShift(VT: ShValTy, Amount: ShCt)) {
5323 return DAG.getNode(
5324 Opcode: ISD::TRUNCATE, DL: dl, VT,
5325 Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5326 N2: DAG.getShiftAmountConstant(Val: ShCt, VT: ShValTy, DL: dl)));
5327 }
5328 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5329 // (X & 8) == 8 --> (X & 8) >> 3
5330 // Perform the xform if C1 is a single bit.
5331 unsigned ShCt = C1.logBase2();
5332 if (C1.isPowerOf2() && !shouldAvoidTransformToShift(VT: ShValTy, Amount: ShCt)) {
5333 return DAG.getNode(
5334 Opcode: ISD::TRUNCATE, DL: dl, VT,
5335 Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5336 N2: DAG.getShiftAmountConstant(Val: ShCt, VT: ShValTy, DL: dl)));
5337 }
5338 }
5339 }
5340 }
5341
5342 if (C1.getSignificantBits() <= 64 &&
5343 !isLegalICmpImmediate(C1.getSExtValue())) {
5344 // (X & -256) == 256 -> (X >> 8) == 1
5345 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5346 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5347 if (auto *AndRHS = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
5348 const APInt &AndRHSC = AndRHS->getAPIntValue();
5349 if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(RHS: AndRHSC)) {
5350 unsigned ShiftBits = AndRHSC.countr_zero();
5351 if (!shouldAvoidTransformToShift(VT: ShValTy, Amount: ShiftBits)) {
5352 SDValue Shift = DAG.getNode(
5353 Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0.getOperand(i: 0),
5354 N2: DAG.getShiftAmountConstant(Val: ShiftBits, VT: ShValTy, DL: dl));
5355 SDValue CmpRHS = DAG.getConstant(Val: C1.lshr(shiftAmt: ShiftBits), DL: dl, VT: ShValTy);
5356 return DAG.getSetCC(DL: dl, VT, LHS: Shift, RHS: CmpRHS, Cond);
5357 }
5358 }
5359 }
5360 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5361 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5362 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5363 // X < 0x100000000 -> (X >> 32) < 1
5364 // X >= 0x100000000 -> (X >> 32) >= 1
5365 // X <= 0x0ffffffff -> (X >> 32) < 1
5366 // X > 0x0ffffffff -> (X >> 32) >= 1
5367 unsigned ShiftBits;
5368 APInt NewC = C1;
5369 ISD::CondCode NewCond = Cond;
5370 if (AdjOne) {
5371 ShiftBits = C1.countr_one();
5372 NewC = NewC + 1;
5373 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5374 } else {
5375 ShiftBits = C1.countr_zero();
5376 }
5377 NewC.lshrInPlace(ShiftAmt: ShiftBits);
5378 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5379 isLegalICmpImmediate(NewC.getSExtValue()) &&
5380 !shouldAvoidTransformToShift(VT: ShValTy, Amount: ShiftBits)) {
5381 SDValue Shift =
5382 DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5383 N2: DAG.getShiftAmountConstant(Val: ShiftBits, VT: ShValTy, DL: dl));
5384 SDValue CmpRHS = DAG.getConstant(Val: NewC, DL: dl, VT: ShValTy);
5385 return DAG.getSetCC(DL: dl, VT, LHS: Shift, RHS: CmpRHS, Cond: NewCond);
5386 }
5387 }
5388 }
5389 }
5390
5391 if (!isa<ConstantFPSDNode>(Val: N0) && isa<ConstantFPSDNode>(Val: N1)) {
5392 auto *CFP = cast<ConstantFPSDNode>(Val&: N1);
5393 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5394
5395 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5396 // constant if knowing that the operand is non-nan is enough. We prefer to
5397 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5398 // materialize 0.0.
5399 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5400 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N0, Cond);
5401
5402 // setcc (fneg x), C -> setcc swap(pred) x, -C
5403 if (N0.getOpcode() == ISD::FNEG) {
5404 ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Operation: Cond);
5405 if (DCI.isBeforeLegalizeOps() ||
5406 isCondCodeLegal(CC: SwapCond, VT: N0.getSimpleValueType())) {
5407 SDValue NegN1 = DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT: N0.getValueType(), Operand: N1);
5408 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: NegN1, Cond: SwapCond);
5409 }
5410 }
5411
5412 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5413 if (isOperationLegalOrCustom(Op: ISD::IS_FPCLASS, VT: N0.getValueType()) &&
5414 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(ResNo: 0))) {
5415 bool IsFabs = N0.getOpcode() == ISD::FABS;
5416 SDValue Op = IsFabs ? N0.getOperand(i: 0) : N0;
5417 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5418 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5419 : (IsFabs ? fcInf : fcPosInf);
5420 if (Cond == ISD::SETUEQ)
5421 Flag |= fcNan;
5422 return DAG.getNode(Opcode: ISD::IS_FPCLASS, DL: dl, VT, N1: Op,
5423 N2: DAG.getTargetConstant(Val: Flag, DL: dl, VT: MVT::i32));
5424 }
5425 }
5426
5427 // If the condition is not legal, see if we can find an equivalent one
5428 // which is legal.
5429 if (!isCondCodeLegal(CC: Cond, VT: N0.getSimpleValueType())) {
5430 // If the comparison was an awkward floating-point == or != and one of
5431 // the comparison operands is infinity or negative infinity, convert the
5432 // condition to a less-awkward <= or >=.
5433 if (CFP->getValueAPF().isInfinity()) {
5434 bool IsNegInf = CFP->getValueAPF().isNegative();
5435 ISD::CondCode NewCond = ISD::SETCC_INVALID;
5436 switch (Cond) {
5437 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5438 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5439 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5440 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5441 default: break;
5442 }
5443 if (NewCond != ISD::SETCC_INVALID &&
5444 isCondCodeLegal(CC: NewCond, VT: N0.getSimpleValueType()))
5445 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: NewCond);
5446 }
5447 }
5448 }
5449
5450 if (N0 == N1) {
5451 // The sext(setcc()) => setcc() optimization relies on the appropriate
5452 // constant being emitted.
5453 assert(!N0.getValueType().isInteger() &&
5454 "Integer types should be handled by FoldSetCC");
5455
5456 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5457 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5458 if (UOF == 2) // FP operators that are undefined on NaNs.
5459 return DAG.getBoolConstant(V: EqTrue, DL: dl, VT, OpVT);
5460 if (UOF == unsigned(EqTrue))
5461 return DAG.getBoolConstant(V: EqTrue, DL: dl, VT, OpVT);
5462 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5463 // if it is not already.
5464 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5465 if (NewCond != Cond &&
5466 (DCI.isBeforeLegalizeOps() ||
5467 isCondCodeLegal(CC: NewCond, VT: N0.getSimpleValueType())))
5468 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: NewCond);
5469 }
5470
5471 // ~X > ~Y --> Y > X
5472 // ~X < ~Y --> Y < X
5473 // ~X < C --> X > ~C
5474 // ~X > C --> X < ~C
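  // e.g. for i8: (setult (xor X, -1), 16) --> (setugt X, 0xEF), because
  // bitwise-not reverses the unsigned order: ~X <u C iff X >u ~C.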
5475 if ((isSignedIntSetCC(Code: Cond) || isUnsignedIntSetCC(Code: Cond)) &&
5476 N0.getValueType().isInteger()) {
5477 if (isBitwiseNot(V: N0)) {
5478 if (isBitwiseNot(V: N1))
5479 return DAG.getSetCC(DL: dl, VT, LHS: N1.getOperand(i: 0), RHS: N0.getOperand(i: 0), Cond);
5480
5481 if (DAG.isConstantIntBuildVectorOrConstantInt(N: N1) &&
5482 !DAG.isConstantIntBuildVectorOrConstantInt(N: N0.getOperand(i: 0))) {
5483 SDValue Not = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5484 return DAG.getSetCC(DL: dl, VT, LHS: Not, RHS: N0.getOperand(i: 0), Cond);
5485 }
5486 }
5487 }
5488
5489 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5490 N0.getValueType().isInteger()) {
5491 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5492 N0.getOpcode() == ISD::XOR) {
5493 // Simplify (X+Y) == (X+Z) --> Y == Z
5494 if (N0.getOpcode() == N1.getOpcode()) {
5495 if (N0.getOperand(i: 0) == N1.getOperand(i: 0))
5496 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 1), RHS: N1.getOperand(i: 1), Cond);
5497 if (N0.getOperand(i: 1) == N1.getOperand(i: 1))
5498 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1.getOperand(i: 0), Cond);
5499 if (isCommutativeBinOp(Opcode: N0.getOpcode())) {
5500 // If X op Y == Y op X, try other combinations.
5501 if (N0.getOperand(i: 0) == N1.getOperand(i: 1))
5502 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 1), RHS: N1.getOperand(i: 0),
5503 Cond);
5504 if (N0.getOperand(i: 1) == N1.getOperand(i: 0))
5505 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1.getOperand(i: 1),
5506 Cond);
5507 }
5508 }
5509
5510 // If RHS is a legal immediate value for a compare instruction, we need
5511 // to be careful about increasing register pressure needlessly.
5512 bool LegalRHSImm = false;
5513
5514 if (auto *RHSC = dyn_cast<ConstantSDNode>(Val&: N1)) {
5515 if (auto *LHSR = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
5516 // Turn (X+C1) == C2 --> X == C2-C1
5517 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5518 return DAG.getSetCC(
5519 DL: dl, VT, LHS: N0.getOperand(i: 0),
5520 RHS: DAG.getConstant(Val: RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5521 DL: dl, VT: N0.getValueType()),
5522 Cond);
5523
5524 // Turn (X^C1) == C2 --> X == C1^C2
5525 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5526 return DAG.getSetCC(
5527 DL: dl, VT, LHS: N0.getOperand(i: 0),
5528 RHS: DAG.getConstant(Val: LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5529 DL: dl, VT: N0.getValueType()),
5530 Cond);
5531 }
5532
5533 // Turn (C1-X) == C2 --> X == C1-C2
5534 if (auto *SUBC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 0)))
5535 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5536 return DAG.getSetCC(
5537 DL: dl, VT, LHS: N0.getOperand(i: 1),
5538 RHS: DAG.getConstant(Val: SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5539 DL: dl, VT: N0.getValueType()),
5540 Cond);
5541
5542 // Could RHSC fold directly into a compare?
5543 if (RHSC->getValueType(ResNo: 0).getSizeInBits() <= 64)
5544 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5545 }
5546
5547 // (X+Y) == X --> Y == 0 and similar folds.
5548 // Don't do this if X is an immediate that can fold into a cmp
5549 // instruction and X+Y has other uses. It could be an induction variable
5550 // chain, and the transform would increase register pressure.
5551 if (!LegalRHSImm || N0.hasOneUse())
5552 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, DL: dl, DCI))
5553 return V;
5554 }
5555
5556 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5557 N1.getOpcode() == ISD::XOR)
5558 if (SDValue V = foldSetCCWithBinOp(VT, N0: N1, N1: N0, Cond, DL: dl, DCI))
5559 return V;
5560
5561 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, DL: dl, DCI))
5562 return V;
5563
5564 if (SDValue V = foldSetCCWithOr(VT, N0, N1, Cond, DL: dl, DCI))
5565 return V;
5566 }
5567
5568 // Fold remainder of division by a constant.
5569 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5570 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5571 // When division is cheap or optimizing for minimum size,
5572 // fall through to DIVREM creation by skipping this fold.
5573 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Kind: Attribute::MinSize)) {
5574 if (N0.getOpcode() == ISD::UREM) {
5575 if (SDValue Folded = buildUREMEqFold(SETCCVT: VT, REMNode: N0, CompTargetNode: N1, Cond, DCI, DL: dl))
5576 return Folded;
5577 } else if (N0.getOpcode() == ISD::SREM) {
5578 if (SDValue Folded = buildSREMEqFold(SETCCVT: VT, REMNode: N0, CompTargetNode: N1, Cond, DCI, DL: dl))
5579 return Folded;
5580 }
5581 }
5582 }
5583
5584 // Fold away ALL boolean setcc's.
5585 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5586 SDValue Temp;
5587 switch (Cond) {
5588 default: llvm_unreachable("Unknown integer setcc!");
5589 case ISD::SETEQ: // X == Y -> ~(X^Y)
5590 Temp = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OpVT, N1: N0, N2: N1);
5591 N0 = DAG.getNOT(DL: dl, Val: Temp, VT: OpVT);
5592 if (!DCI.isCalledByLegalizer())
5593 DCI.AddToWorklist(N: Temp.getNode());
5594 break;
5595 case ISD::SETNE: // X != Y --> (X^Y)
5596 N0 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OpVT, N1: N0, N2: N1);
5597 break;
5598 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5599 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5600 Temp = DAG.getNOT(DL: dl, Val: N0, VT: OpVT);
5601 N0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1, N2: Temp);
5602 if (!DCI.isCalledByLegalizer())
5603 DCI.AddToWorklist(N: Temp.getNode());
5604 break;
5605 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5606 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5607 Temp = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5608 N0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1: N0, N2: Temp);
5609 if (!DCI.isCalledByLegalizer())
5610 DCI.AddToWorklist(N: Temp.getNode());
5611 break;
5612 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5613 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5614 Temp = DAG.getNOT(DL: dl, Val: N0, VT: OpVT);
5615 N0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1, N2: Temp);
5616 if (!DCI.isCalledByLegalizer())
5617 DCI.AddToWorklist(N: Temp.getNode());
5618 break;
5619 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5620 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5621 Temp = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5622 N0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: N0, N2: Temp);
5623 break;
5624 }
5625 if (VT.getScalarType() != MVT::i1) {
5626 if (!DCI.isCalledByLegalizer())
5627 DCI.AddToWorklist(N: N0.getNode());
5628 // FIXME: If running after legalize, we probably can't do this.
5629 ISD::NodeType ExtendCode = getExtendForContent(Content: getBooleanContents(Type: OpVT));
5630 N0 = DAG.getNode(Opcode: ExtendCode, DL: dl, VT, Operand: N0);
5631 }
5632 return N0;
5633 }
5634
5635 // Could not fold it.
5636 return SDValue();
5637}
5638
5639/// Returns true (and the GlobalValue and the offset) if the node is a
5640/// GlobalAddress + offset.
5641bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
5642 int64_t &Offset) const {
5643
5644 SDNode *N = unwrapAddress(N: SDValue(WN, 0)).getNode();
5645
5646 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(Val: N)) {
5647 GA = GASD->getGlobal();
5648 Offset += GASD->getOffset();
5649 return true;
5650 }
5651
5652 if (N->getOpcode() == ISD::ADD) {
5653 SDValue N1 = N->getOperand(Num: 0);
5654 SDValue N2 = N->getOperand(Num: 1);
5655 if (isGAPlusOffset(WN: N1.getNode(), GA, Offset)) {
5656 if (auto *V = dyn_cast<ConstantSDNode>(Val&: N2)) {
5657 Offset += V->getSExtValue();
5658 return true;
5659 }
5660 } else if (isGAPlusOffset(WN: N2.getNode(), GA, Offset)) {
5661 if (auto *V = dyn_cast<ConstantSDNode>(Val&: N1)) {
5662 Offset += V->getSExtValue();
5663 return true;
5664 }
5665 }
5666 }
5667
5668 return false;
5669}
5670
5671SDValue TargetLowering::PerformDAGCombine(SDNode *N,
5672 DAGCombinerInfo &DCI) const {
5673 // Default implementation: no optimization.
5674 return SDValue();
5675}
5676
5677//===----------------------------------------------------------------------===//
5678// Inline Assembler Implementation Methods
5679//===----------------------------------------------------------------------===//
5680
5681TargetLowering::ConstraintType
5682TargetLowering::getConstraintType(StringRef Constraint) const {
5683 unsigned S = Constraint.size();
5684
5685 if (S == 1) {
5686 switch (Constraint[0]) {
5687 default: break;
5688 case 'r':
5689 return C_RegisterClass;
5690 case 'm': // memory
5691 case 'o': // offsetable
5692 case 'V': // not offsetable
5693 return C_Memory;
5694 case 'p': // Address.
5695 return C_Address;
5696 case 'n': // Simple Integer
5697 case 'E': // Floating Point Constant
5698 case 'F': // Floating Point Constant
5699 return C_Immediate;
5700 case 'i': // Simple Integer or Relocatable Constant
5701 case 's': // Relocatable Constant
5702 case 'X': // Allow ANY value.
5703 case 'I': // Target registers.
5704 case 'J':
5705 case 'K':
5706 case 'L':
5707 case 'M':
5708 case 'N':
5709 case 'O':
5710 case 'P':
5711 case '<':
5712 case '>':
5713 return C_Other;
5714 }
5715 }
5716
5717 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5718 if (S == 8 && Constraint.substr(Start: 1, N: 6) == "memory") // "{memory}"
5719 return C_Memory;
5720 return C_Register;
5721 }
5722 return C_Unknown;
5723}
5724
5725/// Try to replace an X constraint, which matches anything, with another that
5726/// has more specific requirements based on the type of the corresponding
5727/// operand.
5728const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5729 if (ConstraintVT.isInteger())
5730 return "r";
5731 if (ConstraintVT.isFloatingPoint())
5732 return "f"; // works for many targets
5733 return nullptr;
5734}
5735
5736SDValue TargetLowering::LowerAsmOutputForConstraint(
5737 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5738 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5739 return SDValue();
5740}
5741
5742/// Lower the specified operand into the Ops vector.
5743/// If it is invalid, don't add anything to Ops.
5744void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
5745 StringRef Constraint,
5746 std::vector<SDValue> &Ops,
5747 SelectionDAG &DAG) const {
5748
5749 if (Constraint.size() > 1)
5750 return;
5751
5752 char ConstraintLetter = Constraint[0];
5753 switch (ConstraintLetter) {
5754 default: break;
5755 case 'X': // Allows any operand
5756 case 'i': // Simple Integer or Relocatable Constant
5757 case 'n': // Simple Integer
5758 case 's': { // Relocatable Constant
5759
5760 ConstantSDNode *C;
5761 uint64_t Offset = 0;
5762
    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
    // etc., since getelementptr is variadic. We can't use
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be
    // accessible while in this case the GA may be furthest from the root
    // node, which is likely an ISD::ADD.
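    // For example, (add (add (GlobalAddress @g, 4), 8), -2) is accepted by
    // this loop and folds to a TargetGlobalAddress of @g with offset 10.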
5768 while (true) {
5769 if ((C = dyn_cast<ConstantSDNode>(Val&: Op)) && ConstraintLetter != 's') {
5770 // gcc prints these as sign extended. Sign extend value to 64 bits
5771 // now; without this it would get ZExt'd later in
5772 // ScheduleDAGSDNodes::EmitNode, which is very generic.
5773 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5774 BooleanContent BCont = getBooleanContents(Type: MVT::i64);
5775 ISD::NodeType ExtOpc =
5776 IsBool ? getExtendForContent(Content: BCont) : ISD::SIGN_EXTEND;
5777 int64_t ExtVal =
5778 ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5779 Ops.push_back(
5780 x: DAG.getTargetConstant(Val: Offset + ExtVal, DL: SDLoc(C), VT: MVT::i64));
5781 return;
5782 }
5783 if (ConstraintLetter != 'n') {
5784 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Val&: Op)) {
5785 Ops.push_back(x: DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL: SDLoc(Op),
5786 VT: GA->getValueType(ResNo: 0),
5787 offset: Offset + GA->getOffset()));
5788 return;
5789 }
5790 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Val&: Op)) {
5791 Ops.push_back(x: DAG.getTargetBlockAddress(
5792 BA: BA->getBlockAddress(), VT: BA->getValueType(ResNo: 0),
5793 Offset: Offset + BA->getOffset(), TargetFlags: BA->getTargetFlags()));
5794 return;
5795 }
5796 if (isa<BasicBlockSDNode>(Val: Op)) {
5797 Ops.push_back(x: Op);
5798 return;
5799 }
5800 }
5801 const unsigned OpCode = Op.getOpcode();
5802 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5803 if ((C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 0))))
5804 Op = Op.getOperand(i: 1);
5805 // Subtraction is not commutative.
5806 else if (OpCode == ISD::ADD &&
5807 (C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1))))
5808 Op = Op.getOperand(i: 0);
5809 else
5810 return;
5811 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5812 continue;
5813 }
5814 return;
5815 }
5816 break;
5817 }
5818 }
5819}
5820
5821void TargetLowering::CollectTargetIntrinsicOperands(
5822 const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
5823}
5824
5825std::pair<unsigned, const TargetRegisterClass *>
5826TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
5827 StringRef Constraint,
5828 MVT VT) const {
5829 if (!Constraint.starts_with(Prefix: "{"))
5830 return std::make_pair(x: 0u, y: static_cast<TargetRegisterClass *>(nullptr));
5831 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5832
5833 // Remove the braces from around the name.
5834 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5835
5836 std::pair<unsigned, const TargetRegisterClass *> R =
5837 std::make_pair(x: 0u, y: static_cast<const TargetRegisterClass *>(nullptr));
5838
5839 // Figure out which register class contains this reg.
5840 for (const TargetRegisterClass *RC : RI->regclasses()) {
5841 // If none of the value types for this register class are valid, we
5842 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5843 if (!isLegalRC(TRI: *RI, RC: *RC))
5844 continue;
5845
5846 for (const MCPhysReg &PR : *RC) {
5847 if (RegName.equals_insensitive(RHS: RI->getRegAsmName(Reg: PR))) {
5848 std::pair<unsigned, const TargetRegisterClass *> S =
5849 std::make_pair(x: PR, y&: RC);
5850
5851 // If this register class has the requested value type, return it,
5852 // otherwise keep searching and return the first class found
5853 // if no other is found which explicitly has the requested type.
5854 if (RI->isTypeLegalForClass(RC: *RC, T: VT))
5855 return S;
5856 if (!R.second)
5857 R = S;
5858 }
5859 }
5860 }
5861
5862 return R;
5863}
5864
5865//===----------------------------------------------------------------------===//
5866// Constraint Selection.
5867
/// Return true if this is an input operand that is a matching constraint like
5869/// "4".
5870bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
5871 assert(!ConstraintCode.empty() && "No known constraint!");
5872 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5873}
5874
5875/// If this is an input matching constraint, this method returns the output
5876/// operand it matches.
5877unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
5878 assert(!ConstraintCode.empty() && "No known constraint!");
5879 return atoi(nptr: ConstraintCode.c_str());
5880}
5881
5882/// Split up the constraint string from the inline assembly value into the
5883/// specific constraints and their prefixes, and also tie in the associated
5884/// operand values.
5885/// If this returns an empty vector, and if the constraint string itself
5886/// isn't empty, there was an error parsing.
5887TargetLowering::AsmOperandInfoVector
5888TargetLowering::ParseConstraints(const DataLayout &DL,
5889 const TargetRegisterInfo *TRI,
5890 const CallBase &Call) const {
5891 /// Information about all of the constraints.
5892 AsmOperandInfoVector ConstraintOperands;
5893 const InlineAsm *IA = cast<InlineAsm>(Val: Call.getCalledOperand());
5894 unsigned maCount = 0; // Largest number of multiple alternative constraints.
5895
5896 // Do a prepass over the constraints, canonicalizing them, and building up the
5897 // ConstraintOperands list.
5898 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
5899 unsigned ResNo = 0; // ResNo - The result number of the next output.
5900 unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
5901
5902 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
5903 ConstraintOperands.emplace_back(args: std::move(CI));
5904 AsmOperandInfo &OpInfo = ConstraintOperands.back();
5905
5906 // Update multiple alternative constraint count.
5907 if (OpInfo.multipleAlternatives.size() > maCount)
5908 maCount = OpInfo.multipleAlternatives.size();
5909
5910 OpInfo.ConstraintVT = MVT::Other;
5911
5912 // Compute the value type for each operand.
5913 switch (OpInfo.Type) {
5914 case InlineAsm::isOutput:
5915 // Indirect outputs just consume an argument.
5916 if (OpInfo.isIndirect) {
5917 OpInfo.CallOperandVal = Call.getArgOperand(i: ArgNo);
5918 break;
5919 }
5920
5921 // The return value of the call is this value. As such, there is no
5922 // corresponding argument.
5923 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
5924 if (auto *STy = dyn_cast<StructType>(Val: Call.getType())) {
5925 OpInfo.ConstraintVT =
5926 getAsmOperandValueType(DL, Ty: STy->getElementType(N: ResNo))
5927 .getSimpleVT();
5928 } else {
5929 assert(ResNo == 0 && "Asm only has one result!");
5930 OpInfo.ConstraintVT =
5931 getAsmOperandValueType(DL, Ty: Call.getType()).getSimpleVT();
5932 }
5933 ++ResNo;
5934 break;
5935 case InlineAsm::isInput:
5936 OpInfo.CallOperandVal = Call.getArgOperand(i: ArgNo);
5937 break;
5938 case InlineAsm::isLabel:
5939 OpInfo.CallOperandVal = cast<CallBrInst>(Val: &Call)->getIndirectDest(i: LabelNo);
5940 ++LabelNo;
5941 continue;
5942 case InlineAsm::isClobber:
5943 // Nothing to do.
5944 break;
5945 }
5946
5947 if (OpInfo.CallOperandVal) {
5948 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
5949 if (OpInfo.isIndirect) {
5950 OpTy = Call.getParamElementType(ArgNo);
5951 assert(OpTy && "Indirect operand must have elementtype attribute");
5952 }
5953
5954 // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
5955 if (StructType *STy = dyn_cast<StructType>(Val: OpTy))
5956 if (STy->getNumElements() == 1)
5957 OpTy = STy->getElementType(N: 0);
5958
5959 // If OpTy is not a single value, it may be a struct/union that we
5960 // can tile with integers.
5961 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
5962 unsigned BitSize = DL.getTypeSizeInBits(Ty: OpTy);
5963 switch (BitSize) {
5964 default: break;
5965 case 1:
5966 case 8:
5967 case 16:
5968 case 32:
5969 case 64:
5970 case 128:
5971 OpTy = IntegerType::get(C&: OpTy->getContext(), NumBits: BitSize);
5972 break;
5973 }
5974 }
5975
5976 EVT VT = getAsmOperandValueType(DL, Ty: OpTy, AllowUnknown: true);
5977 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
5978 ArgNo++;
5979 }
5980 }
5981
5982 // If we have multiple alternative constraints, select the best alternative.
5983 if (!ConstraintOperands.empty()) {
5984 if (maCount) {
5985 unsigned bestMAIndex = 0;
5986 int bestWeight = -1;
5987 // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
5988 int weight = -1;
5989 unsigned maIndex;
5990 // Compute the sums of the weights for each alternative, keeping track
5991 // of the best (highest weight) one so far.
5992 for (maIndex = 0; maIndex < maCount; ++maIndex) {
5993 int weightSum = 0;
5994 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
5995 cIndex != eIndex; ++cIndex) {
5996 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
5997 if (OpInfo.Type == InlineAsm::isClobber)
5998 continue;
5999
6000 // If this is an output operand with a matching input operand,
6001 // look up the matching input. If their types mismatch, e.g. one
6002 // is an integer, the other is floating point, or their sizes are
6003 // different, flag it as an maCantMatch.
6004 if (OpInfo.hasMatchingInput()) {
6005 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6006 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6007 if ((OpInfo.ConstraintVT.isInteger() !=
6008 Input.ConstraintVT.isInteger()) ||
6009 (OpInfo.ConstraintVT.getSizeInBits() !=
6010 Input.ConstraintVT.getSizeInBits())) {
6011 weightSum = -1; // Can't match.
6012 break;
6013 }
6014 }
6015 }
6016 weight = getMultipleConstraintMatchWeight(info&: OpInfo, maIndex);
6017 if (weight == -1) {
6018 weightSum = -1;
6019 break;
6020 }
6021 weightSum += weight;
6022 }
6023 // Update best.
6024 if (weightSum > bestWeight) {
6025 bestWeight = weightSum;
6026 bestMAIndex = maIndex;
6027 }
6028 }
6029
      // Now select the chosen alternative in each constraint.
6031 for (AsmOperandInfo &cInfo : ConstraintOperands)
6032 if (cInfo.Type != InlineAsm::isClobber)
6033 cInfo.selectAlternative(index: bestMAIndex);
6034 }
6035 }
6036
6037 // Check and hook up tied operands, choose constraint code to use.
6038 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6039 cIndex != eIndex; ++cIndex) {
6040 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6041
6042 // If this is an output operand with a matching input operand, look up the
6043 // matching input. If their types mismatch, e.g. one is an integer, the
6044 // other is floating point, or their sizes are different, flag it as an
6045 // error.
6046 if (OpInfo.hasMatchingInput()) {
6047 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6048
6049 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6050 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
6051 getRegForInlineAsmConstraint(RI: TRI, Constraint: OpInfo.ConstraintCode,
6052 VT: OpInfo.ConstraintVT);
6053 std::pair<unsigned, const TargetRegisterClass *> InputRC =
6054 getRegForInlineAsmConstraint(RI: TRI, Constraint: Input.ConstraintCode,
6055 VT: Input.ConstraintVT);
6056 const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
6057 OpInfo.ConstraintVT.isFloatingPoint();
6058 const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
6059 Input.ConstraintVT.isFloatingPoint();
6060 if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
6061 (MatchRC.second != InputRC.second)) {
6062 report_fatal_error(reason: "Unsupported asm: input constraint"
6063 " with a matching output constraint of"
6064 " incompatible type!");
6065 }
6066 }
6067 }
6068 }
6069
6070 return ConstraintOperands;
6071}
6072
/// Return a number indicating our preference for choosing a type of constraint
/// over another, for the purpose of sorting them. Immediates are almost always
/// preferable (when they can be emitted). A higher return value means a
6076/// stronger preference for one constraint type relative to another.
6077/// FIXME: We should prefer registers over memory but doing so may lead to
6078/// unrecoverable register exhaustion later.
6079/// https://github.com/llvm/llvm-project/issues/20571
static unsigned getConstraintPriority(TargetLowering::ConstraintType CT) {
6081 switch (CT) {
6082 case TargetLowering::C_Immediate:
6083 case TargetLowering::C_Other:
6084 return 4;
6085 case TargetLowering::C_Memory:
6086 case TargetLowering::C_Address:
6087 return 3;
6088 case TargetLowering::C_RegisterClass:
6089 return 2;
6090 case TargetLowering::C_Register:
6091 return 1;
6092 case TargetLowering::C_Unknown:
6093 return 0;
6094 }
6095 llvm_unreachable("Invalid constraint type");
6096}
6097
6098/// Examine constraint type and operand type and determine a weight value.
6099/// This object must already have been set up with the operand type
6100/// and the current alternative constraint selected.
6101TargetLowering::ConstraintWeight
6102 TargetLowering::getMultipleConstraintMatchWeight(
6103 AsmOperandInfo &info, int maIndex) const {
6104 InlineAsm::ConstraintCodeVector *rCodes;
6105 if (maIndex >= (int)info.multipleAlternatives.size())
6106 rCodes = &info.Codes;
6107 else
6108 rCodes = &info.multipleAlternatives[maIndex].Codes;
6109 ConstraintWeight BestWeight = CW_Invalid;
6110
6111 // Loop over the options, keeping track of the most general one.
6112 for (const std::string &rCode : *rCodes) {
6113 ConstraintWeight weight =
6114 getSingleConstraintMatchWeight(info, constraint: rCode.c_str());
6115 if (weight > BestWeight)
6116 BestWeight = weight;
6117 }
6118
6119 return BestWeight;
6120}
6121
6122/// Examine constraint type and operand type and determine a weight value.
6123/// This object must already have been set up with the operand type
6124/// and the current alternative constraint selected.
6125TargetLowering::ConstraintWeight
6126 TargetLowering::getSingleConstraintMatchWeight(
6127 AsmOperandInfo &info, const char *constraint) const {
6128 ConstraintWeight weight = CW_Invalid;
6129 Value *CallOperandVal = info.CallOperandVal;
6130 // If we don't have a value, we can't do a match,
6131 // but allow it at the lowest weight.
6132 if (!CallOperandVal)
6133 return CW_Default;
6134 // Look at the constraint type.
6135 switch (*constraint) {
6136 case 'i': // immediate integer.
6137 case 'n': // immediate integer with a known value.
6138 if (isa<ConstantInt>(Val: CallOperandVal))
6139 weight = CW_Constant;
6140 break;
  case 's': // non-explicit integral immediate.
6142 if (isa<GlobalValue>(Val: CallOperandVal))
6143 weight = CW_Constant;
6144 break;
6145 case 'E': // immediate float if host format.
6146 case 'F': // immediate float.
6147 if (isa<ConstantFP>(Val: CallOperandVal))
6148 weight = CW_Constant;
6149 break;
6150 case '<': // memory operand with autodecrement.
6151 case '>': // memory operand with autoincrement.
6152 case 'm': // memory operand.
6153 case 'o': // offsettable memory operand
6154 case 'V': // non-offsettable memory operand
6155 weight = CW_Memory;
6156 break;
6157 case 'r': // general register.
6158 case 'g': // general register, memory operand or immediate integer.
6159 // note: Clang converts "g" to "imr".
6160 if (CallOperandVal->getType()->isIntegerTy())
6161 weight = CW_Register;
6162 break;
6163 case 'X': // any operand.
6164 default:
6165 weight = CW_Default;
6166 break;
6167 }
6168 return weight;
6169}
6170
6171/// If there are multiple different constraints that we could pick for this
6172/// operand (e.g. "imr") try to pick the 'best' one.
6173/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
6174/// into seven classes:
6175/// Register -> one specific register
6176/// RegisterClass -> a group of regs
6177/// Memory -> memory
6178/// Address -> a symbolic memory reference
6179/// Immediate -> immediate values
6180/// Other -> magic values (such as "Flag Output Operands")
6181/// Unknown -> something we don't recognize yet and can't handle
6182/// Ideally, we would pick the most specific constraint possible: if we have
6183/// something that fits into a register, we would pick it. The problem here
6184/// is that if we have something that could either be in a register or in
/// memory, use of the register could cause selection of *other*
/// operands to fail: they might only succeed if we pick memory. Because of
/// this, the heuristic we use is:
6188///
6189/// 1) If there is an 'other' constraint, and if the operand is valid for
6190/// that constraint, use it. This makes us take advantage of 'i'
6191/// constraints when available.
6192/// 2) Otherwise, pick the most general constraint present. This prefers
6193/// 'm' over 'r', for example.
6194///
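/// For example, given the multi-letter constraint "imr" (what Clang emits
/// for "g"), a constant-integer operand satisfies the 'i' constraint and is
/// used directly, while a non-constant operand falls back to the most
/// general remaining option, preferring 'm' (memory) over 'r' (register).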
6195TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
6196 TargetLowering::AsmOperandInfo &OpInfo) const {
6197 ConstraintGroup Ret;
6198
6199 Ret.reserve(N: OpInfo.Codes.size());
6200 for (StringRef Code : OpInfo.Codes) {
6201 TargetLowering::ConstraintType CType = getConstraintType(Constraint: Code);
6202
6203 // Indirect 'other' or 'immediate' constraints are not allowed.
6204 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
6205 CType == TargetLowering::C_Register ||
6206 CType == TargetLowering::C_RegisterClass))
6207 continue;
6208
6209 // Things with matching constraints can only be registers, per gcc
6210 // documentation. This mainly affects "g" constraints.
6211 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6212 continue;
6213
6214 Ret.emplace_back(Args&: Code, Args&: CType);
6215 }
6216
6217 llvm::stable_sort(Range&: Ret, C: [](ConstraintPair a, ConstraintPair b) {
    return getConstraintPriority(CT: a.second) >
           getConstraintPriority(CT: b.second);
6219 });
6220
6221 return Ret;
6222}
6223
6224/// If we have an immediate, see if we can lower it. Return true if we can,
6225/// false otherwise.
6226static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
6227 SDValue Op, SelectionDAG *DAG,
6228 const TargetLowering &TLI) {
6229
6230 assert((P.second == TargetLowering::C_Other ||
6231 P.second == TargetLowering::C_Immediate) &&
6232 "need immediate or other");
6233
6234 if (!Op.getNode())
6235 return false;
6236
6237 std::vector<SDValue> ResultOps;
6238 TLI.LowerAsmOperandForConstraint(Op, Constraint: P.first, Ops&: ResultOps, DAG&: *DAG);
6239 return !ResultOps.empty();
6240}
6241
6242/// Determines the constraint code and constraint type to use for the specific
6243/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
6244void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
6245 SDValue Op,
6246 SelectionDAG *DAG) const {
6247 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
6248
6249 // Single-letter constraints ('r') are very common.
6250 if (OpInfo.Codes.size() == 1) {
6251 OpInfo.ConstraintCode = OpInfo.Codes[0];
6252 OpInfo.ConstraintType = getConstraintType(Constraint: OpInfo.ConstraintCode);
6253 } else {
6254 ConstraintGroup G = getConstraintPreferences(OpInfo);
6255 if (G.empty())
6256 return;
6257
6258 unsigned BestIdx = 0;
6259 for (const unsigned E = G.size();
6260 BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
6261 G[BestIdx].second == TargetLowering::C_Immediate);
6262 ++BestIdx) {
6263 if (lowerImmediateIfPossible(P&: G[BestIdx], Op, DAG, TLI: *this))
6264 break;
6265 // If we're out of constraints, just pick the first one.
6266 if (BestIdx + 1 == E) {
6267 BestIdx = 0;
6268 break;
6269 }
6270 }
6271
6272 OpInfo.ConstraintCode = G[BestIdx].first;
6273 OpInfo.ConstraintType = G[BestIdx].second;
6274 }
6275
6276 // 'X' matches anything.
6277 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6278 // Constants are handled elsewhere. For Functions, the type here is the
6279 // type of the result, which is not what we want to look at; leave them
6280 // alone.
6281 Value *v = OpInfo.CallOperandVal;
6282 if (isa<ConstantInt>(Val: v) || isa<Function>(Val: v)) {
6283 return;
6284 }
6285
6286 if (isa<BasicBlock>(Val: v) || isa<BlockAddress>(Val: v)) {
6287 OpInfo.ConstraintCode = "i";
6288 return;
6289 }
6290
6291 // Otherwise, try to resolve it to something we know about by looking at
6292 // the actual operand type.
6293 if (const char *Repl = LowerXConstraint(ConstraintVT: OpInfo.ConstraintVT)) {
6294 OpInfo.ConstraintCode = Repl;
6295 OpInfo.ConstraintType = getConstraintType(Constraint: OpInfo.ConstraintCode);
6296 }
6297 }
6298}
6299
6300/// Given an exact SDIV by a constant, create a multiplication
6301/// with the multiplicative inverse of the constant.
6302/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
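/// For example (i32): an exact sdiv by 6 becomes (mul (sra X, 1), 0xAAAAAAAB),
/// since 6 = 3 * 2^1 and 0xAAAAAAAB is the multiplicative inverse of 3
/// modulo 2^32.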
6303static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
6304 const SDLoc &dl, SelectionDAG &DAG,
6305 SmallVectorImpl<SDNode *> &Created) {
6306 SDValue Op0 = N->getOperand(Num: 0);
6307 SDValue Op1 = N->getOperand(Num: 1);
6308 EVT VT = N->getValueType(ResNo: 0);
6309 EVT SVT = VT.getScalarType();
6310 EVT ShVT = TLI.getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6311 EVT ShSVT = ShVT.getScalarType();
6312
6313 bool UseSRA = false;
6314 SmallVector<SDValue, 16> Shifts, Factors;
6315
6316 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6317 if (C->isZero())
6318 return false;
6319 APInt Divisor = C->getAPIntValue();
6320 unsigned Shift = Divisor.countr_zero();
6321 if (Shift) {
6322 Divisor.ashrInPlace(ShiftAmt: Shift);
6323 UseSRA = true;
6324 }
6325 APInt Factor = Divisor.multiplicativeInverse();
6326 Shifts.push_back(Elt: DAG.getConstant(Val: Shift, DL: dl, VT: ShSVT));
6327 Factors.push_back(Elt: DAG.getConstant(Val: Factor, DL: dl, VT: SVT));
6328 return true;
6329 };
6330
6331 // Collect all magic values from the build vector.
6332 if (!ISD::matchUnaryPredicate(Op: Op1, Match: BuildSDIVPattern))
6333 return SDValue();
6334
6335 SDValue Shift, Factor;
6336 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6337 Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
6338 Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
6339 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6340 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6341 "Expected matchUnaryPredicate to return one element for scalable "
6342 "vectors");
6343 Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts[0]);
6344 Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors[0]);
6345 } else {
6346 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6347 Shift = Shifts[0];
6348 Factor = Factors[0];
6349 }
6350
6351 SDValue Res = Op0;
6352 if (UseSRA) {
6353 Res = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Res, N2: Shift, Flags: SDNodeFlags::Exact);
6354 Created.push_back(Elt: Res.getNode());
6355 }
6356
6357 return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Res, N2: Factor);
6358}
6359
6360/// Given an exact UDIV by a constant, create a multiplication
6361/// with the multiplicative inverse of the constant.
6362/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
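/// For example (i32): an exact udiv by 12 becomes (mul (srl X, 2), 0xAAAAAAAB),
/// since 12 = 3 * 2^2 and 0xAAAAAAAB is the multiplicative inverse of 3
/// modulo 2^32.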
6363static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
6364 const SDLoc &dl, SelectionDAG &DAG,
6365 SmallVectorImpl<SDNode *> &Created) {
6366 EVT VT = N->getValueType(ResNo: 0);
6367 EVT SVT = VT.getScalarType();
6368 EVT ShVT = TLI.getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6369 EVT ShSVT = ShVT.getScalarType();
6370
6371 bool UseSRL = false;
6372 SmallVector<SDValue, 16> Shifts, Factors;
6373
6374 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6375 if (C->isZero())
6376 return false;
6377 APInt Divisor = C->getAPIntValue();
6378 unsigned Shift = Divisor.countr_zero();
6379 if (Shift) {
6380 Divisor.lshrInPlace(ShiftAmt: Shift);
6381 UseSRL = true;
6382 }
6383 // Calculate the multiplicative inverse modulo BW.
6384 APInt Factor = Divisor.multiplicativeInverse();
6385 Shifts.push_back(Elt: DAG.getConstant(Val: Shift, DL: dl, VT: ShSVT));
6386 Factors.push_back(Elt: DAG.getConstant(Val: Factor, DL: dl, VT: SVT));
6387 return true;
6388 };
6389
6390 SDValue Op1 = N->getOperand(Num: 1);
6391
6392 // Collect all magic values from the build vector.
6393 if (!ISD::matchUnaryPredicate(Op: Op1, Match: BuildUDIVPattern))
6394 return SDValue();
6395
6396 SDValue Shift, Factor;
6397 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6398 Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
6399 Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
6400 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6401 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6402 "Expected matchUnaryPredicate to return one element for scalable "
6403 "vectors");
6404 Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts[0]);
6405 Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors[0]);
6406 } else {
6407 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6408 Shift = Shifts[0];
6409 Factor = Factors[0];
6410 }
6411
6412 SDValue Res = N->getOperand(Num: 0);
6413 if (UseSRL) {
6414 Res = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Res, N2: Shift, Flags: SDNodeFlags::Exact);
6415 Created.push_back(Elt: Res.getNode());
6416 }
6417
6418 return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Res, N2: Factor);
6419}
6420
6421SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
6422 SelectionDAG &DAG,
6423 SmallVectorImpl<SDNode *> &Created) const {
6424 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6425 if (isIntDivCheap(VT: N->getValueType(ResNo: 0), Attr))
6426 return SDValue(N, 0); // Lower SDIV as SDIV
6427 return SDValue();
6428}
6429
6430SDValue
6431TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
6432 SelectionDAG &DAG,
6433 SmallVectorImpl<SDNode *> &Created) const {
6434 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6435 if (isIntDivCheap(VT: N->getValueType(ResNo: 0), Attr))
6436 return SDValue(N, 0); // Lower SREM as SREM
6437 return SDValue();
6438}
6439
6440/// Build sdiv by power-of-2 with conditional move instructions
6441/// Ref: "Hacker's Delight" by Henry Warren 10-1
6442/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6443/// bgez x, label
6444/// add x, x, 2**k-1
6445/// label:
6446/// sra res, x, k
6447/// neg res, res (when the divisor is negative)
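/// For example, sdiv X, 8 becomes:
///   cmov = (X < 0) ? X + 7 : X
///   res  = cmov >> 3 (arithmetic)
/// and for sdiv X, -8 the result is additionally negated: res = 0 - res.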
6448SDValue TargetLowering::buildSDIVPow2WithCMov(
6449 SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6450 SmallVectorImpl<SDNode *> &Created) const {
6451 unsigned Lg2 = Divisor.countr_zero();
6452 EVT VT = N->getValueType(ResNo: 0);
6453
6454 SDLoc DL(N);
6455 SDValue N0 = N->getOperand(Num: 0);
6456 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
6457 APInt Lg2Mask = APInt::getLowBitsSet(numBits: VT.getSizeInBits(), loBitsSet: Lg2);
6458 SDValue Pow2MinusOne = DAG.getConstant(Val: Lg2Mask, DL, VT);
6459
6460 // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6461 EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
6462 SDValue Cmp = DAG.getSetCC(DL, VT: CCVT, LHS: N0, RHS: Zero, Cond: ISD::SETLT);
6463 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0, N2: Pow2MinusOne);
6464 SDValue CMov = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: Cmp, N2: Add, N3: N0);
6465
6466 Created.push_back(Elt: Cmp.getNode());
6467 Created.push_back(Elt: Add.getNode());
6468 Created.push_back(Elt: CMov.getNode());
6469
6470 // Divide by pow2.
6471 SDValue SRA =
6472 DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: CMov, N2: DAG.getConstant(Val: Lg2, DL, VT));
6473
6474 // If we're dividing by a positive value, we're done. Otherwise, we must
6475 // negate the result.
6476 if (Divisor.isNonNegative())
6477 return SRA;
6478
6479 Created.push_back(Elt: SRA.getNode());
6480 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Zero, N2: SRA);
6481}
6482
6483/// Given an ISD::SDIV node expressing a divide by constant,
6484/// return a DAG expression to select that will generate the same value by
6485/// multiplying by a magic number.
6486/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
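/// For example (i32): sdiv X, 7 uses the magic multiplier 0x92492493 with a
/// post-shift of 2 (standard values from the Hacker's Delight derivation);
/// the magic is negative while the divisor is positive, so the numerator is
/// added back after the high multiply:
///   q = mulhs(X, 0x92492493); q = q + X; q = q >>s 2; q = q + (q u>> 31);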
6487SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
6488 bool IsAfterLegalization,
6489 bool IsAfterLegalTypes,
6490 SmallVectorImpl<SDNode *> &Created) const {
6491 SDLoc dl(N);
6492 EVT VT = N->getValueType(ResNo: 0);
6493 EVT SVT = VT.getScalarType();
6494 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6495 EVT ShSVT = ShVT.getScalarType();
6496 unsigned EltBits = VT.getScalarSizeInBits();
6497 EVT MulVT;
6498
6499 // Check to see if we can do this.
6500 // FIXME: We should be more aggressive here.
6501 if (!isTypeLegal(VT)) {
6502 // Limit this to simple scalars for now.
6503 if (VT.isVector() || !VT.isSimple())
6504 return SDValue();
6505
6506 // If this type will be promoted to a large enough type with a legal
6507 // multiply operation, we can go ahead and do this transform.
6508 if (getTypeAction(VT: VT.getSimpleVT()) != TypePromoteInteger)
6509 return SDValue();
6510
6511 MulVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT);
6512 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6513 !isOperationLegal(Op: ISD::MUL, VT: MulVT))
6514 return SDValue();
6515 }
6516
6517 // If the sdiv has an 'exact' bit we can use a simpler lowering.
6518 if (N->getFlags().hasExact())
6519 return BuildExactSDIV(TLI: *this, N, dl, DAG, Created);
6520
6521 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6522
6523 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6524 if (C->isZero())
6525 return false;
6526
6527 const APInt &Divisor = C->getAPIntValue();
6528 SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(D: Divisor);
6529 int NumeratorFactor = 0;
6530 int ShiftMask = -1;
6531
6532 if (Divisor.isOne() || Divisor.isAllOnes()) {
6533 // If d is +1/-1, we just multiply the numerator by +1/-1.
6534 NumeratorFactor = Divisor.getSExtValue();
6535 magics.Magic = 0;
6536 magics.ShiftAmount = 0;
6537 ShiftMask = 0;
6538 } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6539 // If d > 0 and m < 0, add the numerator.
6540 NumeratorFactor = 1;
6541 } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6542 // If d < 0 and m > 0, subtract the numerator.
6543 NumeratorFactor = -1;
6544 }
6545
6546 MagicFactors.push_back(Elt: DAG.getConstant(Val: magics.Magic, DL: dl, VT: SVT));
6547 Factors.push_back(Elt: DAG.getSignedConstant(Val: NumeratorFactor, DL: dl, VT: SVT));
6548 Shifts.push_back(Elt: DAG.getConstant(Val: magics.ShiftAmount, DL: dl, VT: ShSVT));
6549 ShiftMasks.push_back(Elt: DAG.getSignedConstant(Val: ShiftMask, DL: dl, VT: SVT));
6550 return true;
6551 };
6552
6553 SDValue N0 = N->getOperand(Num: 0);
6554 SDValue N1 = N->getOperand(Num: 1);
6555
6556 // Collect the shifts / magic values from each element.
6557 if (!ISD::matchUnaryPredicate(Op: N1, Match: BuildSDIVPattern))
6558 return SDValue();
6559
6560 SDValue MagicFactor, Factor, Shift, ShiftMask;
6561 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6562 MagicFactor = DAG.getBuildVector(VT, DL: dl, Ops: MagicFactors);
6563 Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
6564 Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
6565 ShiftMask = DAG.getBuildVector(VT, DL: dl, Ops: ShiftMasks);
6566 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6567 assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6568 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6569 "Expected matchUnaryPredicate to return one element for scalable "
6570 "vectors");
6571 MagicFactor = DAG.getSplatVector(VT, DL: dl, Op: MagicFactors[0]);
6572 Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors[0]);
6573 Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts[0]);
6574 ShiftMask = DAG.getSplatVector(VT, DL: dl, Op: ShiftMasks[0]);
6575 } else {
6576 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6577 MagicFactor = MagicFactors[0];
6578 Factor = Factors[0];
6579 Shift = Shifts[0];
6580 ShiftMask = ShiftMasks[0];
6581 }
6582
6583 // Multiply the numerator (operand 0) by the magic value.
6584 // FIXME: We should support doing a MUL in a wider type.
6585 auto GetMULHS = [&](SDValue X, SDValue Y) {
6586 // If the type isn't legal, use a wider mul of the type calculated
6587 // earlier.
6588 if (!isTypeLegal(VT)) {
6589 X = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MulVT, Operand: X);
6590 Y = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MulVT, Operand: Y);
6591 Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MulVT, N1: X, N2: Y);
6592 Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MulVT, N1: Y,
6593 N2: DAG.getShiftAmountConstant(Val: EltBits, VT: MulVT, DL: dl));
6594 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6595 }
6596
6597 if (isOperationLegalOrCustom(Op: ISD::MULHS, VT, LegalOnly: IsAfterLegalization))
6598 return DAG.getNode(Opcode: ISD::MULHS, DL: dl, VT, N1: X, N2: Y);
6599 if (isOperationLegalOrCustom(Op: ISD::SMUL_LOHI, VT, LegalOnly: IsAfterLegalization)) {
6600 SDValue LoHi =
6601 DAG.getNode(Opcode: ISD::SMUL_LOHI, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: X, N2: Y);
6602 return SDValue(LoHi.getNode(), 1);
6603 }
    // If a type twice as wide is legal, widen and use a mul plus a shift.
6605 unsigned Size = VT.getScalarSizeInBits();
6606 EVT WideVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: Size * 2);
6607 if (VT.isVector())
6608 WideVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT,
6609 EC: VT.getVectorElementCount());
6610 // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
6611 // custom lowered. This is very expensive so avoid it at all costs for
6612 // constant divisors.
6613 if ((!IsAfterLegalTypes && isOperationExpand(Op: ISD::SDIV, VT) &&
6614 isOperationCustom(Op: ISD::SDIVREM, VT: VT.getScalarType())) ||
6615 isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT)) {
6616 X = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: WideVT, Operand: X);
6617 Y = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: WideVT, Operand: Y);
6618 Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: X, N2: Y);
6619 Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideVT, N1: Y,
6620 N2: DAG.getShiftAmountConstant(Val: EltBits, VT: WideVT, DL: dl));
6621 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6622 }
6623 return SDValue();
6624 };
6625
6626 SDValue Q = GetMULHS(N0, MagicFactor);
6627 if (!Q)
6628 return SDValue();
6629
6630 Created.push_back(Elt: Q.getNode());
6631
6632 // (Optionally) Add/subtract the numerator using Factor.
6633 Factor = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: N0, N2: Factor);
6634 Created.push_back(Elt: Factor.getNode());
6635 Q = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Q, N2: Factor);
6636 Created.push_back(Elt: Q.getNode());
6637
6638 // Shift right algebraic by shift value.
6639 Q = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Q, N2: Shift);
6640 Created.push_back(Elt: Q.getNode());
6641
6642 // Extract the sign bit, mask it and add it to the quotient.
6643 SDValue SignShift = DAG.getConstant(Val: EltBits - 1, DL: dl, VT: ShVT);
6644 SDValue T = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: SignShift);
6645 Created.push_back(Elt: T.getNode());
6646 T = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: T, N2: ShiftMask);
6647 Created.push_back(Elt: T.getNode());
6648 return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Q, N2: T);
6649}
6650
6651/// Given an ISD::UDIV node expressing a divide by constant,
6652/// return a DAG expression to select that will generate the same value by
6653/// multiplying by a magic number.
6654/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
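/// For example (i32): udiv X, 7 uses the magic multiplier 0x24924925 with
/// the add-fixup (NPQ) path and a final shift of 2 (standard values from the
/// Hacker's Delight derivation):
///   q = mulhu(X, 0x24924925); t = (X - q) u>> 1; q = (t + q) u>> 2;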
6655SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
6656 bool IsAfterLegalization,
6657 bool IsAfterLegalTypes,
6658 SmallVectorImpl<SDNode *> &Created) const {
6659 SDLoc dl(N);
6660 EVT VT = N->getValueType(ResNo: 0);
6661 EVT SVT = VT.getScalarType();
6662 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6663 EVT ShSVT = ShVT.getScalarType();
6664 unsigned EltBits = VT.getScalarSizeInBits();
6665 EVT MulVT;
6666
6667 // Check to see if we can do this.
6668 // FIXME: We should be more aggressive here.
6669 if (!isTypeLegal(VT)) {
6670 // Limit this to simple scalars for now.
6671 if (VT.isVector() || !VT.isSimple())
6672 return SDValue();
6673
6674 // If this type will be promoted to a large enough type with a legal
6675 // multiply operation, we can go ahead and do this transform.
6676 if (getTypeAction(VT: VT.getSimpleVT()) != TypePromoteInteger)
6677 return SDValue();
6678
6679 MulVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT);
6680 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6681 !isOperationLegal(Op: ISD::MUL, VT: MulVT))
6682 return SDValue();
6683 }
6684
6685 // If the udiv has an 'exact' bit we can use a simpler lowering.
6686 if (N->getFlags().hasExact())
6687 return BuildExactUDIV(TLI: *this, N, dl, DAG, Created);
6688
6689 SDValue N0 = N->getOperand(Num: 0);
6690 SDValue N1 = N->getOperand(Num: 1);
6691
6692 // Try to use leading zeros of the dividend to reduce the multiplier and
6693 // avoid expensive fixups.
6694 unsigned KnownLeadingZeros = DAG.computeKnownBits(Op: N0).countMinLeadingZeros();
6695
6696 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6697 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6698
6699 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6700 if (C->isZero())
6701 return false;
6702 const APInt& Divisor = C->getAPIntValue();
6703
6704 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6705
6706 // Magic algorithm doesn't work for division by 1. We need to emit a select
6707 // at the end.
6708 if (Divisor.isOne()) {
6709 PreShift = PostShift = DAG.getUNDEF(VT: ShSVT);
6710 MagicFactor = NPQFactor = DAG.getUNDEF(VT: SVT);
6711 } else {
6712 UnsignedDivisionByConstantInfo magics =
6713 UnsignedDivisionByConstantInfo::get(
6714 D: Divisor, LeadingZeros: std::min(a: KnownLeadingZeros, b: Divisor.countl_zero()));
6715
6716 MagicFactor = DAG.getConstant(Val: magics.Magic, DL: dl, VT: SVT);
6717
6718 assert(magics.PreShift < Divisor.getBitWidth() &&
6719 "We shouldn't generate an undefined shift!");
6720 assert(magics.PostShift < Divisor.getBitWidth() &&
6721 "We shouldn't generate an undefined shift!");
6722 assert((!magics.IsAdd || magics.PreShift == 0) &&
6723 "Unexpected pre-shift");
6724 PreShift = DAG.getConstant(Val: magics.PreShift, DL: dl, VT: ShSVT);
6725 PostShift = DAG.getConstant(Val: magics.PostShift, DL: dl, VT: ShSVT);
6726 NPQFactor = DAG.getConstant(
6727 Val: magics.IsAdd ? APInt::getOneBitSet(numBits: EltBits, BitNo: EltBits - 1)
6728 : APInt::getZero(numBits: EltBits),
6729 DL: dl, VT: SVT);
6730 UseNPQ |= magics.IsAdd;
6731 UsePreShift |= magics.PreShift != 0;
6732 UsePostShift |= magics.PostShift != 0;
6733 }
6734
6735 PreShifts.push_back(Elt: PreShift);
6736 MagicFactors.push_back(Elt: MagicFactor);
6737 NPQFactors.push_back(Elt: NPQFactor);
6738 PostShifts.push_back(Elt: PostShift);
6739 return true;
6740 };
6741
6742 // Collect the shifts/magic values from each element.
6743 if (!ISD::matchUnaryPredicate(Op: N1, Match: BuildUDIVPattern))
6744 return SDValue();
6745
6746 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6747 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6748 PreShift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: PreShifts);
6749 MagicFactor = DAG.getBuildVector(VT, DL: dl, Ops: MagicFactors);
6750 NPQFactor = DAG.getBuildVector(VT, DL: dl, Ops: NPQFactors);
6751 PostShift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: PostShifts);
6752 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6753 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6754 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
6756 PreShift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: PreShifts[0]);
6757 MagicFactor = DAG.getSplatVector(VT, DL: dl, Op: MagicFactors[0]);
6758 NPQFactor = DAG.getSplatVector(VT, DL: dl, Op: NPQFactors[0]);
6759 PostShift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: PostShifts[0]);
6760 } else {
6761 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6762 PreShift = PreShifts[0];
6763 MagicFactor = MagicFactors[0];
6764 PostShift = PostShifts[0];
6765 }
6766
6767 SDValue Q = N0;
6768 if (UsePreShift) {
6769 Q = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: PreShift);
6770 Created.push_back(Elt: Q.getNode());
6771 }
6772
6773 // FIXME: We should support doing a MUL in a wider type.
6774 auto GetMULHU = [&](SDValue X, SDValue Y) {
6775 // If the type isn't legal, use a wider mul of the type calculated
6776 // earlier.
6777 if (!isTypeLegal(VT)) {
6778 X = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MulVT, Operand: X);
6779 Y = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MulVT, Operand: Y);
6780 Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MulVT, N1: X, N2: Y);
6781 Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MulVT, N1: Y,
6782 N2: DAG.getShiftAmountConstant(Val: EltBits, VT: MulVT, DL: dl));
6783 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6784 }
6785
6786 if (isOperationLegalOrCustom(Op: ISD::MULHU, VT, LegalOnly: IsAfterLegalization))
6787 return DAG.getNode(Opcode: ISD::MULHU, DL: dl, VT, N1: X, N2: Y);
6788 if (isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT, LegalOnly: IsAfterLegalization)) {
6789 SDValue LoHi =
6790 DAG.getNode(Opcode: ISD::UMUL_LOHI, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: X, N2: Y);
6791 return SDValue(LoHi.getNode(), 1);
6792 }
    // If a type twice as wide is legal, widen and use a mul plus a shift.
6794 unsigned Size = VT.getScalarSizeInBits();
6795 EVT WideVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: Size * 2);
6796 if (VT.isVector())
6797 WideVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT,
6798 EC: VT.getVectorElementCount());
6799 // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
6800 // custom lowered. This is very expensive so avoid it at all costs for
6801 // constant divisors.
6802 if ((!IsAfterLegalTypes && isOperationExpand(Op: ISD::UDIV, VT) &&
6803 isOperationCustom(Op: ISD::UDIVREM, VT: VT.getScalarType())) ||
6804 isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT)) {
6805 X = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: WideVT, Operand: X);
6806 Y = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: WideVT, Operand: Y);
6807 Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: X, N2: Y);
6808 Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideVT, N1: Y,
6809 N2: DAG.getShiftAmountConstant(Val: EltBits, VT: WideVT, DL: dl));
6810 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6811 }
6812 return SDValue(); // No mulhu or equivalent
6813 };
6814
6815 // Multiply the numerator (operand 0) by the magic value.
6816 Q = GetMULHU(Q, MagicFactor);
6817 if (!Q)
6818 return SDValue();
6819
6820 Created.push_back(Elt: Q.getNode());
6821
6822 if (UseNPQ) {
6823 SDValue NPQ = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: N0, N2: Q);
6824 Created.push_back(Elt: NPQ.getNode());
6825
6826 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
6827 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
6828 if (VT.isVector())
6829 NPQ = GetMULHU(NPQ, NPQFactor);
6830 else
6831 NPQ = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: NPQ, N2: DAG.getConstant(Val: 1, DL: dl, VT: ShVT));
6832
6833 Created.push_back(Elt: NPQ.getNode());
6834
6835 Q = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: NPQ, N2: Q);
6836 Created.push_back(Elt: Q.getNode());
6837 }
6838
6839 if (UsePostShift) {
6840 Q = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: PostShift);
6841 Created.push_back(Elt: Q.getNode());
6842 }
6843
6844 EVT SetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
6845
6846 SDValue One = DAG.getConstant(Val: 1, DL: dl, VT);
6847 SDValue IsOne = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: N1, RHS: One, Cond: ISD::SETEQ);
6848 return DAG.getSelect(DL: dl, VT, Cond: IsOne, LHS: N0, RHS: Q);
6849}
6850
/// If all values in Values that *don't* match the predicate are the same
/// 'splat' value, then replace all values with that splat value.
6853/// Else, if AlternativeReplacement was provided, then replace all values that
6854/// do match predicate with AlternativeReplacement value.
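/// For example, with Values = { C, C, V, C } where only V matches the
/// predicate, all four values become C. If every value matched the
/// predicate, there would be no splat value to recover, and
/// AlternativeReplacement (when provided) is used instead.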
6855static void
6856turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
6857 std::function<bool(SDValue)> Predicate,
6858 SDValue AlternativeReplacement = SDValue()) {
6859 SDValue Replacement;
6860 // Is there a value for which the Predicate does *NOT* match? What is it?
6861 auto SplatValue = llvm::find_if_not(Range&: Values, P: Predicate);
6862 if (SplatValue != Values.end()) {
6863 // Does Values consist only of SplatValue's and values matching Predicate?
6864 if (llvm::all_of(Range&: Values, P: [Predicate, SplatValue](SDValue Value) {
6865 return Value == *SplatValue || Predicate(Value);
6866 })) // Then we shall replace values matching predicate with SplatValue.
6867 Replacement = *SplatValue;
6868 }
6869 if (!Replacement) {
6870 // Oops, we did not find the "baseline" splat value.
6871 if (!AlternativeReplacement)
6872 return; // Nothing to do.
6873 // Let's replace with provided value then.
6874 Replacement = AlternativeReplacement;
6875 }
6876 std::replace_if(first: Values.begin(), last: Values.end(), pred: Predicate, new_value: Replacement);
6877}
6878
6879/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6880/// where the divisor is constant and the comparison target is zero,
6881/// return a DAG expression that will generate the same comparison result
6882/// using only multiplications, additions and shifts/rotations.
6883/// Ref: "Hacker's Delight" 10-17.
6884SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6885 SDValue CompTargetNode,
6886 ISD::CondCode Cond,
6887 DAGCombinerInfo &DCI,
6888 const SDLoc &DL) const {
6889 SmallVector<SDNode *, 5> Built;
6890 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6891 DCI, DL, Created&: Built)) {
6892 for (SDNode *N : Built)
6893 DCI.AddToWorklist(N);
6894 return Folded;
6895 }
6896
6897 return SDValue();
6898}
6899
6900SDValue
6901TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
6902 SDValue CompTargetNode, ISD::CondCode Cond,
6903 DAGCombinerInfo &DCI, const SDLoc &DL,
6904 SmallVectorImpl<SDNode *> &Created) const {
6905 // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
6906 // - D must be constant, with D = D0 * 2^K where D0 is odd
6907 // - P is the multiplicative inverse of D0 modulo 2^W
6908 // - Q = floor(((2^W) - 1) / D)
6909 // where W is the width of the common type of N and D.
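 // Worked example (for illustration, W = 8, D = 6): D0 = 3, K = 1,
 // P = inv(3, 2^8) = 171 (3 * 171 = 513 == 1 (mod 256)), Q = floor(255/6) = 42.
 // For N = 12: 12 * 171 == 4 (mod 256), rotr(4, 1) = 2 u<= 42, so 12 u% 6 == 0.
 // For N = 13: 13 * 171 == 175 == 0b10101111 (mod 256), and rotr by 1 gives
 // 0b11010111 = 215 u> 42, so 13 u% 6 != 0.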
6910 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6911 "Only applicable for (in)equality comparisons.");
6912
6913 SelectionDAG &DAG = DCI.DAG;
6914
6915 EVT VT = REMNode.getValueType();
6916 EVT SVT = VT.getScalarType();
6917 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6918 EVT ShSVT = ShVT.getScalarType();
6919
6920 // If MUL is unavailable, we cannot proceed in any case.
6921 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::MUL, VT))
6922 return SDValue();
6923
6924 bool ComparingWithAllZeros = true;
6925 bool AllComparisonsWithNonZerosAreTautological = true;
6926 bool HadTautologicalLanes = false;
6927 bool AllLanesAreTautological = true;
6928 bool HadEvenDivisor = false;
6929 bool AllDivisorsArePowerOfTwo = true;
6930 bool HadTautologicalInvertedLanes = false;
6931 SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
6932
6933 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
6934 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6935 if (CDiv->isZero())
6936 return false;
6937
6938 const APInt &D = CDiv->getAPIntValue();
6939 const APInt &Cmp = CCmp->getAPIntValue();
6940
6941 ComparingWithAllZeros &= Cmp.isZero();
6942
6943 // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6944 // if C2 is not less than C1, the comparison is always false.
6945 // But we will only be able to produce the comparison that will give the
6946 // opposite tautological answer. So this lane would need to be fixed up.
6947 bool TautologicalInvertedLane = D.ule(RHS: Cmp);
6948 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
6949
6950 // If all lanes are tautological (either all divisors are ones, or divisor
6951 // is not greater than the constant we are comparing with),
6952 // we will prefer to avoid the fold.
6953 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
6954 HadTautologicalLanes |= TautologicalLane;
6955 AllLanesAreTautological &= TautologicalLane;
6956
6957 // If we are comparing with non-zero, we'll need to subtract said
6958 // comparison value from the LHS. But there is no point in doing that if
6959 // every lane where we are comparing with non-zero is tautological.
6960 if (!Cmp.isZero())
6961 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
6962
6963 // Decompose D into D0 * 2^K
6964 unsigned K = D.countr_zero();
6965 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6966 APInt D0 = D.lshr(shiftAmt: K);
6967
6968 // D is even if it has trailing zeros.
6969 HadEvenDivisor |= (K != 0);
6970 // D is a power-of-two if D0 is one.
6971 // If all divisors are power-of-two, we will prefer to avoid the fold.
6972 AllDivisorsArePowerOfTwo &= D0.isOne();
6973
6974 // P = inv(D0, 2^W)
6975 // (D0 is odd, so the multiplicative inverse modulo 2^W exists.)
6976 unsigned W = D.getBitWidth();
6977 APInt P = D0.multiplicativeInverse();
6978 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6979
6980 // Q = floor((2^W - 1) u/ D)
6981 // R = ((2^W - 1) u% D)
6982 APInt Q, R;
6983 APInt::udivrem(LHS: APInt::getAllOnes(numBits: W), RHS: D, Quotient&: Q, Remainder&: R);
6984
6985 // If we are comparing with zero, then that comparison constant is okay,
6986 // else it may need to be one less than that.
6987 if (Cmp.ugt(RHS: R))
6988 Q -= 1;
6989
6990 assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
6991 "We are expecting that K is always less than all-ones for ShSVT");
6992
6993 // If the lane is tautological the result can be constant-folded.
6994 if (TautologicalLane) {
6995 // Set the P and K amounts to bogus values so we can try to splat them.
6996 P = 0;
6997 K = -1;
6998 // And ensure that comparison constant is tautological,
6999 // it will always compare true/false.
7000 Q = -1;
7001 }
7002
7003 PAmts.push_back(Elt: DAG.getConstant(Val: P, DL, VT: SVT));
7004 KAmts.push_back(
7005 Elt: DAG.getConstant(Val: APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7006 /*implicitTrunc=*/true),
7007 DL, VT: ShSVT));
7008 QAmts.push_back(Elt: DAG.getConstant(Val: Q, DL, VT: SVT));
7009 return true;
7010 };
7011
7012 SDValue N = REMNode.getOperand(i: 0);
7013 SDValue D = REMNode.getOperand(i: 1);
7014
7015 // Collect the values from each element.
7016 if (!ISD::matchBinaryPredicate(LHS: D, RHS: CompTargetNode, Match: BuildUREMPattern))
7017 return SDValue();
7018
7019 // If all lanes are tautological, the result can be constant-folded.
7020 if (AllLanesAreTautological)
7021 return SDValue();
7022
7023 // If this is a urem by a power-of-two, avoid the fold since it can be
7024 // best implemented as a bit test.
7025 if (AllDivisorsArePowerOfTwo)
7026 return SDValue();
7027
7028 SDValue PVal, KVal, QVal;
7029 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7030 if (HadTautologicalLanes) {
7031 // Try to turn PAmts into a splat, since we don't care about the values
7032 // that are currently '0'. If we can't, just keep the '0's.
7033 turnVectorIntoSplatVector(Values: PAmts, Predicate: isNullConstant);
7034 // Try to turn KAmts into a splat, since we don't care about the values
7035 // that are currently '-1'. If we can't, change them to '0's.
7036 turnVectorIntoSplatVector(Values: KAmts, Predicate: isAllOnesConstant,
7037 AlternativeReplacement: DAG.getConstant(Val: 0, DL, VT: ShSVT));
7038 }
7039
7040 PVal = DAG.getBuildVector(VT, DL, Ops: PAmts);
7041 KVal = DAG.getBuildVector(VT: ShVT, DL, Ops: KAmts);
7042 QVal = DAG.getBuildVector(VT, DL, Ops: QAmts);
7043 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7044 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
7045 "Expected matchBinaryPredicate to return one element for "
7046 "SPLAT_VECTORs");
7047 PVal = DAG.getSplatVector(VT, DL, Op: PAmts[0]);
7048 KVal = DAG.getSplatVector(VT: ShVT, DL, Op: KAmts[0]);
7049 QVal = DAG.getSplatVector(VT, DL, Op: QAmts[0]);
7050 } else {
7051 PVal = PAmts[0];
7052 KVal = KAmts[0];
7053 QVal = QAmts[0];
7054 }
7055
7056 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
7057 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::SUB, VT))
7058 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
7059 assert(CompTargetNode.getValueType() == N.getValueType() &&
7060 "Expecting that the types on LHS and RHS of comparisons match.");
7061 N = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: N, N2: CompTargetNode);
7062 }
7063
7064 // (mul N, P)
7065 SDValue Op0 = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N, N2: PVal);
7066 Created.push_back(Elt: Op0.getNode());
7067
7068 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7069 // divisors as a performance improvement, since rotating by 0 is a no-op.
7070 if (HadEvenDivisor) {
7071 // We need ROTR to do this.
7072 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ROTR, VT))
7073 return SDValue();
7074 // UREM: (rotr (mul N, P), K)
7075 Op0 = DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: Op0, N2: KVal);
7076 Created.push_back(Elt: Op0.getNode());
7077 }
7078
7079 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
7080 SDValue NewCC =
7081 DAG.getSetCC(DL, VT: SETCCVT, LHS: Op0, RHS: QVal,
7082 Cond: ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7083 if (!HadTautologicalInvertedLanes)
7084 return NewCC;
7085
7086 // If any lanes previously compared always-false, the NewCC will give an
7087 // always-true result for them, so we need to fix up those lanes.
7088 // Or the other way around for the inequality predicate.
7089 assert(VT.isVector() && "Can/should only get here for vectors.");
7090 Created.push_back(Elt: NewCC.getNode());
7091
7092 // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
7093 // if C2 is not less than C1, the comparison is always false.
7094 // But we have produced the comparison that will give the
7095 // opposite tautological answer. So these lanes would need to be fixed up.
7096 SDValue TautologicalInvertedChannels =
7097 DAG.getSetCC(DL, VT: SETCCVT, LHS: D, RHS: CompTargetNode, Cond: ISD::SETULE);
7098 Created.push_back(Elt: TautologicalInvertedChannels.getNode());
7099
7100 // NOTE: we avoid letting illegal types through even if we're before legalize
7101 // ops; legalization has a hard time producing good code for this.
7102 if (isOperationLegalOrCustom(Op: ISD::VSELECT, VT: SETCCVT)) {
7103 // If we have a vector select, let's replace the comparison results in the
7104 // affected lanes with the correct tautological result.
7105 SDValue Replacement = DAG.getBoolConstant(V: Cond == ISD::SETEQ ? false : true,
7106 DL, VT: SETCCVT, OpVT: SETCCVT);
7107 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: SETCCVT, N1: TautologicalInvertedChannels,
7108 N2: Replacement, N3: NewCC);
7109 }
7110
7111 // Else, we can just invert the comparison result in the appropriate lanes.
7112 //
7113 // NOTE: see the NOTE on VSELECT above.
7114 if (isOperationLegalOrCustom(Op: ISD::XOR, VT: SETCCVT))
7115 return DAG.getNode(Opcode: ISD::XOR, DL, VT: SETCCVT, N1: NewCC,
7116 N2: TautologicalInvertedChannels);
7117
7118 return SDValue(); // Don't know how to lower.
7119}
7120
7121/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
7122/// where the divisor is constant and the comparison target is zero,
7123/// return a DAG expression that will generate the same comparison result
7124/// using only multiplications, additions and shifts/rotations.
7125/// Ref: "Hacker's Delight" 10-17.
7126SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
7127 SDValue CompTargetNode,
7128 ISD::CondCode Cond,
7129 DAGCombinerInfo &DCI,
7130 const SDLoc &DL) const {
7131 SmallVector<SDNode *, 7> Built;
7132 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7133 DCI, DL, Created&: Built)) {
7134 assert(Built.size() <= 7 && "Max size prediction failed.");
7135 for (SDNode *N : Built)
7136 DCI.AddToWorklist(N);
7137 return Folded;
7138 }
7139
7140 return SDValue();
7141}
7142
7143SDValue
7144TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
7145 SDValue CompTargetNode, ISD::CondCode Cond,
7146 DAGCombinerInfo &DCI, const SDLoc &DL,
7147 SmallVectorImpl<SDNode *> &Created) const {
7148 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
7149 // Fold:
7150 // (seteq/ne (srem N, D), 0)
7151 // To:
7152 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
7153 //
7154 // - D must be constant, with D = D0 * 2^K where D0 is odd
7155 // - P is the multiplicative inverse of D0 modulo 2^W
7156 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^K)))
7157 // - Q = floor((2 * A) / (2^K))
7158 // where W is the width of the common type of N and D.
7159 //
7160 // When D is a power of two (and thus D0 is 1), the normal
7161 // formulas for A and Q don't apply, because the derivation
7162 // depends on D not dividing 2^(W-1), and thus theorem ZRS
7163 // does not apply. This specifically fails when N = INT_MIN.
7164 //
7165 // Instead, for power-of-two D, we use:
7166 // - A = 2^(W-1)
7167 // |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
7168 // - Q = 2^(W-K) - 1
7169 // |-> Test that the top K bits are zero after rotation
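 // Worked example of the power-of-two case (for illustration, W = 8, D = 4):
 // K = 2, D0 = 1, P = 1, A = 2^7 = 128, Q = 2^6 - 1 = 63. For N = -4 (0xFC):
 // 0xFC + 0x80 == 0x7C (mod 256), rotr(0x7C, 2) = 0x1F = 31 u<= 63, so
 // -4 s% 4 == 0. For N = -3 (0xFD): 0xFD + 0x80 == 0x7D, and rotr(0x7D, 2) =
 // 0x5F = 95 u> 63, so -3 s% 4 != 0.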
7170 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7171 "Only applicable for (in)equality comparisons.");
7172
7173 SelectionDAG &DAG = DCI.DAG;
7174
7175 EVT VT = REMNode.getValueType();
7176 EVT SVT = VT.getScalarType();
7177 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
7178 EVT ShSVT = ShVT.getScalarType();
7179
7180 // If we are after ops legalization, and MUL is unavailable, we cannot
7181 // proceed.
7182 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::MUL, VT))
7183 return SDValue();
7184
7185 // TODO: Could support comparing with non-zero too.
7186 ConstantSDNode *CompTarget = isConstOrConstSplat(N: CompTargetNode);
7187 if (!CompTarget || !CompTarget->isZero())
7188 return SDValue();
7189
7190 bool HadIntMinDivisor = false;
7191 bool HadOneDivisor = false;
7192 bool AllDivisorsAreOnes = true;
7193 bool HadEvenDivisor = false;
7194 bool NeedToApplyOffset = false;
7195 bool AllDivisorsArePowerOfTwo = true;
7196 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
7197
7198 auto BuildSREMPattern = [&](ConstantSDNode *C) {
7199 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7200 if (C->isZero())
7201 return false;
7202
7203 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
7204
7205 // WARNING: this fold is only valid for positive divisors!
7206 APInt D = C->getAPIntValue();
7207 if (D.isNegative())
7208 D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
7209
7210 HadIntMinDivisor |= D.isMinSignedValue();
7211
7212 // If all divisors are ones, we will prefer to avoid the fold.
7213 HadOneDivisor |= D.isOne();
7214 AllDivisorsAreOnes &= D.isOne();
7215
7216 // Decompose D into D0 * 2^K
7217 unsigned K = D.countr_zero();
7218 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7219 APInt D0 = D.lshr(shiftAmt: K);
7220
7221 if (!D.isMinSignedValue()) {
7222 // D is even if it has trailing zeros; unless it's INT_MIN, in which case
7223 // we don't care about this lane in this fold; we'll special-handle it.
7224 HadEvenDivisor |= (K != 0);
7225 }
7226
7227 // D is a power-of-two if D0 is one. This includes INT_MIN.
7228 // If all divisors are power-of-two, we will prefer to avoid the fold.
7229 AllDivisorsArePowerOfTwo &= D0.isOne();
7230
7231 // P = inv(D0, 2^W)
7232 // (D0 is odd, so the multiplicative inverse modulo 2^W exists.)
7233 unsigned W = D.getBitWidth();
7234 APInt P = D0.multiplicativeInverse();
7235 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7236
7237 // A = floor((2^(W - 1) - 1) / D0) & -2^K
7238 APInt A = APInt::getSignedMaxValue(numBits: W).udiv(RHS: D0);
7239 A.clearLowBits(loBits: K);
7240
7241 if (!D.isMinSignedValue()) {
7242 // If the divisor is INT_MIN, we don't care about this lane in this fold;
7243 // we'll special-handle it.
7244 NeedToApplyOffset |= A != 0;
7245 }
7246
7247 // Q = floor((2 * A) / (2^K))
7248 APInt Q = (2 * A).udiv(RHS: APInt::getOneBitSet(numBits: W, BitNo: K));
7249
7250 assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
7251 "We are expecting that A is always less than all-ones for SVT");
7252 assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
7253 "We are expecting that K is always less than all-ones for ShSVT");
7254
7255 // If D was a power of two, apply the alternate constant derivation.
7256 if (D0.isOne()) {
7257 // A = 2^(W-1)
7258 A = APInt::getSignedMinValue(numBits: W);
7259 // Q = 2^(W-K) - 1
7260 Q = APInt::getAllOnes(numBits: W - K).zext(width: W);
7261 }
7262
7263 // If the divisor is 1 the result can be constant-folded. Likewise, we
7264 // don't care about INT_MIN lanes; those can be set to undef if appropriate.
7265 if (D.isOne()) {
7266 // Set P, A and K to bogus values so we can try to splat them.
7267 P = 0;
7268 A = -1;
7269 K = -1;
7270
7271 // x ?% 1 == 0 <--> true <--> x u<= -1
7272 Q = -1;
7273 }
7274
7275 PAmts.push_back(Elt: DAG.getConstant(Val: P, DL, VT: SVT));
7276 AAmts.push_back(Elt: DAG.getConstant(Val: A, DL, VT: SVT));
7277 KAmts.push_back(
7278 Elt: DAG.getConstant(Val: APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7279 /*implicitTrunc=*/true),
7280 DL, VT: ShSVT));
7281 QAmts.push_back(Elt: DAG.getConstant(Val: Q, DL, VT: SVT));
7282 return true;
7283 };
7284
7285 SDValue N = REMNode.getOperand(i: 0);
7286 SDValue D = REMNode.getOperand(i: 1);
7287
7288 // Collect the values from each element.
7289 if (!ISD::matchUnaryPredicate(Op: D, Match: BuildSREMPattern))
7290 return SDValue();
7291
7292 // If this is a srem by one, avoid the fold since it can be constant-folded.
7293 if (AllDivisorsAreOnes)
7294 return SDValue();
7295
7296 // If this is a srem by a power-of-two (including INT_MIN), avoid the fold
7297 // since it can be best implemented as a bit test.
7298 if (AllDivisorsArePowerOfTwo)
7299 return SDValue();
7300
7301 SDValue PVal, AVal, KVal, QVal;
7302 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7303 if (HadOneDivisor) {
7304 // Try to turn PAmts into a splat, since we don't care about the values
7305 // that are currently '0'. If we can't, just keep the '0's.
7306 turnVectorIntoSplatVector(Values: PAmts, Predicate: isNullConstant);
7307 // Try to turn AAmts into a splat, since we don't care about the
7308 // values that are currently '-1'. If we can't, change them to '0's.
7309 turnVectorIntoSplatVector(Values: AAmts, Predicate: isAllOnesConstant,
7310 AlternativeReplacement: DAG.getConstant(Val: 0, DL, VT: SVT));
7311 // Try to turn KAmts into a splat, since we don't care about the values
7312 // that are currently '-1'. If we can't, change them to '0's.
7313 turnVectorIntoSplatVector(Values: KAmts, Predicate: isAllOnesConstant,
7314 AlternativeReplacement: DAG.getConstant(Val: 0, DL, VT: ShSVT));
7315 }
7316
7317 PVal = DAG.getBuildVector(VT, DL, Ops: PAmts);
7318 AVal = DAG.getBuildVector(VT, DL, Ops: AAmts);
7319 KVal = DAG.getBuildVector(VT: ShVT, DL, Ops: KAmts);
7320 QVal = DAG.getBuildVector(VT, DL, Ops: QAmts);
7321 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7322 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7323 QAmts.size() == 1 &&
7324 "Expected matchUnaryPredicate to return one element for scalable "
7325 "vectors");
7326 PVal = DAG.getSplatVector(VT, DL, Op: PAmts[0]);
7327 AVal = DAG.getSplatVector(VT, DL, Op: AAmts[0]);
7328 KVal = DAG.getSplatVector(VT: ShVT, DL, Op: KAmts[0]);
7329 QVal = DAG.getSplatVector(VT, DL, Op: QAmts[0]);
7330 } else {
7331 assert(isa<ConstantSDNode>(D) && "Expected a constant");
7332 PVal = PAmts[0];
7333 AVal = AAmts[0];
7334 KVal = KAmts[0];
7335 QVal = QAmts[0];
7336 }
7337
7338 // (mul N, P)
7339 SDValue Op0 = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N, N2: PVal);
7340 Created.push_back(Elt: Op0.getNode());
7341
7342 if (NeedToApplyOffset) {
7343 // We need ADD to do this.
7344 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ADD, VT))
7345 return SDValue();
7346
7347 // (add (mul N, P), A)
7348 Op0 = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0, N2: AVal);
7349 Created.push_back(Elt: Op0.getNode());
7350 }
7351
7352 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7353 // divisors as a performance improvement, since rotating by 0 is a no-op.
7354 if (HadEvenDivisor) {
7355 // We need ROTR to do this.
7356 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ROTR, VT))
7357 return SDValue();
7358 // SREM: (rotr (add (mul N, P), A), K)
7359 Op0 = DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: Op0, N2: KVal);
7360 Created.push_back(Elt: Op0.getNode());
7361 }
7362
7363 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7364 SDValue Fold =
7365 DAG.getSetCC(DL, VT: SETCCVT, LHS: Op0, RHS: QVal,
7366 Cond: ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7367
7368 // If we didn't have lanes with INT_MIN divisor, then we're done.
7369 if (!HadIntMinDivisor)
7370 return Fold;
7371
7372 // That fold is only valid for positive divisors, which effectively means
7373 // it is invalid for INT_MIN divisors. So if we have such a lane,
7374 // we must fix up the results for those lanes.
7375 assert(VT.isVector() && "Can/should only get here for vectors.");
7376
7377 // NOTE: we avoid letting illegal types through even if we're before legalize
7378 // ops; legalization has a hard time producing good code for the code that
7379 // follows.
7380 if (!isOperationLegalOrCustom(Op: ISD::SETCC, VT: SETCCVT) ||
7381 !isOperationLegalOrCustom(Op: ISD::AND, VT) ||
7382 !isCondCodeLegalOrCustom(CC: Cond, VT: VT.getSimpleVT()) ||
7383 !isOperationLegalOrCustom(Op: ISD::VSELECT, VT: SETCCVT))
7384 return SDValue();
7385
7386 Created.push_back(Elt: Fold.getNode());
7387
7388 SDValue IntMin = DAG.getConstant(
7389 Val: APInt::getSignedMinValue(numBits: SVT.getScalarSizeInBits()), DL, VT);
7390 SDValue IntMax = DAG.getConstant(
7391 Val: APInt::getSignedMaxValue(numBits: SVT.getScalarSizeInBits()), DL, VT);
7392 SDValue Zero =
7393 DAG.getConstant(Val: APInt::getZero(numBits: SVT.getScalarSizeInBits()), DL, VT);
7394
7395 // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7396 SDValue DivisorIsIntMin = DAG.getSetCC(DL, VT: SETCCVT, LHS: D, RHS: IntMin, Cond: ISD::SETEQ);
7397 Created.push_back(Elt: DivisorIsIntMin.getNode());
7398
7399 // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
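 // (This holds because N s% INT_MIN is zero only for N == 0 and N == INT_MIN,
 // i.e. exactly when the low W - 1 bits of N are all zero.)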
7400 SDValue Masked = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: N, N2: IntMax);
7401 Created.push_back(Elt: Masked.getNode());
7402 SDValue MaskedIsZero = DAG.getSetCC(DL, VT: SETCCVT, LHS: Masked, RHS: Zero, Cond);
7403 Created.push_back(Elt: MaskedIsZero.getNode());
7404
7405 // To produce the final result we need to blend 2 vectors: 'Fold' and
7406 // 'MaskedIsZero'. If the divisor for a channel was *NOT* INT_MIN, we pick
7407 // from 'Fold', else we pick from 'MaskedIsZero'. Since 'DivisorIsIntMin'
7408 // is constant-folded, the select can get lowered to a shuffle with a constant mask.
7409 SDValue Blended = DAG.getNode(Opcode: ISD::VSELECT, DL, VT: SETCCVT, N1: DivisorIsIntMin,
7410 N2: MaskedIsZero, N3: Fold);
7411
7412 return Blended;
7413}
7414
7415bool TargetLowering::
7416verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
7417 if (!isa<ConstantSDNode>(Val: Op.getOperand(i: 0))) {
7418 DAG.getContext()->emitError(ErrorStr: "argument to '__builtin_return_address' must "
7419 "be a constant integer");
7420 return true;
7421 }
7422
7423 return false;
7424}
7425
7426SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
7427 const DenormalMode &Mode) const {
7428 SDLoc DL(Op);
7429 EVT VT = Op.getValueType();
7430 EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
7431 SDValue FPZero = DAG.getConstantFP(Val: 0.0, DL, VT);
7432
7433 // This is specifically a check for the handling of denormal inputs, not the
7434 // result.
7435 if (Mode.Input == DenormalMode::PreserveSign ||
7436 Mode.Input == DenormalMode::PositiveZero) {
7437 // Test = X == 0.0
7438 return DAG.getSetCC(DL, VT: CCVT, LHS: Op, RHS: FPZero, Cond: ISD::SETEQ);
7439 }
7440
7441 // Test for denormal inputs to avoid a wrong estimate.
7442 //
7443 // Test = fabs(X) < SmallestNormal
7444 const fltSemantics &FltSem = VT.getFltSemantics();
7445 APFloat SmallestNorm = APFloat::getSmallestNormalized(Sem: FltSem);
7446 SDValue NormC = DAG.getConstantFP(Val: SmallestNorm, DL, VT);
7447 SDValue Fabs = DAG.getNode(Opcode: ISD::FABS, DL, VT, Operand: Op);
7448 return DAG.getSetCC(DL, VT: CCVT, LHS: Fabs, RHS: NormC, Cond: ISD::SETLT);
7449}
7450
7451SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
7452 bool LegalOps, bool OptForSize,
7453 NegatibleCost &Cost,
7454 unsigned Depth) const {
7455 // fneg is removable even if it has multiple uses.
7456 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
7457 Cost = NegatibleCost::Cheaper;
7458 return Op.getOperand(i: 0);
7459 }
7460
7461 // Don't recurse exponentially.
7462 if (Depth > SelectionDAG::MaxRecursionDepth)
7463 return SDValue();
7464
7465 // Pre-increment recursion depth for use in recursive calls.
7466 ++Depth;
7467 const SDNodeFlags Flags = Op->getFlags();
7468 const TargetOptions &Options = DAG.getTarget().Options;
7469 EVT VT = Op.getValueType();
7470 unsigned Opcode = Op.getOpcode();
7471
7472 // Don't allow anything with multiple uses unless we know it is free.
7473 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7474 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7475 isFPExtFree(DestVT: VT, SrcVT: Op.getOperand(i: 0).getValueType());
7476 if (!IsFreeExtend)
7477 return SDValue();
7478 }
7479
7480 auto RemoveDeadNode = [&](SDValue N) {
7481 if (N && N.getNode()->use_empty())
7482 DAG.RemoveDeadNode(N: N.getNode());
7483 };
7484
7485 SDLoc DL(Op);
7486
7487 // Because getNegatedExpression can delete nodes we need a handle to keep
7488 // temporary nodes alive in case the recursion manages to create an identical
7489 // node.
7490 std::list<HandleSDNode> Handles;
7491
7492 switch (Opcode) {
7493 case ISD::ConstantFP: {
7494 // Don't invert constant FP values after legalization unless the target says
7495 // the negated constant is legal.
7496 bool IsOpLegal =
7497 isOperationLegal(Op: ISD::ConstantFP, VT) ||
7498 isFPImmLegal(neg(X: cast<ConstantFPSDNode>(Val&: Op)->getValueAPF()), VT,
7499 ForCodeSize: OptForSize);
7500
7501 if (LegalOps && !IsOpLegal)
7502 break;
7503
7504 APFloat V = cast<ConstantFPSDNode>(Val&: Op)->getValueAPF();
7505 V.changeSign();
7506 SDValue CFP = DAG.getConstantFP(Val: V, DL, VT);
7507
7508 // If we already have the use of the negated floating constant, it is free
7509 // to negate it even if it has multiple uses.
7510 if (!Op.hasOneUse() && CFP.use_empty())
7511 break;
7512 Cost = NegatibleCost::Neutral;
7513 return CFP;
7514 }
7515 case ISD::BUILD_VECTOR: {
7516 // Only permit BUILD_VECTOR of constants.
7517 if (llvm::any_of(Range: Op->op_values(), P: [&](SDValue N) {
7518 return !N.isUndef() && !isa<ConstantFPSDNode>(Val: N);
7519 }))
7520 break;
7521
7522 bool IsOpLegal =
7523 (isOperationLegal(Op: ISD::ConstantFP, VT) &&
7524 isOperationLegal(Op: ISD::BUILD_VECTOR, VT)) ||
7525 llvm::all_of(Range: Op->op_values(), P: [&](SDValue N) {
7526 return N.isUndef() ||
7527 isFPImmLegal(neg(X: cast<ConstantFPSDNode>(Val&: N)->getValueAPF()), VT,
7528 ForCodeSize: OptForSize);
7529 });
7530
7531 if (LegalOps && !IsOpLegal)
7532 break;
7533
7534 SmallVector<SDValue, 4> Ops;
7535 for (SDValue C : Op->op_values()) {
7536 if (C.isUndef()) {
7537 Ops.push_back(Elt: C);
7538 continue;
7539 }
7540 APFloat V = cast<ConstantFPSDNode>(Val&: C)->getValueAPF();
7541 V.changeSign();
7542 Ops.push_back(Elt: DAG.getConstantFP(Val: V, DL, VT: C.getValueType()));
7543 }
7544 Cost = NegatibleCost::Neutral;
7545 return DAG.getBuildVector(VT, DL, Ops);
7546 }
7547 case ISD::FADD: {
7548 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7549 break;
7550
7551 // After operation legalization, it might not be legal to create new FSUBs.
7552 if (LegalOps && !isOperationLegalOrCustom(Op: ISD::FSUB, VT))
7553 break;
7554 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1);
7555
7556 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7557 NegatibleCost CostX = NegatibleCost::Expensive;
7558 SDValue NegX =
7559 getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7560 // Prevent this node from being deleted by the next call.
7561 if (NegX)
7562 Handles.emplace_back(args&: NegX);
7563
7564 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7565 NegatibleCost CostY = NegatibleCost::Expensive;
7566 SDValue NegY =
7567 getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7568
7569 // We're done with the handles.
7570 Handles.clear();
7571
7572 // Negate the X if its cost is less than or equal to Y's.
7573 if (NegX && (CostX <= CostY)) {
7574 Cost = CostX;
7575 SDValue N = DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: NegX, N2: Y, Flags);
7576 if (NegY != N)
7577 RemoveDeadNode(NegY);
7578 return N;
7579 }
7580
7581 // Negate the Y if it is not expensive.
7582 if (NegY) {
7583 Cost = CostY;
7584 SDValue N = DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: NegY, N2: X, Flags);
7585 if (NegX != N)
7586 RemoveDeadNode(NegX);
7587 return N;
7588 }
7589 break;
7590 }
7591 case ISD::FSUB: {
7592 // We can't turn -(A-B) into B-A when we honor signed zeros.
7593 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7594 break;
7595
7596 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1);
7597 // fold (fneg (fsub 0, Y)) -> Y
7598 if (ConstantFPSDNode *C = isConstOrConstSplatFP(N: X, /*AllowUndefs*/ true))
7599 if (C->isZero()) {
7600 Cost = NegatibleCost::Cheaper;
7601 return Y;
7602 }
7603
7604 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7605 Cost = NegatibleCost::Neutral;
7606 return DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: Y, N2: X, Flags);
7607 }
7608 case ISD::FMUL:
7609 case ISD::FDIV: {
7610 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1);
7611
7612 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7613 NegatibleCost CostX = NegatibleCost::Expensive;
7614 SDValue NegX =
7615 getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7616 // Prevent this node from being deleted by the next call.
7617 if (NegX)
7618 Handles.emplace_back(args&: NegX);
7619
7620 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7621 NegatibleCost CostY = NegatibleCost::Expensive;
7622 SDValue NegY =
7623 getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7624
7625 // We're done with the handles.
7626 Handles.clear();
7627
7628 // Negate the X if its cost is less than or equal to Y's.
7629 if (NegX && (CostX <= CostY)) {
7630 Cost = CostX;
7631 SDValue N = DAG.getNode(Opcode, DL, VT, N1: NegX, N2: Y, Flags);
7632 if (NegY != N)
7633 RemoveDeadNode(NegY);
7634 return N;
7635 }
7636
7637 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7638 if (auto *C = isConstOrConstSplatFP(N: Op.getOperand(i: 1)))
7639 if (C->isExactlyValue(V: 2.0) && Op.getOpcode() == ISD::FMUL)
7640 break;
7641
7642 // Negate the Y if it is not expensive.
7643 if (NegY) {
7644 Cost = CostY;
7645 SDValue N = DAG.getNode(Opcode, DL, VT, N1: X, N2: NegY, Flags);
7646 if (NegX != N)
7647 RemoveDeadNode(NegX);
7648 return N;
7649 }
7650 break;
7651 }
7652 case ISD::FMA:
7653 case ISD::FMAD: {
7654 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7655 break;
7656
7657 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1), Z = Op.getOperand(i: 2);
7658 NegatibleCost CostZ = NegatibleCost::Expensive;
7659 SDValue NegZ =
7660 getNegatedExpression(Op: Z, DAG, LegalOps, OptForSize, Cost&: CostZ, Depth);
7661 // Give up if we fail to negate the Z.
7662 if (!NegZ)
7663 break;
7664
7665 // Prevent this node from being deleted by the next two calls.
7666 Handles.emplace_back(args&: NegZ);
7667
7668 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7669 NegatibleCost CostX = NegatibleCost::Expensive;
7670 SDValue NegX =
7671 getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7672 // Prevent this node from being deleted by the next call.
7673 if (NegX)
7674 Handles.emplace_back(args&: NegX);
7675
7676 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7677 NegatibleCost CostY = NegatibleCost::Expensive;
7678 SDValue NegY =
7679 getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7680
7681 // We're done with the handles.
7682 Handles.clear();
7683
7684 // Negate the X if its cost is less than or equal to Y's.
7685 if (NegX && (CostX <= CostY)) {
7686 Cost = std::min(a: CostX, b: CostZ);
7687 SDValue N = DAG.getNode(Opcode, DL, VT, N1: NegX, N2: Y, N3: NegZ, Flags);
7688 if (NegY != N)
7689 RemoveDeadNode(NegY);
7690 return N;
7691 }
7692
7693 // Negate the Y if it is not expensive.
7694 if (NegY) {
7695 Cost = std::min(a: CostY, b: CostZ);
7696 SDValue N = DAG.getNode(Opcode, DL, VT, N1: X, N2: NegY, N3: NegZ, Flags);
7697 if (NegX != N)
7698 RemoveDeadNode(NegX);
7699 return N;
7700 }
7701 break;
7702 }
7703
7704 case ISD::FP_EXTEND:
7705 case ISD::FSIN:
7706 if (SDValue NegV = getNegatedExpression(Op: Op.getOperand(i: 0), DAG, LegalOps,
7707 OptForSize, Cost, Depth))
7708 return DAG.getNode(Opcode, DL, VT, Operand: NegV);
7709 break;
7710 case ISD::FP_ROUND:
7711 if (SDValue NegV = getNegatedExpression(Op: Op.getOperand(i: 0), DAG, LegalOps,
7712 OptForSize, Cost, Depth))
7713 return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT, N1: NegV, N2: Op.getOperand(i: 1));
7714 break;
7715 case ISD::SELECT:
7716 case ISD::VSELECT: {
7717 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7718 // iff at least one cost is cheaper and the other is neutral/cheaper
7719 SDValue LHS = Op.getOperand(i: 1);
7720 NegatibleCost CostLHS = NegatibleCost::Expensive;
7721 SDValue NegLHS =
7722 getNegatedExpression(Op: LHS, DAG, LegalOps, OptForSize, Cost&: CostLHS, Depth);
7723 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7724 RemoveDeadNode(NegLHS);
7725 break;
7726 }
7727
7728 // Prevent this node from being deleted by the next call.
7729 Handles.emplace_back(args&: NegLHS);
7730
7731 SDValue RHS = Op.getOperand(i: 2);
7732 NegatibleCost CostRHS = NegatibleCost::Expensive;
7733 SDValue NegRHS =
7734 getNegatedExpression(Op: RHS, DAG, LegalOps, OptForSize, Cost&: CostRHS, Depth);
7735
7736 // We're done with the handles.
7737 Handles.clear();
7738
7739 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7740 (CostLHS != NegatibleCost::Cheaper &&
7741 CostRHS != NegatibleCost::Cheaper)) {
7742 RemoveDeadNode(NegLHS);
7743 RemoveDeadNode(NegRHS);
7744 break;
7745 }
7746
7747 Cost = std::min(a: CostLHS, b: CostRHS);
7748 return DAG.getSelect(DL, VT, Cond: Op.getOperand(i: 0), LHS: NegLHS, RHS: NegRHS);
7749 }
7750 }
7751
7752 return SDValue();
7753}
7754
7755//===----------------------------------------------------------------------===//
7756// Legalization Utilities
7757//===----------------------------------------------------------------------===//
7758
7759bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7760 SDValue LHS, SDValue RHS,
7761 SmallVectorImpl<SDValue> &Result,
7762 EVT HiLoVT, SelectionDAG &DAG,
7763 MulExpansionKind Kind, SDValue LL,
7764 SDValue LH, SDValue RL, SDValue RH) const {
7765 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7766 Opcode == ISD::SMUL_LOHI);
7767
7768 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7769 isOperationLegalOrCustom(Op: ISD::MULHS, VT: HiLoVT);
7770 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7771 isOperationLegalOrCustom(Op: ISD::MULHU, VT: HiLoVT);
7772 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7773 isOperationLegalOrCustom(Op: ISD::SMUL_LOHI, VT: HiLoVT);
7774 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7775 isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT: HiLoVT);
7776
7777 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7778 return false;
7779
7780 unsigned OuterBitSize = VT.getScalarSizeInBits();
7781 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7782
7783 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7784 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7785 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
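 // In outline, the expansion below is schoolbook multiplication on the two
 // halves: with I = InnerBitSize,
 //   LHS * RHS = (LL + LH * 2^I) * (RL + RH * 2^I)
 //             = LL*RL + (LL*RH + LH*RL) * 2^I + LH*RH * 2^(2*I),
 // with the cross terms folded in through the carry-propagating adds below.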
7786
7787 SDVTList VTs = DAG.getVTList(VT1: HiLoVT, VT2: HiLoVT);
7788 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7789 bool Signed) -> bool {
7790 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7791 Lo = DAG.getNode(Opcode: Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, DL: dl, VTList: VTs, N1: L, N2: R);
7792 Hi = SDValue(Lo.getNode(), 1);
7793 return true;
7794 }
7795 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7796 Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: L, N2: R);
7797 Hi = DAG.getNode(Opcode: Signed ? ISD::MULHS : ISD::MULHU, DL: dl, VT: HiLoVT, N1: L, N2: R);
7798 return true;
7799 }
7800 return false;
7801 };
7802
7803 SDValue Lo, Hi;
7804
7805 if (!LL.getNode() && !RL.getNode() &&
7806 isOperationLegalOrCustom(Op: ISD::TRUNCATE, VT: HiLoVT)) {
7807 LL = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: LHS);
7808 RL = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: RHS);
7809 }
7810
7811 if (!LL.getNode())
7812 return false;
7813
7814 APInt HighMask = APInt::getHighBitsSet(numBits: OuterBitSize, hiBitsSet: InnerBitSize);
7815 if (DAG.MaskedValueIsZero(Op: LHS, Mask: HighMask) &&
7816 DAG.MaskedValueIsZero(Op: RHS, Mask: HighMask)) {
7817 // The inputs are both zero-extended.
7818 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7819 Result.push_back(Elt: Lo);
7820 Result.push_back(Elt: Hi);
7821 if (Opcode != ISD::MUL) {
7822 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT);
7823 Result.push_back(Elt: Zero);
7824 Result.push_back(Elt: Zero);
7825 }
7826 return true;
7827 }
7828 }
7829
7830 if (!VT.isVector() && Opcode == ISD::MUL &&
7831 DAG.ComputeMaxSignificantBits(Op: LHS) <= InnerBitSize &&
7832 DAG.ComputeMaxSignificantBits(Op: RHS) <= InnerBitSize) {
7833 // The input values are both sign-extended.
7834 // TODO non-MUL case?
7835 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
7836 Result.push_back(Elt: Lo);
7837 Result.push_back(Elt: Hi);
7838 return true;
7839 }
7840 }
7841
7842 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7843 SDValue Shift = DAG.getShiftAmountConstant(Val: ShiftAmount, VT, DL: dl);
7844
7845 if (!LH.getNode() && !RH.getNode() &&
7846 isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
7847 isOperationLegalOrCustom(Op: ISD::TRUNCATE, VT: HiLoVT)) {
7848 LH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: LHS, N2: Shift);
7849 LH = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: LH);
7850 RH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: RHS, N2: Shift);
7851 RH = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: RH);
7852 }
7853
7854 if (!LH.getNode())
7855 return false;
7856
7857 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
7858 return false;
7859
7860 Result.push_back(Elt: Lo);
7861
7862 if (Opcode == ISD::MUL) {
7863 RH = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: LL, N2: RH);
7864 LH = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: LH, N2: RL);
7865 Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Hi, N2: RH);
7866 Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Hi, N2: LH);
7867 Result.push_back(Elt: Hi);
7868 return true;
7869 }
7870
7871 // Compute the full width result.
7872 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
7873 Lo = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Lo);
7874 Hi = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Hi);
7875 Hi = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Hi, N2: Shift);
7876 return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Lo, N2: Hi);
7877 };
7878
7879 SDValue Next = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Hi);
7880 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
7881 return false;
7882
7883 // This is effectively the add part of a multiply-add of half-sized operands,
7884 // so it cannot overflow.
7885 Next = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Next, N2: Merge(Lo, Hi));
7886
7887 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
7888 return false;
7889
7890 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT);
7891 EVT BoolType = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
7892
7893 bool UseGlue = (isOperationLegalOrCustom(Op: ISD::ADDC, VT) &&
7894 isOperationLegalOrCustom(Op: ISD::ADDE, VT));
7895 if (UseGlue)
7896 Next = DAG.getNode(Opcode: ISD::ADDC, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::Glue), N1: Next,
7897 N2: Merge(Lo, Hi));
7898 else
7899 Next = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolType), N1: Next,
7900 N2: Merge(Lo, Hi), N3: DAG.getConstant(Val: 0, DL: dl, VT: BoolType));
7901
7902 SDValue Carry = Next.getValue(R: 1);
7903 Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
7904 Next = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Next, N2: Shift);
7905
7906 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
7907 return false;
7908
7909 if (UseGlue)
7910 Hi = DAG.getNode(Opcode: ISD::ADDE, DL: dl, VTList: DAG.getVTList(VT1: HiLoVT, VT2: MVT::Glue), N1: Hi, N2: Zero,
7911 N3: Carry);
7912 else
7913 Hi = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList: DAG.getVTList(VT1: HiLoVT, VT2: BoolType), N1: Hi,
7914 N2: Zero, N3: Carry);
7915
7916 Next = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Next, N2: Merge(Lo, Hi));
7917
7918 if (Opcode == ISD::SMUL_LOHI) {
7919 SDValue NextSub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Next,
7920 N2: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: RL));
7921 Next = DAG.getSelectCC(DL: dl, LHS: LH, RHS: Zero, True: NextSub, False: Next, Cond: ISD::SETLT);
7922
7923 NextSub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Next,
7924 N2: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: LL));
7925 Next = DAG.getSelectCC(DL: dl, LHS: RH, RHS: Zero, True: NextSub, False: Next, Cond: ISD::SETLT);
7926 }
7927
7928 Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
7929 Next = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Next, N2: Shift);
7930 Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
7931 return true;
7932}
7933
7934bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
7935 SelectionDAG &DAG, MulExpansionKind Kind,
7936 SDValue LL, SDValue LH, SDValue RL,
7937 SDValue RH) const {
7938 SmallVector<SDValue, 2> Result;
7939 bool Ok = expandMUL_LOHI(Opcode: N->getOpcode(), VT: N->getValueType(ResNo: 0), dl: SDLoc(N),
7940 LHS: N->getOperand(Num: 0), RHS: N->getOperand(Num: 1), Result, HiLoVT,
7941 DAG, Kind, LL, LH, RL, RH);
7942 if (Ok) {
7943 assert(Result.size() == 2);
7944 Lo = Result[0];
7945 Hi = Result[1];
7946 }
7947 return Ok;
7948}
7949
7950// Optimize unsigned division or remainder by constants for types twice as large
7951// as a legal VT.
7952//
7953 // If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
7954 // can be computed as:
7955 //   Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7956 //   Remainder = Sum % Constant
7957 //
7958 // This is based on "Remainder by Summing Digits" from Hacker's Delight.
7959//
7960// For division, we can compute the remainder using the algorithm described
7961// above, subtract it from the dividend to get an exact multiple of Constant.
7962 // Then multiply that exact multiple by the multiplicative inverse modulo
7963 // (1 << BitWidth) to get the quotient.
7964
7965// If Constant is even, we can shift right the dividend and the divisor by the
7966// number of trailing zeros in Constant before applying the remainder algorithm.
7967 // If we want the quotient, we can subtract this remainder from the shifted
7968 // dividend and multiply by the multiplicative inverse of the shifted divisor.
7969 // If we want the remainder, we shift the remainder left by the number of
7970 // trailing zeros and add back the bits that were shifted out of the dividend.
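//
// Worked example (for illustration): BitWidth = 32, HBitWidth = 16,
// Constant = 3. Since (1 << 16) % 3 == 1, Sum = Lo + Hi (with any carry added
// back in) and Remainder = Sum % 3. For the quotient, Dividend - Remainder is
// an exact multiple of 3, and multiplying it by inv(3, 2^32) = 0xAAAAAAAB
// recovers the quotient, since 3 * 0xAAAAAAAB == 1 (mod 2^32).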
7971bool TargetLowering::expandDIVREMByConstant(SDNode *N,
7972 SmallVectorImpl<SDValue> &Result,
7973 EVT HiLoVT, SelectionDAG &DAG,
7974 SDValue LL, SDValue LH) const {
7975 unsigned Opcode = N->getOpcode();
7976 EVT VT = N->getValueType(ResNo: 0);
7977
7978 // TODO: Support signed division/remainder.
7979 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
7980 return false;
7981 assert(
7982 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
7983 "Unexpected opcode");
7984
7985 auto *CN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
7986 if (!CN)
7987 return false;
7988
7989 APInt Divisor = CN->getAPIntValue();
7990 unsigned BitWidth = Divisor.getBitWidth();
7991 unsigned HBitWidth = BitWidth / 2;
7992 assert(VT.getScalarSizeInBits() == BitWidth &&
7993 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
7994
7995 // Divisor needs to be less than (1 << HBitWidth).
7996 APInt HalfMaxPlus1 = APInt::getOneBitSet(numBits: BitWidth, BitNo: HBitWidth);
7997 if (Divisor.uge(RHS: HalfMaxPlus1))
7998 return false;
7999
8000 // We depend on the UREM by constant optimization in DAGCombiner that requires
8001 // a high multiply.
8002 if (!isOperationLegalOrCustom(Op: ISD::MULHU, VT: HiLoVT) &&
8003 !isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT: HiLoVT))
8004 return false;
8005
8006 // Don't expand if optimizing for size.
8007 if (DAG.shouldOptForSize())
8008 return false;
8009
8010 // Early out for 0 or 1 divisors.
8011 if (Divisor.ule(RHS: 1))
8012 return false;
8013
8014 // If the divisor is even, shift it until it becomes odd.
8015 unsigned TrailingZeros = 0;
8016 if (!Divisor[0]) {
8017 TrailingZeros = Divisor.countr_zero();
8018 Divisor.lshrInPlace(ShiftAmt: TrailingZeros);
8019 }
8020
8021 SDLoc dl(N);
8022 SDValue Sum;
8023 SDValue PartialRem;
8024
8025 // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
8026 // then add in the carry.
8027 // TODO: If we can't split it in half, we might be able to split into 3 or
8028 // more pieces using a smaller bit width.
8029 if (HalfMaxPlus1.urem(RHS: Divisor).isOne()) {
8030 assert(!LL == !LH && "Expected both input halves or no input halves!");
8031 if (!LL)
8032 std::tie(args&: LL, args&: LH) = DAG.SplitScalar(N: N->getOperand(Num: 0), DL: dl, LoVT: HiLoVT, HiVT: HiLoVT);
8033
8034 // Shift the input by the number of TrailingZeros in the divisor. The
8035 // shifted out bits will be added to the remainder later.
8036 if (TrailingZeros) {
8037 // Save the shifted off bits if we need the remainder.
8038 if (Opcode != ISD::UDIV) {
8039 APInt Mask = APInt::getLowBitsSet(numBits: HBitWidth, loBitsSet: TrailingZeros);
8040 PartialRem = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: HiLoVT, N1: LL,
8041 N2: DAG.getConstant(Val: Mask, DL: dl, VT: HiLoVT));
8042 }
8043
8044 LL = DAG.getNode(
8045 Opcode: ISD::OR, DL: dl, VT: HiLoVT,
8046 N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HiLoVT, N1: LL,
8047 N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl)),
8048 N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HiLoVT, N1: LH,
8049 N2: DAG.getShiftAmountConstant(Val: HBitWidth - TrailingZeros,
8050 VT: HiLoVT, DL: dl)));
8051 LH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HiLoVT, N1: LH,
8052 N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl));
8053 }
8054
8055 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
8056 EVT SetCCType =
8057 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: HiLoVT);
8058 if (isOperationLegalOrCustom(Op: ISD::UADDO_CARRY, VT: HiLoVT)) {
8059 SDVTList VTList = DAG.getVTList(VT1: HiLoVT, VT2: SetCCType);
8060 Sum = DAG.getNode(Opcode: ISD::UADDO, DL: dl, VTList, N1: LL, N2: LH);
8061 Sum = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList, N1: Sum,
8062 N2: DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT), N3: Sum.getValue(R: 1));
8063 } else {
8064 Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: LL, N2: LH);
8065 SDValue Carry = DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Sum, RHS: LL, Cond: ISD::SETULT);
8066 // If the boolean for the target is 0 or 1, we can add the setcc result
8067 // directly.
8068 if (getBooleanContents(Type: HiLoVT) ==
8069 TargetLoweringBase::ZeroOrOneBooleanContent)
8070 Carry = DAG.getZExtOrTrunc(Op: Carry, DL: dl, VT: HiLoVT);
8071 else
8072 Carry = DAG.getSelect(DL: dl, VT: HiLoVT, Cond: Carry, LHS: DAG.getConstant(Val: 1, DL: dl, VT: HiLoVT),
8073 RHS: DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT));
8074 Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Sum, N2: Carry);
8075 }
8076 }
8077
8078 // If we didn't find a sum, we can't do the expansion.
8079 if (!Sum)
8080 return false;
8081
8082 // Perform a HiLoVT urem on the Sum using the truncated divisor.
8083 SDValue RemL =
8084 DAG.getNode(Opcode: ISD::UREM, DL: dl, VT: HiLoVT, N1: Sum,
8085 N2: DAG.getConstant(Val: Divisor.trunc(width: HBitWidth), DL: dl, VT: HiLoVT));
8086 SDValue RemH = DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT);
8087
8088 if (Opcode != ISD::UREM) {
8089 // Subtract the remainder from the shifted dividend.
8090 SDValue Dividend = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT, N1: LL, N2: LH);
8091 SDValue Rem = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT, N1: RemL, N2: RemH);
8092
8093 Dividend = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Dividend, N2: Rem);
8094
8095 // Multiply by the multiplicative inverse of the divisor modulo
8096 // (1 << BitWidth).
8097 APInt MulFactor = Divisor.multiplicativeInverse();
8098
8099 SDValue Quotient = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Dividend,
8100 N2: DAG.getConstant(Val: MulFactor, DL: dl, VT));
8101
8102 // Split the quotient into low and high parts.
8103 SDValue QuotL, QuotH;
8104 std::tie(args&: QuotL, args&: QuotH) = DAG.SplitScalar(N: Quotient, DL: dl, LoVT: HiLoVT, HiVT: HiLoVT);
8105 Result.push_back(Elt: QuotL);
8106 Result.push_back(Elt: QuotH);
8107 }
8108
8109 if (Opcode != ISD::UDIV) {
8110 // If we shifted the input, shift the remainder left and add the bits we
8111 // shifted off the input.
8112 if (TrailingZeros) {
8113 RemL = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HiLoVT, N1: RemL,
8114 N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl));
8115 RemL = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: RemL, N2: PartialRem);
8116 }
8117 Result.push_back(Elt: RemL);
8118 Result.push_back(Elt: DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT));
8119 }
8120
8121 return true;
8122}
8123
8124// Check that (every element of) Z is undef or not an exact multiple of BW.
8125static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
8126 return ISD::matchUnaryPredicate(
8127 Op: Z,
8128 Match: [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(RHS: BW) != 0; },
8129 /*AllowUndef=*/AllowUndefs: true, /*AllowTruncation=*/true);
8130}
8131
8132static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
8133 EVT VT = Node->getValueType(ResNo: 0);
8134 SDValue ShX, ShY;
8135 SDValue ShAmt, InvShAmt;
8136 SDValue X = Node->getOperand(Num: 0);
8137 SDValue Y = Node->getOperand(Num: 1);
8138 SDValue Z = Node->getOperand(Num: 2);
8139 SDValue Mask = Node->getOperand(Num: 3);
8140 SDValue VL = Node->getOperand(Num: 4);
8141
8142 unsigned BW = VT.getScalarSizeInBits();
8143 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
8144 SDLoc DL(SDValue(Node, 0));
8145
8146 EVT ShVT = Z.getValueType();
8147 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8148 // fshl: X << C | Y >> (BW - C)
8149 // fshr: X << (BW - C) | Y >> C
8150 // where C = Z % BW is not zero
8151 SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
8152 ShAmt = DAG.getNode(Opcode: ISD::VP_UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC, N3: Mask, N4: VL);
8153 InvShAmt = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT: ShVT, N1: BitWidthC, N2: ShAmt, N3: Mask, N4: VL);
8154 ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: IsFSHL ? ShAmt : InvShAmt, N3: Mask,
8155 N4: VL);
8156 ShY = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT, N1: Y, N2: IsFSHL ? InvShAmt : ShAmt, N3: Mask,
8157 N4: VL);
8158 } else {
8159 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8160 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
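    // Splitting the complementary shift as 'Y >> 1 >> (BW - 1 - C)' keeps both
    // shift amounts within [0, BW), so C == 0 stays well defined: e.g. for
    // fshl, C == 0 yields X | (Y >> 1 >> (BW - 1)) == X | 0 == X.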
8161 SDValue BitMask = DAG.getConstant(Val: BW - 1, DL, VT: ShVT);
8162 if (isPowerOf2_32(Value: BW)) {
8163 // Z % BW -> Z & (BW - 1)
8164 ShAmt = DAG.getNode(Opcode: ISD::VP_AND, DL, VT: ShVT, N1: Z, N2: BitMask, N3: Mask, N4: VL);
8165 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8166 SDValue NotZ = DAG.getNode(Opcode: ISD::VP_XOR, DL, VT: ShVT, N1: Z,
8167 N2: DAG.getAllOnesConstant(DL, VT: ShVT), N3: Mask, N4: VL);
8168 InvShAmt = DAG.getNode(Opcode: ISD::VP_AND, DL, VT: ShVT, N1: NotZ, N2: BitMask, N3: Mask, N4: VL);
8169 } else {
8170 SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
8171 ShAmt = DAG.getNode(Opcode: ISD::VP_UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC, N3: Mask, N4: VL);
8172 InvShAmt = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT: ShVT, N1: BitMask, N2: ShAmt, N3: Mask, N4: VL);
8173 }
8174
8175 SDValue One = DAG.getConstant(Val: 1, DL, VT: ShVT);
8176 if (IsFSHL) {
8177 ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: ShAmt, N3: Mask, N4: VL);
8178 SDValue ShY1 = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT, N1: Y, N2: One, N3: Mask, N4: VL);
8179 ShY = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT, N1: ShY1, N2: InvShAmt, N3: Mask, N4: VL);
8180 } else {
8181 SDValue ShX1 = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: One, N3: Mask, N4: VL);
8182 ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: ShX1, N2: InvShAmt, N3: Mask, N4: VL);
8183 ShY = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT, N1: Y, N2: ShAmt, N3: Mask, N4: VL);
8184 }
8185 }
8186 return DAG.getNode(Opcode: ISD::VP_OR, DL, VT, N1: ShX, N2: ShY, N3: Mask, N4: VL);
8187}
8188
8189SDValue TargetLowering::expandFunnelShift(SDNode *Node,
8190 SelectionDAG &DAG) const {
8191 if (Node->isVPOpcode())
8192 return expandVPFunnelShift(Node, DAG);
8193
8194 EVT VT = Node->getValueType(ResNo: 0);
8195
8196 if (VT.isVector() && (!isOperationLegalOrCustom(Op: ISD::SHL, VT) ||
8197 !isOperationLegalOrCustom(Op: ISD::SRL, VT) ||
8198 !isOperationLegalOrCustom(Op: ISD::SUB, VT) ||
8199 !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)))
8200 return SDValue();
8201
8202 SDValue X = Node->getOperand(Num: 0);
8203 SDValue Y = Node->getOperand(Num: 1);
8204 SDValue Z = Node->getOperand(Num: 2);
8205
8206 unsigned BW = VT.getScalarSizeInBits();
8207 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
8208 SDLoc DL(SDValue(Node, 0));
8209
8210 EVT ShVT = Z.getValueType();
8211
8212 // If a funnel shift in the other direction is better supported, use it.
8213 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
8214 if (!isOperationLegalOrCustom(Op: Node->getOpcode(), VT) &&
8215 isOperationLegalOrCustom(Op: RevOpcode, VT) && isPowerOf2_32(Value: BW)) {
8216 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8217 // fshl X, Y, Z -> fshr X, Y, -Z
8218 // fshr X, Y, Z -> fshl X, Y, -Z
8219 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: ShVT);
8220 Z = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Zero, N2: Z);
8221 } else {
8222 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8223 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
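      // These identities hold because ~Z == BW - 1 - Z (mod BW): pre-shifting
      // the operands by 1 contributes the missing single bit of shift, so the
      // total is 1 + (BW - 1 - Z) == BW - Z (mod BW), i.e. the reversed
      // direction, while ~Z itself always stays within [0, BW).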
8224 SDValue One = DAG.getConstant(Val: 1, DL, VT: ShVT);
8225 if (IsFSHL) {
8226 Y = DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: One);
8227 X = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: X, N2: One);
8228 } else {
8229 X = DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: One);
8230 Y = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Y, N2: One);
8231 }
8232 Z = DAG.getNOT(DL, Val: Z, VT: ShVT);
8233 }
8234 return DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: Z);
8235 }
8236
8237 SDValue ShX, ShY;
8238 SDValue ShAmt, InvShAmt;
8239 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8240 // fshl: X << C | Y >> (BW - C)
8241 // fshr: X << (BW - C) | Y >> C
8242 // where C = Z % BW is not zero
8243 SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
8244 ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC);
8245 InvShAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: BitWidthC, N2: ShAmt);
8246 ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: IsFSHL ? ShAmt : InvShAmt);
8247 ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: IsFSHL ? InvShAmt : ShAmt);
8248 } else {
8249 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8250 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8251 SDValue Mask = DAG.getConstant(Val: BW - 1, DL, VT: ShVT);
8252 if (isPowerOf2_32(Value: BW)) {
8253 // Z % BW -> Z & (BW - 1)
8254 ShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: Z, N2: Mask);
8255 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8256 InvShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: DAG.getNOT(DL, Val: Z, VT: ShVT), N2: Mask);
8257 } else {
8258 SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
8259 ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC);
8260 InvShAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Mask, N2: ShAmt);
8261 }
8262
8263 SDValue One = DAG.getConstant(Val: 1, DL, VT: ShVT);
8264 if (IsFSHL) {
8265 ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: ShAmt);
8266 SDValue ShY1 = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: One);
8267 ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: ShY1, N2: InvShAmt);
8268 } else {
8269 SDValue ShX1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: One);
8270 ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ShX1, N2: InvShAmt);
8271 ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: ShAmt);
8272 }
8273 }
8274 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShX, N2: ShY);
8275}
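
// Worked example of the expansion above (illustrative only, not part of the
// lowering): with BW = 8, X = 0xAB, Y = 0xCD, fshl X, Y, 4 must produce the
// high byte of (0xABCD << 4), i.e. 0xBC. The nonzero-amount path computes
// (0xAB << 4) | (0xCD >> (8 - 4)) = 0xB0 | 0x0C = 0xBC. For Z = 0 the safe
// path computes (0xAB << 0) | ((0xCD >> 1) >> 7) = 0xAB | 0x00 = 0xAB,
// never shifting by the full bit width.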
8276
8277// TODO: Merge with expandFunnelShift.
8278SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
8279 SelectionDAG &DAG) const {
8280 EVT VT = Node->getValueType(ResNo: 0);
8281 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8282 bool IsLeft = Node->getOpcode() == ISD::ROTL;
8283 SDValue Op0 = Node->getOperand(Num: 0);
8284 SDValue Op1 = Node->getOperand(Num: 1);
8285 SDLoc DL(SDValue(Node, 0));
8286
8287 EVT ShVT = Op1.getValueType();
8288 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: ShVT);
8289
8290  // If a rotate in the other direction is better supported, use it.
8291 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8292 if (!isOperationLegalOrCustom(Op: Node->getOpcode(), VT) &&
8293 isOperationLegalOrCustom(Op: RevRot, VT) && isPowerOf2_32(Value: EltSizeInBits)) {
8294 SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Zero, N2: Op1);
8295 return DAG.getNode(Opcode: RevRot, DL, VT, N1: Op0, N2: Sub);
8296 }
8297
8298 if (!AllowVectorOps && VT.isVector() &&
8299 (!isOperationLegalOrCustom(Op: ISD::SHL, VT) ||
8300 !isOperationLegalOrCustom(Op: ISD::SRL, VT) ||
8301 !isOperationLegalOrCustom(Op: ISD::SUB, VT) ||
8302 !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT) ||
8303 !isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT)))
8304 return SDValue();
8305
8306 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8307 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8308 SDValue BitWidthMinusOneC = DAG.getConstant(Val: EltSizeInBits - 1, DL, VT: ShVT);
8309 SDValue ShVal;
8310 SDValue HsVal;
8311 if (isPowerOf2_32(Value: EltSizeInBits)) {
8312 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8313 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8314 SDValue NegOp1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Zero, N2: Op1);
8315 SDValue ShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: Op1, N2: BitWidthMinusOneC);
8316 ShVal = DAG.getNode(Opcode: ShOpc, DL, VT, N1: Op0, N2: ShAmt);
8317 SDValue HsAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: NegOp1, N2: BitWidthMinusOneC);
8318 HsVal = DAG.getNode(Opcode: HsOpc, DL, VT, N1: Op0, N2: HsAmt);
8319 } else {
8320 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8321 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8322 SDValue BitWidthC = DAG.getConstant(Val: EltSizeInBits, DL, VT: ShVT);
8323 SDValue ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Op1, N2: BitWidthC);
8324 ShVal = DAG.getNode(Opcode: ShOpc, DL, VT, N1: Op0, N2: ShAmt);
8325 SDValue HsAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: BitWidthMinusOneC, N2: ShAmt);
8326 SDValue One = DAG.getConstant(Val: 1, DL, VT: ShVT);
8327 HsVal =
8328 DAG.getNode(Opcode: HsOpc, DL, VT, N1: DAG.getNode(Opcode: HsOpc, DL, VT, N1: Op0, N2: One), N2: HsAmt);
8329 }
8330 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShVal, N2: HsVal);
8331}
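
// Worked example of the power-of-two rotate expansion (illustrative only):
// with EltSizeInBits = 8, x = 0x96 (0b10010110) and c = 3,
// rotl x, 3 = (0x96 << (3 & 7)) | (0x96 >> (-3 & 7))
//           = (0x96 << 3) | (0x96 >> 5) = 0xB0 | 0x04 = 0xB4 (0b10110100).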
8332
8333void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
8334 SelectionDAG &DAG) const {
8335 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
8336 EVT VT = Node->getValueType(ResNo: 0);
8337 unsigned VTBits = VT.getScalarSizeInBits();
8338 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
8339
8340 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
8341 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
8342 SDValue ShOpLo = Node->getOperand(Num: 0);
8343 SDValue ShOpHi = Node->getOperand(Num: 1);
8344 SDValue ShAmt = Node->getOperand(Num: 2);
8345 EVT ShAmtVT = ShAmt.getValueType();
8346 EVT ShAmtCCVT =
8347 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: ShAmtVT);
8348 SDLoc dl(Node);
8349
8350  // ISD::FSHL and ISD::FSHR have defined overflow behavior, but ISD::SHL and
8351  // ISD::SRA/SRL nodes do not. Insert an AND to be safe; it's usually
8352  // optimized away during isel.
8353 SDValue SafeShAmt = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ShAmtVT, N1: ShAmt,
8354 N2: DAG.getConstant(Val: VTBits - 1, DL: dl, VT: ShAmtVT));
8355 SDValue Tmp1 = IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: ShOpHi,
8356 N2: DAG.getConstant(Val: VTBits - 1, DL: dl, VT: ShAmtVT))
8357 : DAG.getConstant(Val: 0, DL: dl, VT);
8358
8359 SDValue Tmp2, Tmp3;
8360 if (IsSHL) {
8361 Tmp2 = DAG.getNode(Opcode: ISD::FSHL, DL: dl, VT, N1: ShOpHi, N2: ShOpLo, N3: ShAmt);
8362 Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: ShOpLo, N2: SafeShAmt);
8363 } else {
8364 Tmp2 = DAG.getNode(Opcode: ISD::FSHR, DL: dl, VT, N1: ShOpHi, N2: ShOpLo, N3: ShAmt);
8365 Tmp3 = DAG.getNode(Opcode: IsSRA ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: ShOpHi, N2: SafeShAmt);
8366 }
8367
8368  // If the shift amount is greater than or equal to the width of a part, we
8369  // don't use the result from the FSHL/FSHR. Insert a test and select the
8370  // appropriate values for large shift amounts.
8371 SDValue AndNode = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ShAmtVT, N1: ShAmt,
8372 N2: DAG.getConstant(Val: VTBits, DL: dl, VT: ShAmtVT));
8373 SDValue Cond = DAG.getSetCC(DL: dl, VT: ShAmtCCVT, LHS: AndNode,
8374 RHS: DAG.getConstant(Val: 0, DL: dl, VT: ShAmtVT), Cond: ISD::SETNE);
8375
8376 if (IsSHL) {
8377 Hi = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp3, N3: Tmp2);
8378 Lo = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp1, N3: Tmp3);
8379 } else {
8380 Lo = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp3, N3: Tmp2);
8381 Hi = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp1, N3: Tmp3);
8382 }
8383}
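
// Worked example (illustrative only): for SHL_PARTS with 32-bit parts and
// ShAmt = 40, AndNode = (40 & 32) != 0, so Cond picks the large-shift values:
// Hi = Lo << (40 & 31) = Lo << 8 (Tmp3) and Lo = 0 (Tmp1). For ShAmt = 8,
// Cond is false and we keep Hi = fshl(Hi, Lo, 8) (Tmp2) and Lo = Lo << 8
// (Tmp3).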
8384
8385bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
8386 SelectionDAG &DAG) const {
8387 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8388 SDValue Src = Node->getOperand(Num: OpNo);
8389 EVT SrcVT = Src.getValueType();
8390 EVT DstVT = Node->getValueType(ResNo: 0);
8391 SDLoc dl(SDValue(Node, 0));
8392
8393 // FIXME: Only f32 to i64 conversions are supported.
8394 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
8395 return false;
8396
8397 if (Node->isStrictFPOpcode())
8398    // When a NaN is converted to an integer, a trap is allowed. We can't
8399    // use this expansion here because it would eliminate that trap. Other
8400    // traps are also allowed and cannot be eliminated. See
8401    // IEEE 754-2008 sec 5.8.
8402 return false;
8403
8404 // Expand f32 -> i64 conversion
8405 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8406 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8407 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8408 EVT IntVT = SrcVT.changeTypeToInteger();
8409 EVT IntShVT = getShiftAmountTy(LHSTy: IntVT, DL: DAG.getDataLayout());
8410
8411 SDValue ExponentMask = DAG.getConstant(Val: 0x7F800000, DL: dl, VT: IntVT);
8412 SDValue ExponentLoBit = DAG.getConstant(Val: 23, DL: dl, VT: IntVT);
8413 SDValue Bias = DAG.getConstant(Val: 127, DL: dl, VT: IntVT);
8414 SDValue SignMask = DAG.getConstant(Val: APInt::getSignMask(BitWidth: SrcEltBits), DL: dl, VT: IntVT);
8415 SDValue SignLowBit = DAG.getConstant(Val: SrcEltBits - 1, DL: dl, VT: IntVT);
8416 SDValue MantissaMask = DAG.getConstant(Val: 0x007FFFFF, DL: dl, VT: IntVT);
8417
8418 SDValue Bits = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: IntVT, Operand: Src);
8419
8420 SDValue ExponentBits = DAG.getNode(
8421 Opcode: ISD::SRL, DL: dl, VT: IntVT, N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: ExponentMask),
8422 N2: DAG.getZExtOrTrunc(Op: ExponentLoBit, DL: dl, VT: IntShVT));
8423 SDValue Exponent = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: ExponentBits, N2: Bias);
8424
8425 SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: IntVT,
8426 N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: SignMask),
8427 N2: DAG.getZExtOrTrunc(Op: SignLowBit, DL: dl, VT: IntShVT));
8428 Sign = DAG.getSExtOrTrunc(Op: Sign, DL: dl, VT: DstVT);
8429
8430 SDValue R = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: IntVT,
8431 N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: MantissaMask),
8432 N2: DAG.getConstant(Val: 0x00800000, DL: dl, VT: IntVT));
8433
8434 R = DAG.getZExtOrTrunc(Op: R, DL: dl, VT: DstVT);
8435
8436 R = DAG.getSelectCC(
8437 DL: dl, LHS: Exponent, RHS: ExponentLoBit,
8438 True: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: DstVT, N1: R,
8439 N2: DAG.getZExtOrTrunc(
8440 Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: Exponent, N2: ExponentLoBit),
8441 DL: dl, VT: IntShVT)),
8442 False: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: DstVT, N1: R,
8443 N2: DAG.getZExtOrTrunc(
8444 Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: ExponentLoBit, N2: Exponent),
8445 DL: dl, VT: IntShVT)),
8446 Cond: ISD::SETGT);
8447
8448 SDValue Ret = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: DstVT,
8449 N1: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: R, N2: Sign), N2: Sign);
8450
8451 Result = DAG.getSelectCC(DL: dl, LHS: Exponent, RHS: DAG.getConstant(Val: 0, DL: dl, VT: IntVT),
8452 True: DAG.getConstant(Val: 0, DL: dl, VT: DstVT), False: Ret, Cond: ISD::SETLT);
8453 return true;
8454}
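
// Worked example of the bit-twiddling path above (illustrative only):
// converting -5.5f, Bits = 0xC0B00000, so ExponentBits = 129 and
// Exponent = 129 - 127 = 2, while the arithmetic shift of the sign bit gives
// Sign = -1. The mantissa with the implicit bit is R = 0x00B00000. Since
// Exponent (2) is not greater than ExponentLoBit (23), R >>= 23 - 2 yields 5,
// and (R ^ Sign) - Sign negates it to the expected -5.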
8455
8456bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
8457 SDValue &Chain,
8458 SelectionDAG &DAG) const {
8459 SDLoc dl(SDValue(Node, 0));
8460 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8461 SDValue Src = Node->getOperand(Num: OpNo);
8462
8463 EVT SrcVT = Src.getValueType();
8464 EVT DstVT = Node->getValueType(ResNo: 0);
8465 EVT SetCCVT =
8466 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: SrcVT);
8467 EVT DstSetCCVT =
8468 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: DstVT);
8469
8470 // Only expand vector types if we have the appropriate vector bit operations.
8471 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
8472 ISD::FP_TO_SINT;
8473 if (DstVT.isVector() && (!isOperationLegalOrCustom(Op: SIntOpcode, VT: DstVT) ||
8474 !isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT: SrcVT)))
8475 return false;
8476
8477  // If the maximum float value is smaller than the signed integer range, the
8478  // destination signmask can't be represented by the float, so we can just
8479  // use FP_TO_SINT directly.
8480 const fltSemantics &APFSem = SrcVT.getFltSemantics();
8481 APFloat APF(APFSem, APInt::getZero(numBits: SrcVT.getScalarSizeInBits()));
8482 APInt SignMask = APInt::getSignMask(BitWidth: DstVT.getScalarSizeInBits());
8483 if (APFloat::opOverflow &
8484 APF.convertFromAPInt(Input: SignMask, IsSigned: false, RM: APFloat::rmNearestTiesToEven)) {
8485 if (Node->isStrictFPOpcode()) {
8486 Result = DAG.getNode(Opcode: ISD::STRICT_FP_TO_SINT, DL: dl, ResultTys: { DstVT, MVT::Other },
8487 Ops: { Node->getOperand(Num: 0), Src });
8488 Chain = Result.getValue(R: 1);
8489 } else
8490 Result = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Src);
8491 return true;
8492 }
8493
8494  // Don't expand it if there isn't a cheap fsub instruction.
8495 if (!isOperationLegalOrCustom(
8496 Op: Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, VT: SrcVT))
8497 return false;
8498
8499 SDValue Cst = DAG.getConstantFP(Val: APF, DL: dl, VT: SrcVT);
8500 SDValue Sel;
8501
8502 if (Node->isStrictFPOpcode()) {
8503 Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT,
8504 Chain: Node->getOperand(Num: 0), /*IsSignaling*/ true);
8505 Chain = Sel.getValue(R: 1);
8506 } else {
8507 Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT);
8508 }
8509
8510 bool Strict = Node->isStrictFPOpcode() ||
8511 shouldUseStrictFP_TO_INT(FpVT: SrcVT, IntVT: DstVT, /*IsSigned*/ false);
8512
8513 if (Strict) {
8514    // Expand based on the maximum range of FP_TO_SINT: if the value exceeds
8515    // the signmask, offset it (the result should be fully representable).
8516 // Sel = Src < 0x8000000000000000
8517 // FltOfs = select Sel, 0, 0x8000000000000000
8518 // IntOfs = select Sel, 0, 0x8000000000000000
8519 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8520
8521 // TODO: Should any fast-math-flags be set for the FSUB?
8522 SDValue FltOfs = DAG.getSelect(DL: dl, VT: SrcVT, Cond: Sel,
8523 LHS: DAG.getConstantFP(Val: 0.0, DL: dl, VT: SrcVT), RHS: Cst);
8524 Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT);
8525 SDValue IntOfs = DAG.getSelect(DL: dl, VT: DstVT, Cond: Sel,
8526 LHS: DAG.getConstant(Val: 0, DL: dl, VT: DstVT),
8527 RHS: DAG.getConstant(Val: SignMask, DL: dl, VT: DstVT));
8528 SDValue SInt;
8529 if (Node->isStrictFPOpcode()) {
8530 SDValue Val = DAG.getNode(Opcode: ISD::STRICT_FSUB, DL: dl, ResultTys: { SrcVT, MVT::Other },
8531 Ops: { Chain, Src, FltOfs });
8532 SInt = DAG.getNode(Opcode: ISD::STRICT_FP_TO_SINT, DL: dl, ResultTys: { DstVT, MVT::Other },
8533 Ops: { Val.getValue(R: 1), Val });
8534 Chain = SInt.getValue(R: 1);
8535 } else {
8536 SDValue Val = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: SrcVT, N1: Src, N2: FltOfs);
8537 SInt = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Val);
8538 }
8539 Result = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: SInt, N2: IntOfs);
8540 } else {
8541 // Expand based on maximum range of FP_TO_SINT:
8542 // True = fp_to_sint(Src)
8543 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
8544 // Result = select (Src < 0x8000000000000000), True, False
8545
8546 SDValue True = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Src);
8547 // TODO: Should any fast-math-flags be set for the FSUB?
8548 SDValue False = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT,
8549 Operand: DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: SrcVT, N1: Src, N2: Cst));
8550 False = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: False,
8551 N2: DAG.getConstant(Val: SignMask, DL: dl, VT: DstVT));
8552 Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT);
8553 Result = DAG.getSelect(DL: dl, VT: DstVT, Cond: Sel, LHS: True, RHS: False);
8554 }
8555 return true;
8556}
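
// Worked example of the non-strict branch (illustrative only): converting the
// f32 value 3.0e9 to u32, Cst is 2^31. Src >= Cst, so Sel is false and the
// result is fp_to_sint(3.0e9 - 2^31) ^ 0x80000000
// = 852516352 ^ 0x80000000 = 3000000000; the xor re-adds the 2^31 offset that
// was subtracted in the FP domain.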
8557
8558bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
8559 SDValue &Chain, SelectionDAG &DAG) const {
8560  // This transform is not correct for converting 0 when the rounding mode is
8561  // set to round toward negative infinity, which would produce -0.0, so
8562  // disable it under strictfp.
8563 if (Node->isStrictFPOpcode())
8564 return false;
8565
8566 SDValue Src = Node->getOperand(Num: 0);
8567 EVT SrcVT = Src.getValueType();
8568 EVT DstVT = Node->getValueType(ResNo: 0);
8569
8570 // If the input is known to be non-negative and SINT_TO_FP is legal then use
8571 // it.
8572 if (Node->getFlags().hasNonNeg() &&
8573 isOperationLegalOrCustom(Op: ISD::SINT_TO_FP, VT: SrcVT)) {
8574 Result =
8575 DAG.getNode(Opcode: ISD::SINT_TO_FP, DL: SDLoc(Node), VT: DstVT, Operand: Node->getOperand(Num: 0));
8576 return true;
8577 }
8578
8579 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
8580 return false;
8581
8582 // Only expand vector types if we have the appropriate vector bit
8583 // operations.
8584 if (SrcVT.isVector() && (!isOperationLegalOrCustom(Op: ISD::SRL, VT: SrcVT) ||
8585 !isOperationLegalOrCustom(Op: ISD::FADD, VT: DstVT) ||
8586 !isOperationLegalOrCustom(Op: ISD::FSUB, VT: DstVT) ||
8587 !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT: SrcVT) ||
8588 !isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT: SrcVT)))
8589 return false;
8590
8591 SDLoc dl(SDValue(Node, 0));
8592
8593  // Implementation of unsigned i64 to f64 following the algorithm in
8594  // __floatundidf in compiler-rt. This implementation performs rounding
8595  // correctly in all rounding modes with the exception of converting 0
8596  // when rounding toward negative infinity. In that case the fsub will
8597  // produce -0.0. This will be added to +0.0 and produce -0.0, which is
8598  // incorrect.
8599 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), DL: dl, VT: SrcVT);
8600 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
8601 Val: llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), DL: dl, VT: DstVT);
8602 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), DL: dl, VT: SrcVT);
8603 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), DL: dl, VT: SrcVT);
8604 SDValue HiShift = DAG.getShiftAmountConstant(Val: 32, VT: SrcVT, DL: dl);
8605
8606 SDValue Lo = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: SrcVT, N1: Src, N2: LoMask);
8607 SDValue Hi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: SrcVT, N1: Src, N2: HiShift);
8608 SDValue LoOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: SrcVT, N1: Lo, N2: TwoP52);
8609 SDValue HiOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: SrcVT, N1: Hi, N2: TwoP84);
8610 SDValue LoFlt = DAG.getBitcast(VT: DstVT, V: LoOr);
8611 SDValue HiFlt = DAG.getBitcast(VT: DstVT, V: HiOr);
8612 SDValue HiSub = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: DstVT, N1: HiFlt, N2: TwoP84PlusTwoP52);
8613 Result = DAG.getNode(Opcode: ISD::FADD, DL: dl, VT: DstVT, N1: LoFlt, N2: HiSub);
8614 return true;
8615}
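
// Worked example (illustrative only): for Src = 2^40 + 7, Lo = 7 and
// Hi = 2^8. LoFlt reinterprets 0x4330000000000007 as the double 2^52 + 7, and
// HiFlt reinterprets 0x4530000000000100 as 2^84 + 2^40. HiSub is then
// (2^84 + 2^40) - (2^84 + 2^52) = 2^40 - 2^52, and the final add gives
// (2^52 + 7) + (2^40 - 2^52) = 2^40 + 7 exactly.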
8616
8617SDValue
8618TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
8619 SelectionDAG &DAG) const {
8620 unsigned Opcode = Node->getOpcode();
8621 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8622 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8623 "Wrong opcode");
8624
8625 if (Node->getFlags().hasNoNaNs()) {
8626 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8627 EVT VT = Node->getValueType(ResNo: 0);
8628 if ((!isCondCodeLegal(CC: Pred, VT: VT.getSimpleVT()) ||
8629 !isOperationLegalOrCustom(Op: ISD::VSELECT, VT)) &&
8630 VT.isVector())
8631 return SDValue();
8632 SDValue Op1 = Node->getOperand(Num: 0);
8633 SDValue Op2 = Node->getOperand(Num: 1);
8634 SDValue SelCC = DAG.getSelectCC(DL: SDLoc(Node), LHS: Op1, RHS: Op2, True: Op1, False: Op2, Cond: Pred);
8635 SelCC->setFlags(Node->getFlags());
8636 return SelCC;
8637 }
8638
8639 return SDValue();
8640}
8641
8642SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
8643 SelectionDAG &DAG) const {
8644 if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
8645 return Expanded;
8646
8647 EVT VT = Node->getValueType(ResNo: 0);
8648 if (VT.isScalableVector())
8649 report_fatal_error(
8650 reason: "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
8651
8652 SDLoc dl(Node);
8653 unsigned NewOp =
8654 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8655
8656 if (isOperationLegalOrCustom(Op: NewOp, VT)) {
8657 SDValue Quiet0 = Node->getOperand(Num: 0);
8658 SDValue Quiet1 = Node->getOperand(Num: 1);
8659
8660 if (!Node->getFlags().hasNoNaNs()) {
8661      // Insert canonicalizes if it's possible we need to quiet the inputs to
8662      // get correct sNaN behavior.
8663 if (!DAG.isKnownNeverSNaN(Op: Quiet0)) {
8664 Quiet0 = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL: dl, VT, Operand: Quiet0,
8665 Flags: Node->getFlags());
8666 }
8667 if (!DAG.isKnownNeverSNaN(Op: Quiet1)) {
8668 Quiet1 = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL: dl, VT, Operand: Quiet1,
8669 Flags: Node->getFlags());
8670 }
8671 }
8672
8673 return DAG.getNode(Opcode: NewOp, DL: dl, VT, N1: Quiet0, N2: Quiet1, Flags: Node->getFlags());
8674 }
8675
8676  // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM, use those
8677  // instead if there are no NaNs and there can't be an incompatible zero
8678  // compare: at least one operand isn't +/-0, or there are no signed zeros.
8679 if ((Node->getFlags().hasNoNaNs() ||
8680 (DAG.isKnownNeverNaN(Op: Node->getOperand(Num: 0)) &&
8681 DAG.isKnownNeverNaN(Op: Node->getOperand(Num: 1)))) &&
8682 (Node->getFlags().hasNoSignedZeros() ||
8683 DAG.isKnownNeverZeroFloat(Op: Node->getOperand(Num: 0)) ||
8684 DAG.isKnownNeverZeroFloat(Op: Node->getOperand(Num: 1)))) {
8685 unsigned IEEE2018Op =
8686 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8687 if (isOperationLegalOrCustom(Op: IEEE2018Op, VT))
8688 return DAG.getNode(Opcode: IEEE2018Op, DL: dl, VT, N1: Node->getOperand(Num: 0),
8689 N2: Node->getOperand(Num: 1), Flags: Node->getFlags());
8690 }
8691
8692 if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
8693 return SelCC;
8694
8695 return SDValue();
8696}
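
// Worked example (illustrative only): for fminnum(sNaN, 1.0) with
// FMINNUM_IEEE legal, the FCANONICALIZE above quiets the sNaN, and
// FMINNUM_IEEE(qNaN, 1.0) then returns 1.0, preserving fmin's
// return-the-number-on-NaN semantics.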
8697
8698SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
8699 SelectionDAG &DAG) const {
8700 if (SDValue Expanded = expandVectorNaryOpBySplitting(Node: N, DAG))
8701 return Expanded;
8702
8703 SDLoc DL(N);
8704 SDValue LHS = N->getOperand(Num: 0);
8705 SDValue RHS = N->getOperand(Num: 1);
8706 unsigned Opc = N->getOpcode();
8707 EVT VT = N->getValueType(ResNo: 0);
8708 EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
8709 bool IsMax = Opc == ISD::FMAXIMUM;
8710 SDNodeFlags Flags = N->getFlags();
8711
8712  // First, implement a comparison that does not propagate NaN. If no native
8713  // fmin or fmax is available, use a plain select with setcc instead.
8714 SDValue MinMax;
8715 unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
8716 unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
8717
8718 // FIXME: We should probably define fminnum/fmaxnum variants with correct
8719 // signed zero behavior.
8720 bool MinMaxMustRespectOrderedZero = false;
8721
8722 if (isOperationLegalOrCustom(Op: CompOpcIeee, VT)) {
8723 MinMax = DAG.getNode(Opcode: CompOpcIeee, DL, VT, N1: LHS, N2: RHS, Flags);
8724 MinMaxMustRespectOrderedZero = true;
8725 } else if (isOperationLegalOrCustom(Op: CompOpc, VT)) {
8726 MinMax = DAG.getNode(Opcode: CompOpc, DL, VT, N1: LHS, N2: RHS, Flags);
8727 } else {
8728 if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
8729 return DAG.UnrollVectorOp(N);
8730
8731    // Any NaN will be propagated later, so orderedness doesn't matter.
8732 SDValue Compare =
8733 DAG.getSetCC(DL, VT: CCVT, LHS, RHS, Cond: IsMax ? ISD::SETOGT : ISD::SETOLT);
8734 MinMax = DAG.getSelect(DL, VT, Cond: Compare, LHS, RHS, Flags);
8735 }
8736
8737  // Propagate a NaN from either operand.
8738 if (!N->getFlags().hasNoNaNs() &&
8739 (!DAG.isKnownNeverNaN(Op: RHS) || !DAG.isKnownNeverNaN(Op: LHS))) {
8740 ConstantFP *FPNaN = ConstantFP::get(Context&: *DAG.getContext(),
8741 V: APFloat::getNaN(Sem: VT.getFltSemantics()));
8742 MinMax = DAG.getSelect(DL, VT, Cond: DAG.getSetCC(DL, VT: CCVT, LHS, RHS, Cond: ISD::SETUO),
8743 LHS: DAG.getConstantFP(V: *FPNaN, DL, VT), RHS: MinMax, Flags);
8744 }
8745
8746  // fminimum/fmaximum require -0.0 to compare less than +0.0
8747 if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
8748 !DAG.isKnownNeverZeroFloat(Op: RHS) && !DAG.isKnownNeverZeroFloat(Op: LHS)) {
8749 SDValue IsZero = DAG.getSetCC(DL, VT: CCVT, LHS: MinMax,
8750 RHS: DAG.getConstantFP(Val: 0.0, DL, VT), Cond: ISD::SETOEQ);
8751 SDValue TestZero =
8752 DAG.getTargetConstant(Val: IsMax ? fcPosZero : fcNegZero, DL, VT: MVT::i32);
8753 SDValue LCmp = DAG.getSelect(
8754 DL, VT, Cond: DAG.getNode(Opcode: ISD::IS_FPCLASS, DL, VT: CCVT, N1: LHS, N2: TestZero), LHS,
8755 RHS: MinMax, Flags);
8756 SDValue RCmp = DAG.getSelect(
8757 DL, VT, Cond: DAG.getNode(Opcode: ISD::IS_FPCLASS, DL, VT: CCVT, N1: RHS, N2: TestZero), LHS: RHS,
8758 RHS: LCmp, Flags);
8759 MinMax = DAG.getSelect(DL, VT, Cond: IsZero, LHS: RCmp, RHS: MinMax, Flags);
8760 }
8761
8762 return MinMax;
8763}
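
// Worked example of the signed-zero fixup (illustrative only): for
// fmaximum(+0.0, -0.0), the setcc/select fallback computes
// select(+0.0 > -0.0, +0.0, -0.0) = -0.0 because the ordered compare treats
// the zeros as equal. The fixup sees MinMax == 0.0, tests each operand for
// fcPosZero, and returns +0.0 as fmaximum requires.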
8764
8765SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
8766 SelectionDAG &DAG) const {
8767 SDLoc DL(Node);
8768 SDValue LHS = Node->getOperand(Num: 0);
8769 SDValue RHS = Node->getOperand(Num: 1);
8770 unsigned Opc = Node->getOpcode();
8771 EVT VT = Node->getValueType(ResNo: 0);
8772 EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
8773 bool IsMax = Opc == ISD::FMAXIMUMNUM;
8774 const TargetOptions &Options = DAG.getTarget().Options;
8775 SDNodeFlags Flags = Node->getFlags();
8776
8777 unsigned NewOp =
8778 Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8779
8780 if (isOperationLegalOrCustom(Op: NewOp, VT)) {
8781 if (!Flags.hasNoNaNs()) {
8782      // Insert canonicalizes if it's possible we need to quiet the inputs to
8783      // get correct sNaN behavior.
8784 if (!DAG.isKnownNeverSNaN(Op: LHS)) {
8785 LHS = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL, VT, Operand: LHS, Flags);
8786 }
8787 if (!DAG.isKnownNeverSNaN(Op: RHS)) {
8788 RHS = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL, VT, Operand: RHS, Flags);
8789 }
8790 }
8791
8792 return DAG.getNode(Opcode: NewOp, DL, VT, N1: LHS, N2: RHS, Flags);
8793 }
8794
8795  // We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has the same
8796  // behavior in all other cases, +0.0 vs -0.0 included.
8797 if (Flags.hasNoNaNs() ||
8798 (DAG.isKnownNeverNaN(Op: LHS) && DAG.isKnownNeverNaN(Op: RHS))) {
8799 unsigned IEEE2019Op =
8800 Opc == ISD::FMINIMUMNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8801 if (isOperationLegalOrCustom(Op: IEEE2019Op, VT))
8802 return DAG.getNode(Opcode: IEEE2019Op, DL, VT, N1: LHS, N2: RHS, Flags);
8803 }
8804
8805  // FMINNUM/FMAXNUM return qNaN if either operand is sNaN, and they may
8806  // return either one for +0.0 vs -0.0.
8807 if ((Flags.hasNoNaNs() ||
8808 (DAG.isKnownNeverSNaN(Op: LHS) && DAG.isKnownNeverSNaN(Op: RHS))) &&
8809 (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(Op: LHS) ||
8810 DAG.isKnownNeverZeroFloat(Op: RHS))) {
8811 unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
8812 if (isOperationLegalOrCustom(Op: IEEE2008Op, VT))
8813 return DAG.getNode(Opcode: IEEE2008Op, DL, VT, N1: LHS, N2: RHS, Flags);
8814 }
8815
8816 if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
8817 return DAG.UnrollVectorOp(N: Node);
8818
8819  // If only one operand is NaN, override it with the other operand.
8820 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(Op: LHS)) {
8821 LHS = DAG.getSelectCC(DL, LHS, RHS: LHS, True: RHS, False: LHS, Cond: ISD::SETUO);
8822 }
8823 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(Op: RHS)) {
8824 RHS = DAG.getSelectCC(DL, LHS: RHS, RHS, True: LHS, False: RHS, Cond: ISD::SETUO);
8825 }
8826
8827 SDValue MinMax =
8828 DAG.getSelectCC(DL, LHS, RHS, True: LHS, False: RHS, Cond: IsMax ? ISD::SETGT : ISD::SETLT);
8829
8830  // TODO: We need to quiet sNaNs under strictfp.
8831
8832 // Fixup signed zero behavior.
8833 if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros() ||
8834 DAG.isKnownNeverZeroFloat(Op: LHS) || DAG.isKnownNeverZeroFloat(Op: RHS)) {
8835 return MinMax;
8836 }
8837 SDValue TestZero =
8838 DAG.getTargetConstant(Val: IsMax ? fcPosZero : fcNegZero, DL, VT: MVT::i32);
8839 SDValue IsZero = DAG.getSetCC(DL, VT: CCVT, LHS: MinMax,
8840 RHS: DAG.getConstantFP(Val: 0.0, DL, VT), Cond: ISD::SETEQ);
8841 SDValue LCmp = DAG.getSelect(
8842 DL, VT, Cond: DAG.getNode(Opcode: ISD::IS_FPCLASS, DL, VT: CCVT, N1: LHS, N2: TestZero), LHS,
8843 RHS: MinMax, Flags);
8844 SDValue RCmp = DAG.getSelect(
8845 DL, VT, Cond: DAG.getNode(Opcode: ISD::IS_FPCLASS, DL, VT: CCVT, N1: RHS, N2: TestZero), LHS: RHS, RHS: LCmp,
8846 Flags);
8847 return DAG.getSelect(DL, VT, Cond: IsZero, LHS: RCmp, RHS: MinMax, Flags);
8848}
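
// Worked example (illustrative only): for fmaximumnum(NaN, 3.0) in the final
// fallback, the first SETUO select replaces the NaN LHS with RHS, the SETGT
// select then yields 3.0, and since 3.0 is known never zero the signed-zero
// fixup returns MinMax unchanged.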
8849
8850/// Returns a true value if this FPClassTest can be performed with an ordered
8851/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8852/// std::nullopt if it cannot be performed as a compare with 0.
8853static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8854 const fltSemantics &Semantics,
8855 const MachineFunction &MF) {
8856 FPClassTest OrderedMask = Test & ~fcNan;
8857 FPClassTest NanTest = Test & fcNan;
8858 bool IsOrdered = NanTest == fcNone;
8859 bool IsUnordered = NanTest == fcNan;
8860
8861 // Skip cases that are testing for only a qnan or snan.
8862 if (!IsOrdered && !IsUnordered)
8863 return std::nullopt;
8864
8865 if (OrderedMask == fcZero &&
8866 MF.getDenormalMode(FPType: Semantics).Input == DenormalMode::IEEE)
8867 return IsOrdered;
8868 if (OrderedMask == (fcZero | fcSubnormal) &&
8869 MF.getDenormalMode(FPType: Semantics).inputsAreZero())
8870 return IsOrdered;
8871 return std::nullopt;
8872}
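
// Worked example (illustrative only): with IEEE denormal-input mode,
// isFCmpEqualZero(fcZero, ...) returns true, i.e. is_fpclass(x, fcZero) is
// exactly the ordered compare x == 0.0, while fcZero | fcNan returns false
// and maps to the unordered compare x u== 0.0. A mask like fcZero | fcQNan
// yields std::nullopt since only one NaN kind is tested.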
8873
8874SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
8875 const FPClassTest OrigTestMask,
8876 SDNodeFlags Flags, const SDLoc &DL,
8877 SelectionDAG &DAG) const {
8878 EVT OperandVT = Op.getValueType();
8879 assert(OperandVT.isFloatingPoint());
8880 FPClassTest Test = OrigTestMask;
8881
8882  // Degenerate cases.
8883 if (Test == fcNone)
8884 return DAG.getBoolConstant(V: false, DL, VT: ResultVT, OpVT: OperandVT);
8885 if (Test == fcAllFlags)
8886 return DAG.getBoolConstant(V: true, DL, VT: ResultVT, OpVT: OperandVT);
8887
8888 // PPC double double is a pair of doubles, of which the higher part determines
8889 // the value class.
8890 if (OperandVT == MVT::ppcf128) {
8891 Op = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL, VT: MVT::f64, N1: Op,
8892 N2: DAG.getConstant(Val: 1, DL, VT: MVT::i32));
8893 OperandVT = MVT::f64;
8894 }
8895
8896 // Floating-point type properties.
8897 EVT ScalarFloatVT = OperandVT.getScalarType();
8898 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(Context&: *DAG.getContext());
8899 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
8900 bool IsF80 = (ScalarFloatVT == MVT::f80);
8901
8902 // Some checks can be implemented using float comparisons, if floating point
8903 // exceptions are ignored.
8904 if (Flags.hasNoFPExcept() &&
8905 isOperationLegalOrCustom(Op: ISD::SETCC, VT: OperandVT.getScalarType())) {
8906 FPClassTest FPTestMask = Test;
8907 bool IsInvertedFP = false;
8908
8909 if (FPClassTest InvertedFPCheck =
8910 invertFPClassTestIfSimpler(Test: FPTestMask, UseFCmp: true)) {
8911 FPTestMask = InvertedFPCheck;
8912 IsInvertedFP = true;
8913 }
8914
8915 ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
8916 ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
8917
8918 // See if we can fold an | fcNan into an unordered compare.
8919 FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
8920
8921 // Can't fold the ordered check if we're only testing for snan or qnan
8922 // individually.
8923 if ((FPTestMask & fcNan) != fcNan)
8924 OrderedFPTestMask = FPTestMask;
8925
8926 const bool IsOrdered = FPTestMask == OrderedFPTestMask;
8927
8928 if (std::optional<bool> IsCmp0 =
8929 isFCmpEqualZero(Test: FPTestMask, Semantics, MF: DAG.getMachineFunction());
8930 IsCmp0 && (isCondCodeLegalOrCustom(
8931 CC: *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
8932 VT: OperandVT.getScalarType().getSimpleVT()))) {
8933
8934 // If denormals could be implicitly treated as 0, this is not equivalent
8935 // to a compare with 0 since it will also be true for denormals.
8936 return DAG.getSetCC(DL, VT: ResultVT, LHS: Op,
8937 RHS: DAG.getConstantFP(Val: 0.0, DL, VT: OperandVT),
8938 Cond: *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
8939 }
8940
8941 if (FPTestMask == fcNan &&
8942 isCondCodeLegalOrCustom(CC: IsInvertedFP ? ISD::SETO : ISD::SETUO,
8943 VT: OperandVT.getScalarType().getSimpleVT()))
8944 return DAG.getSetCC(DL, VT: ResultVT, LHS: Op, RHS: Op,
8945 Cond: IsInvertedFP ? ISD::SETO : ISD::SETUO);
8946
8947 bool IsOrderedInf = FPTestMask == fcInf;
8948 if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
8949 isCondCodeLegalOrCustom(CC: IsOrderedInf ? OrderedCmpOpcode
8950 : UnorderedCmpOpcode,
8951 VT: OperandVT.getScalarType().getSimpleVT()) &&
8952 isOperationLegalOrCustom(Op: ISD::FABS, VT: OperandVT.getScalarType()) &&
8953 (isOperationLegal(Op: ISD::ConstantFP, VT: OperandVT.getScalarType()) ||
8954 (OperandVT.isVector() &&
8955 isOperationLegalOrCustom(Op: ISD::BUILD_VECTOR, VT: OperandVT)))) {
8956 // isinf(x) --> fabs(x) == inf
8957 SDValue Abs = DAG.getNode(Opcode: ISD::FABS, DL, VT: OperandVT, Operand: Op);
8958 SDValue Inf =
8959 DAG.getConstantFP(Val: APFloat::getInf(Sem: Semantics), DL, VT: OperandVT);
8960 return DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: Inf,
8961 Cond: IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
8962 }
8963
8964 if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
8965 isCondCodeLegalOrCustom(CC: IsOrdered ? OrderedCmpOpcode
8966 : UnorderedCmpOpcode,
8967 VT: OperandVT.getSimpleVT())) {
8968 // isposinf(x) --> x == inf
8969 // isneginf(x) --> x == -inf
8970 // isposinf(x) || nan --> x u== inf
8971 // isneginf(x) || nan --> x u== -inf
8972
8973 SDValue Inf = DAG.getConstantFP(
8974 Val: APFloat::getInf(Sem: Semantics, Negative: OrderedFPTestMask == fcNegInf), DL,
8975 VT: OperandVT);
8976 return DAG.getSetCC(DL, VT: ResultVT, LHS: Op, RHS: Inf,
8977 Cond: IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
8978 }
8979
8980 if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
8981 // TODO: Could handle ordered case, but it produces worse code for
8982 // x86. Maybe handle ordered if fabs is free?
8983
8984 ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
8985 ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
8986
8987 if (isCondCodeLegalOrCustom(CC: IsOrdered ? OrderedOp : UnorderedOp,
8988 VT: OperandVT.getScalarType().getSimpleVT())) {
8989 // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal
8990
8991 // TODO: Maybe only makes sense if fabs is free. Integer test of
8992 // exponent bits seems better for x86.
8993 SDValue Abs = DAG.getNode(Opcode: ISD::FABS, DL, VT: OperandVT, Operand: Op);
8994 SDValue SmallestNormal = DAG.getConstantFP(
8995 Val: APFloat::getSmallestNormalized(Sem: Semantics), DL, VT: OperandVT);
8996 return DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: SmallestNormal,
8997 Cond: IsOrdered ? OrderedOp : UnorderedOp);
8998 }
8999 }
9000
9001 if (FPTestMask == fcNormal) {
9002 // TODO: Handle unordered
9003 ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9004 ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;
9005
9006 if (isCondCodeLegalOrCustom(CC: IsFiniteOp,
9007 VT: OperandVT.getScalarType().getSimpleVT()) &&
9008 isCondCodeLegalOrCustom(CC: IsNormalOp,
9009 VT: OperandVT.getScalarType().getSimpleVT()) &&
9010 isFAbsFree(VT: OperandVT)) {
9011 // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
9012 SDValue Inf =
9013 DAG.getConstantFP(Val: APFloat::getInf(Sem: Semantics), DL, VT: OperandVT);
9014 SDValue SmallestNormal = DAG.getConstantFP(
9015 Val: APFloat::getSmallestNormalized(Sem: Semantics), DL, VT: OperandVT);
9016
9017 SDValue Abs = DAG.getNode(Opcode: ISD::FABS, DL, VT: OperandVT, Operand: Op);
9018 SDValue IsFinite = DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: Inf, Cond: IsFiniteOp);
9019 SDValue IsNormal =
9020 DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: SmallestNormal, Cond: IsNormalOp);
9021 unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
9022 return DAG.getNode(Opcode: LogicOp, DL, VT: ResultVT, N1: IsFinite, N2: IsNormal);
9023 }
9024 }
9025 }
9026
9027  // Some checks may be represented as the inversion of a simpler check, for
9028  // example "inf|normal|subnormal|zero" => !"nan".
9029 bool IsInverted = false;
9030
9031 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, UseFCmp: false)) {
9032 Test = InvertedCheck;
9033 IsInverted = true;
9034 }
9035
9036 // In the general case use integer operations.
9037 unsigned BitSize = OperandVT.getScalarSizeInBits();
9038 EVT IntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: BitSize);
9039 if (OperandVT.isVector())
9040 IntVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: IntVT,
9041 EC: OperandVT.getVectorElementCount());
9042 SDValue OpAsInt = DAG.getBitcast(VT: IntVT, V: Op);
9043
9044 // Various masks.
9045 APInt SignBit = APInt::getSignMask(BitWidth: BitSize);
9046 APInt ValueMask = APInt::getSignedMaxValue(numBits: BitSize); // All bits but sign.
9047 APInt Inf = APFloat::getInf(Sem: Semantics).bitcastToAPInt(); // Exp and int bit.
9048 const unsigned ExplicitIntBitInF80 = 63;
9049 APInt ExpMask = Inf;
9050 if (IsF80)
9051 ExpMask.clearBit(BitPosition: ExplicitIntBitInF80);
9052 APInt AllOneMantissa = APFloat::getLargest(Sem: Semantics).bitcastToAPInt() & ~Inf;
9053 APInt QNaNBitMask =
9054 APInt::getOneBitSet(numBits: BitSize, BitNo: AllOneMantissa.getActiveBits() - 1);
9055  APInt InversionMask = APInt::getAllOnes(numBits: ResultVT.getScalarSizeInBits());
9056
9057 SDValue ValueMaskV = DAG.getConstant(Val: ValueMask, DL, VT: IntVT);
9058 SDValue SignBitV = DAG.getConstant(Val: SignBit, DL, VT: IntVT);
9059 SDValue ExpMaskV = DAG.getConstant(Val: ExpMask, DL, VT: IntVT);
9060 SDValue ZeroV = DAG.getConstant(Val: 0, DL, VT: IntVT);
9061 SDValue InfV = DAG.getConstant(Val: Inf, DL, VT: IntVT);
9062  SDValue ResultInversionMask = DAG.getConstant(Val: InversionMask, DL, VT: ResultVT);
9063
9064 SDValue Res;
9065 const auto appendResult = [&](SDValue PartialRes) {
9066 if (PartialRes) {
9067 if (Res)
9068 Res = DAG.getNode(Opcode: ISD::OR, DL, VT: ResultVT, N1: Res, N2: PartialRes);
9069 else
9070 Res = PartialRes;
9071 }
9072 };
9073
9074 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
9075 const auto getIntBitIsSet = [&]() -> SDValue {
9076 if (!IntBitIsSetV) {
9077 APInt IntBitMask(BitSize, 0);
9078 IntBitMask.setBit(ExplicitIntBitInF80);
9079 SDValue IntBitMaskV = DAG.getConstant(Val: IntBitMask, DL, VT: IntVT);
9080 SDValue IntBitV = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: IntBitMaskV);
9081 IntBitIsSetV = DAG.getSetCC(DL, VT: ResultVT, LHS: IntBitV, RHS: ZeroV, Cond: ISD::SETNE);
9082 }
9083 return IntBitIsSetV;
9084 };
9085
9086 // Split the value into sign bit and absolute value.
9087 SDValue AbsV = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: ValueMaskV);
9088 SDValue SignV = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt,
9089 RHS: DAG.getConstant(Val: 0, DL, VT: IntVT), Cond: ISD::SETLT);
9090
9091 // Tests that involve more than one class should be processed first.
9092 SDValue PartialRes;
9093
9094 if (IsF80)
9095 ; // Detect finite numbers of f80 by checking individual classes because
9096 // they have different settings of the explicit integer bit.
9097 else if ((Test & fcFinite) == fcFinite) {
9098 // finite(V) ==> abs(V) < exp_mask
9099 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ExpMaskV, Cond: ISD::SETLT);
9100 Test &= ~fcFinite;
9101 } else if ((Test & fcFinite) == fcPosFinite) {
9102 // finite(V) && V > 0 ==> V < exp_mask
9103 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: ExpMaskV, Cond: ISD::SETULT);
9104 Test &= ~fcPosFinite;
9105 } else if ((Test & fcFinite) == fcNegFinite) {
9106 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
9107 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ExpMaskV, Cond: ISD::SETLT);
9108 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
9109 Test &= ~fcNegFinite;
9110 }
9111 appendResult(PartialRes);
9112
9113 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
9114 // fcZero | fcSubnormal => test all exponent bits are 0
9115 // TODO: Handle sign bit specific cases
9116 if (PartialCheck == (fcZero | fcSubnormal)) {
9117 SDValue ExpBits = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: ExpMaskV);
9118 SDValue ExpIsZero =
9119 DAG.getSetCC(DL, VT: ResultVT, LHS: ExpBits, RHS: ZeroV, Cond: ISD::SETEQ);
9120 appendResult(ExpIsZero);
9121 Test &= ~PartialCheck & fcAllFlags;
9122 }
9123 }
9124
9125 // Check for individual classes.
9126
9127 if (unsigned PartialCheck = Test & fcZero) {
9128 if (PartialCheck == fcPosZero)
9129 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: ZeroV, Cond: ISD::SETEQ);
9130 else if (PartialCheck == fcZero)
9131 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ZeroV, Cond: ISD::SETEQ);
9132 else // ISD::fcNegZero
9133 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: SignBitV, Cond: ISD::SETEQ);
9134 appendResult(PartialRes);
9135 }
9136
9137 if (unsigned PartialCheck = Test & fcSubnormal) {
9138 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
9139 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
9140 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
9141 SDValue MantissaV = DAG.getConstant(Val: AllOneMantissa, DL, VT: IntVT);
9142 SDValue VMinusOneV =
9143 DAG.getNode(Opcode: ISD::SUB, DL, VT: IntVT, N1: V, N2: DAG.getConstant(Val: 1, DL, VT: IntVT));
9144 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: VMinusOneV, RHS: MantissaV, Cond: ISD::SETULT);
9145 if (PartialCheck == fcNegSubnormal)
9146 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
9147 appendResult(PartialRes);
9148 }
9149
9150 if (unsigned PartialCheck = Test & fcInf) {
9151 if (PartialCheck == fcPosInf)
9152 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: InfV, Cond: ISD::SETEQ);
9153 else if (PartialCheck == fcInf)
9154 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETEQ);
9155 else { // ISD::fcNegInf
9156 APInt NegInf = APFloat::getInf(Sem: Semantics, Negative: true).bitcastToAPInt();
9157 SDValue NegInfV = DAG.getConstant(Val: NegInf, DL, VT: IntVT);
9158 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: NegInfV, Cond: ISD::SETEQ);
9159 }
9160 appendResult(PartialRes);
9161 }
9162
9163 if (unsigned PartialCheck = Test & fcNan) {
9164 APInt InfWithQnanBit = Inf | QNaNBitMask;
9165 SDValue InfWithQnanBitV = DAG.getConstant(Val: InfWithQnanBit, DL, VT: IntVT);
9166 if (PartialCheck == fcNan) {
9167 // isnan(V) ==> abs(V) > int(inf)
9168 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETGT);
9169 if (IsF80) {
9170        // Recognize unsupported values as NaNs for compatibility with glibc.
9171        // In such values, (exp(V) == 0) == int_bit.
9172 SDValue ExpBits = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: AbsV, N2: ExpMaskV);
9173 SDValue ExpIsZero =
9174 DAG.getSetCC(DL, VT: ResultVT, LHS: ExpBits, RHS: ZeroV, Cond: ISD::SETEQ);
9175 SDValue IsPseudo =
9176 DAG.getSetCC(DL, VT: ResultVT, LHS: getIntBitIsSet(), RHS: ExpIsZero, Cond: ISD::SETEQ);
9177 PartialRes = DAG.getNode(Opcode: ISD::OR, DL, VT: ResultVT, N1: PartialRes, N2: IsPseudo);
9178 }
9179 } else if (PartialCheck == fcQNan) {
9180 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
9181 PartialRes =
9182 DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfWithQnanBitV, Cond: ISD::SETGE);
9183 } else { // ISD::fcSNan
9184 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
9185 // abs(V) < (unsigned(Inf) | quiet_bit)
9186 SDValue IsNan = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETGT);
9187 SDValue IsNotQnan =
9188 DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfWithQnanBitV, Cond: ISD::SETLT);
9189 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: IsNan, N2: IsNotQnan);
9190 }
9191 appendResult(PartialRes);
9192 }
9193
9194 if (unsigned PartialCheck = Test & fcNormal) {
9195 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
9196 APInt ExpLSB = ExpMask & ~(ExpMask.shl(shiftAmt: 1));
9197 SDValue ExpLSBV = DAG.getConstant(Val: ExpLSB, DL, VT: IntVT);
9198 SDValue ExpMinus1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: IntVT, N1: AbsV, N2: ExpLSBV);
9199 APInt ExpLimit = ExpMask - ExpLSB;
9200 SDValue ExpLimitV = DAG.getConstant(Val: ExpLimit, DL, VT: IntVT);
9201 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: ExpMinus1, RHS: ExpLimitV, Cond: ISD::SETULT);
9202 if (PartialCheck == fcNegNormal)
9203 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
9204 else if (PartialCheck == fcPosNormal) {
9205 SDValue PosSignV =
9206          DAG.getNode(Opcode: ISD::XOR, DL, VT: ResultVT, N1: SignV, N2: ResultInversionMask);
9207 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: PosSignV);
9208 }
9209 if (IsF80)
9210 PartialRes =
9211 DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: getIntBitIsSet());
9212 appendResult(PartialRes);
9213 }
9214
9215 if (!Res)
9216 return DAG.getConstant(Val: IsInverted, DL, VT: ResultVT);
9217 if (IsInverted)
9218    Res = DAG.getNode(Opcode: ISD::XOR, DL, VT: ResultVT, N1: Res, N2: ResultInversionMask);
9219 return Res;
9220}
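
// Worked example of the integer-path masks (illustrative only): for f32,
// SignBit = 0x80000000, ValueMask = 0x7FFFFFFF, Inf = ExpMask = 0x7F800000,
// AllOneMantissa = 0x007FFFFF and QNaNBitMask = 0x00400000. Testing
// fcSubnormal on the smallest denormal 0x00000001 computes
// unsigned(0x00000001 - 1) = 0 < 0x007FFFFF, which holds, while zero wraps to
// 0xFFFFFFFF and fails as required.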
9221
9222// Only expand vector types if we have the appropriate vector bit operations.
9223static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
9224 assert(VT.isVector() && "Expected vector type");
9225 unsigned Len = VT.getScalarSizeInBits();
9226 return TLI.isOperationLegalOrCustom(Op: ISD::ADD, VT) &&
9227 TLI.isOperationLegalOrCustom(Op: ISD::SUB, VT) &&
9228 TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
9229 (Len == 8 || TLI.isOperationLegalOrCustom(Op: ISD::MUL, VT)) &&
9230 TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT);
9231}
9232
9233SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
9234 SDLoc dl(Node);
9235 EVT VT = Node->getValueType(ResNo: 0);
9236 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9237 SDValue Op = Node->getOperand(Num: 0);
9238 unsigned Len = VT.getScalarSizeInBits();
9239 assert(VT.isInteger() && "CTPOP not implemented for this type.");
9240
9241 // TODO: Add support for irregular type lengths.
9242 if (!(Len <= 128 && Len % 8 == 0))
9243 return SDValue();
9244
9245 // Only expand vector types if we have the appropriate vector bit operations.
9246 if (VT.isVector() && !canExpandVectorCTPOP(TLI: *this, VT))
9247 return SDValue();
9248
9249 // This is the "best" algorithm from
9250 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9251 SDValue Mask55 =
9252 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x55)), DL: dl, VT);
9253 SDValue Mask33 =
9254 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x33)), DL: dl, VT);
9255 SDValue Mask0F =
9256 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x0F)), DL: dl, VT);
9257
9258 // v = v - ((v >> 1) & 0x55555555...)
9259 Op = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Op,
9260 N2: DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
9261 N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
9262 N2: DAG.getConstant(Val: 1, DL: dl, VT: ShVT)),
9263 N2: Mask55));
9264 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9265 Op = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op, N2: Mask33),
9266 N2: DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
9267 N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
9268 N2: DAG.getConstant(Val: 2, DL: dl, VT: ShVT)),
9269 N2: Mask33));
9270 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9271 Op = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
9272 N1: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Op,
9273 N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
9274 N2: DAG.getConstant(Val: 4, DL: dl, VT: ShVT))),
9275 N2: Mask0F);
9276
9277 if (Len <= 8)
9278 return Op;
9279
9280 // Avoid the multiply if we only have 2 bytes to add.
9281 // TODO: Only doing this for scalars because vectors weren't as obviously
9282 // improved.
9283 if (Len == 16 && !VT.isVector()) {
9284 // v = (v + (v >> 8)) & 0x00FF;
9285 return DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
9286 N1: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Op,
9287 N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
9288 N2: DAG.getConstant(Val: 8, DL: dl, VT: ShVT))),
9289 N2: DAG.getConstant(Val: 0xFF, DL: dl, VT));
9290 }
9291
9292 // v = (v * 0x01010101...) >> (Len - 8)
9293 SDValue V;
9294 if (isOperationLegalOrCustomOrPromote(
9295 Op: ISD::MUL, VT: getTypeToTransformTo(Context&: *DAG.getContext(), VT))) {
9296 SDValue Mask01 =
9297 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x01)), DL: dl, VT);
9298 V = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Op, N2: Mask01);
9299 } else {
9300 V = Op;
9301 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9302 SDValue ShiftC = DAG.getShiftAmountConstant(Val: Shift, VT, DL: dl);
9303 V = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: V,
9304 N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: V, N2: ShiftC));
9305 }
9306 }
9307 return DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: V, N2: DAG.getConstant(Val: Len - 8, DL: dl, VT: ShVT));
9308}
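
// Worked example of the bithack (illustrative only): for the 32-bit value
// v = 0x000000FF (popcount 8), v -= (v >> 1) & 0x55555555 gives 0xAA; the
// 0x33333333 step gives 0x22 + 0x22 = 0x44; the 0x0F0F0F0F step gives
// (0x44 + 0x04) & 0x0F0F0F0F = 0x08; and (0x08 * 0x01010101) >> 24 = 8.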
9309
9310SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
9311 SDLoc dl(Node);
9312 EVT VT = Node->getValueType(ResNo: 0);
9313 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9314 SDValue Op = Node->getOperand(Num: 0);
9315 SDValue Mask = Node->getOperand(Num: 1);
9316 SDValue VL = Node->getOperand(Num: 2);
9317 unsigned Len = VT.getScalarSizeInBits();
9318 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
9319
9320 // TODO: Add support for irregular type lengths.
9321 if (!(Len <= 128 && Len % 8 == 0))
9322 return SDValue();
9323
9324  // This is the same algorithm as in expandCTPOP, from
9325  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9326 SDValue Mask55 =
9327 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x55)), DL: dl, VT);
9328 SDValue Mask33 =
9329 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x33)), DL: dl, VT);
9330 SDValue Mask0F =
9331 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x0F)), DL: dl, VT);
9332
9333 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
9334
9335 // v = v - ((v >> 1) & 0x55555555...)
9336 Tmp1 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT,
9337 N1: DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op,
9338 N2: DAG.getConstant(Val: 1, DL: dl, VT: ShVT), N3: Mask, N4: VL),
9339 N2: Mask55, N3: Mask, N4: VL);
9340 Op = DAG.getNode(Opcode: ISD::VP_SUB, DL: dl, VT, N1: Op, N2: Tmp1, N3: Mask, N4: VL);
9341
9342 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9343 Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op, N2: Mask33, N3: Mask, N4: VL);
9344 Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT,
9345 N1: DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op,
9346 N2: DAG.getConstant(Val: 2, DL: dl, VT: ShVT), N3: Mask, N4: VL),
9347 N2: Mask33, N3: Mask, N4: VL);
9348 Op = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: VL);
9349
9350 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9351  Tmp4 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 4, DL: dl, VT: ShVT),
9352                     N3: Mask, N4: VL);
9353  Tmp5 = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: Op, N2: Tmp4, N3: Mask, N4: VL);
9354 Op = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp5, N2: Mask0F, N3: Mask, N4: VL);
9355
9356 if (Len <= 8)
9357 return Op;
9358
9359 // v = (v * 0x01010101...) >> (Len - 8)
9360 SDValue V;
9361 if (isOperationLegalOrCustomOrPromote(
9362 Op: ISD::VP_MUL, VT: getTypeToTransformTo(Context&: *DAG.getContext(), VT))) {
9363 SDValue Mask01 =
9364 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x01)), DL: dl, VT);
9365 V = DAG.getNode(Opcode: ISD::VP_MUL, DL: dl, VT, N1: Op, N2: Mask01, N3: Mask, N4: VL);
9366 } else {
9367 V = Op;
9368 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9369 SDValue ShiftC = DAG.getShiftAmountConstant(Val: Shift, VT, DL: dl);
9370 V = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: V,
9371 N2: DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: V, N2: ShiftC, N3: Mask, N4: VL),
9372 N3: Mask, N4: VL);
9373 }
9374 }
9375 return DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: V, N2: DAG.getConstant(Val: Len - 8, DL: dl, VT: ShVT),
9376 N3: Mask, N4: VL);
9377}
9378
9379SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
9380 SDLoc dl(Node);
9381 EVT VT = Node->getValueType(ResNo: 0);
9382 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9383 SDValue Op = Node->getOperand(Num: 0);
9384 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9385
9386 // If the non-ZERO_UNDEF version is supported we can use that instead.
9387 if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
9388 isOperationLegalOrCustom(Op: ISD::CTLZ, VT))
9389 return DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Op);
9390
9391 // If the ZERO_UNDEF version is supported use that and handle the zero case.
9392 if (isOperationLegalOrCustom(Op: ISD::CTLZ_ZERO_UNDEF, VT)) {
9393 EVT SetCCVT =
9394 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
9395 SDValue CTLZ = DAG.getNode(Opcode: ISD::CTLZ_ZERO_UNDEF, DL: dl, VT, Operand: Op);
9396 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
9397 SDValue SrcIsZero = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
9398 return DAG.getSelect(DL: dl, VT, Cond: SrcIsZero,
9399 LHS: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT), RHS: CTLZ);
9400 }
9401
9402 // Only expand vector types if we have the appropriate vector bit operations.
9403 // This includes the operations needed to expand CTPOP if it isn't supported.
9404 if (VT.isVector() && (!isPowerOf2_32(Value: NumBitsPerElt) ||
9405 (!isOperationLegalOrCustom(Op: ISD::CTPOP, VT) &&
9406 !canExpandVectorCTPOP(TLI: *this, VT)) ||
9407 !isOperationLegalOrCustom(Op: ISD::SRL, VT) ||
9408 !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)))
9409 return SDValue();
9410
9411 // for now, we do this:
9412 // x = x | (x >> 1);
9413 // x = x | (x >> 2);
9414 // ...
9415 // x = x | (x >>16);
9416 // x = x | (x >>32); // for 64-bit input
9417 // return popcount(~x);
9418 //
9419 // Ref: "Hacker's Delight" by Henry Warren
9420 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9421 SDValue Tmp = DAG.getConstant(Val: 1ULL << i, DL: dl, VT: ShVT);
9422 Op = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Op,
9423 N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: Tmp));
9424 }
9425 Op = DAG.getNOT(DL: dl, Val: Op, VT);
9426 return DAG.getNode(Opcode: ISD::CTPOP, DL: dl, VT, Operand: Op);
9427}
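
// Worked example of the OR-smear (illustrative only): for the 32-bit value
// x = 0x00080000 (bit 19 set), the five OR steps smear the leading bit down,
// giving x = 0x000FFFFF, so ~x = 0xFFF00000 and popcount(~x) = 12, matching
// ctlz(0x00080000) = 31 - 19 = 12.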
9428
9429SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
9430 SDLoc dl(Node);
9431 EVT VT = Node->getValueType(ResNo: 0);
9432 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9433 SDValue Op = Node->getOperand(Num: 0);
9434 SDValue Mask = Node->getOperand(Num: 1);
9435 SDValue VL = Node->getOperand(Num: 2);
9436 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9437
9438 // do this:
9439 // x = x | (x >> 1);
9440 // x = x | (x >> 2);
9441 // ...
9442 // x = x | (x >>16);
9443 // x = x | (x >>32); // for 64-bit input
9444 // return popcount(~x);
9445 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9446 SDValue Tmp = DAG.getConstant(Val: 1ULL << i, DL: dl, VT: ShVT);
9447 Op = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Op,
9448 N2: DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: Tmp, N3: Mask, N4: VL), N3: Mask,
9449 N4: VL);
9450 }
9451 Op = DAG.getNode(Opcode: ISD::VP_XOR, DL: dl, VT, N1: Op, N2: DAG.getAllOnesConstant(DL: dl, VT),
9452 N3: Mask, N4: VL);
9453 return DAG.getNode(Opcode: ISD::VP_CTPOP, DL: dl, VT, N1: Op, N2: Mask, N3: VL);
9454}
9455
9456SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
9457 const SDLoc &DL, EVT VT, SDValue Op,
9458 unsigned BitWidth) const {
9459 if (BitWidth != 32 && BitWidth != 64)
9460 return SDValue();
9461 APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
9462 : APInt(64, 0x0218A392CD3D5DBFULL);
9463 const DataLayout &TD = DAG.getDataLayout();
9464 MachinePointerInfo PtrInfo =
9465 MachinePointerInfo::getConstantPool(MF&: DAG.getMachineFunction());
9466 unsigned ShiftAmt = BitWidth - Log2_32(Value: BitWidth);
9467 SDValue Neg = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT), N2: Op);
9468 SDValue Lookup = DAG.getNode(
9469 Opcode: ISD::SRL, DL, VT,
9470 N1: DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op, N2: Neg),
9471 N2: DAG.getConstant(Val: DeBruijn, DL, VT)),
9472 N2: DAG.getConstant(Val: ShiftAmt, DL, VT));
9473 Lookup = DAG.getSExtOrTrunc(Op: Lookup, DL, VT: getPointerTy(DL: TD));
9474
9475 SmallVector<uint8_t> Table(BitWidth, 0);
9476 for (unsigned i = 0; i < BitWidth; i++) {
9477 APInt Shl = DeBruijn.shl(shiftAmt: i);
9478 APInt Lshr = Shl.lshr(shiftAmt: ShiftAmt);
9479 Table[Lshr.getZExtValue()] = i;
9480 }
9481
  // Create a ConstantDataArray in the constant pool.
9483 auto *CA = ConstantDataArray::get(Context&: *DAG.getContext(), Elts&: Table);
9484 SDValue CPIdx = DAG.getConstantPool(C: CA, VT: getPointerTy(DL: TD),
9485 Align: TD.getPrefTypeAlign(Ty: CA->getType()));
9486 SDValue ExtLoad = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl: DL, VT, Chain: DAG.getEntryNode(),
9487 Ptr: DAG.getMemBasePlusOffset(Base: CPIdx, Offset: Lookup, DL),
9488 PtrInfo, MemVT: MVT::i8);
9489 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
9490 return ExtLoad;
9491
9492 EVT SetCCVT =
9493 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
9494 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
9495 SDValue SrcIsZero = DAG.getSetCC(DL, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
9496 return DAG.getSelect(DL, VT, Cond: SrcIsZero,
9497 LHS: DAG.getConstant(Val: BitWidth, DL, VT), RHS: ExtLoad);
9498}
9499
9500SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
9501 SDLoc dl(Node);
9502 EVT VT = Node->getValueType(ResNo: 0);
9503 SDValue Op = Node->getOperand(Num: 0);
9504 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9505
  // If the non-ZERO_UNDEF version is supported, we can use that instead.
9507 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
9508 isOperationLegalOrCustom(Op: ISD::CTTZ, VT))
9509 return DAG.getNode(Opcode: ISD::CTTZ, DL: dl, VT, Operand: Op);
9510
  // If the ZERO_UNDEF version is supported, use that and handle the zero
  // case.
9512 if (isOperationLegalOrCustom(Op: ISD::CTTZ_ZERO_UNDEF, VT)) {
9513 EVT SetCCVT =
9514 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
9515 SDValue CTTZ = DAG.getNode(Opcode: ISD::CTTZ_ZERO_UNDEF, DL: dl, VT, Operand: Op);
9516 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
9517 SDValue SrcIsZero = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
9518 return DAG.getSelect(DL: dl, VT, Cond: SrcIsZero,
9519 LHS: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT), RHS: CTTZ);
9520 }
9521
9522 // Only expand vector types if we have the appropriate vector bit operations.
9523 // This includes the operations needed to expand CTPOP if it isn't supported.
9524 if (VT.isVector() && (!isPowerOf2_32(Value: NumBitsPerElt) ||
9525 (!isOperationLegalOrCustom(Op: ISD::CTPOP, VT) &&
9526 !isOperationLegalOrCustom(Op: ISD::CTLZ, VT) &&
9527 !canExpandVectorCTPOP(TLI: *this, VT)) ||
9528 !isOperationLegalOrCustom(Op: ISD::SUB, VT) ||
9529 !isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) ||
9530 !isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT)))
9531 return SDValue();
9532
  // Emit a table lookup if the ISD::CTPOP used in the fallback path below
  // would be expanded or converted to a libcall.
9535 if (!VT.isVector() && !isOperationLegalOrCustomOrPromote(Op: ISD::CTPOP, VT) &&
9536 !isOperationLegal(Op: ISD::CTLZ, VT))
9537 if (SDValue V = CTTZTableLookup(Node, DAG, DL: dl, VT, Op, BitWidth: NumBitsPerElt))
9538 return V;
9539
  // For now, we use: { return popcount(~x & (x - 1)); }
  // unless the target has ctlz but not ctpop, in which case we use:
  // { return 32 - nlz(~x & (x - 1)); }
  // Ref: "Hacker's Delight" by Henry Warren.
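  //
  // A worked 8-bit example: for x = 0b01101000, x - 1 = 0b01100111 and
  // ~x = 0b10010111, so ~x & (x - 1) = 0b00000111 and popcount gives 3,
  // which is indeed cttz(0b01101000).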
9544 SDValue Tmp = DAG.getNode(
9545 Opcode: ISD::AND, DL: dl, VT, N1: DAG.getNOT(DL: dl, Val: Op, VT),
9546 N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 1, DL: dl, VT)));
9547
9548 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
9549 if (isOperationLegal(Op: ISD::CTLZ, VT) && !isOperationLegal(Op: ISD::CTPOP, VT)) {
9550 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT),
9551 N2: DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Tmp));
9552 }
9553
9554 return DAG.getNode(Opcode: ISD::CTPOP, DL: dl, VT, Operand: Tmp);
9555}
9556
9557SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
9558 SDValue Op = Node->getOperand(Num: 0);
9559 SDValue Mask = Node->getOperand(Num: 1);
9560 SDValue VL = Node->getOperand(Num: 2);
9561 SDLoc dl(Node);
9562 EVT VT = Node->getValueType(ResNo: 0);
9563
9564 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9565 SDValue Not = DAG.getNode(Opcode: ISD::VP_XOR, DL: dl, VT, N1: Op,
9566 N2: DAG.getAllOnesConstant(DL: dl, VT), N3: Mask, N4: VL);
9567 SDValue MinusOne = DAG.getNode(Opcode: ISD::VP_SUB, DL: dl, VT, N1: Op,
9568 N2: DAG.getConstant(Val: 1, DL: dl, VT), N3: Mask, N4: VL);
9569 SDValue Tmp = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Not, N2: MinusOne, N3: Mask, N4: VL);
9570 return DAG.getNode(Opcode: ISD::VP_CTPOP, DL: dl, VT, N1: Tmp, N2: Mask, N3: VL);
9571}
9572
9573SDValue TargetLowering::expandVPCTTZElements(SDNode *N,
9574 SelectionDAG &DAG) const {
9575 // %cond = to_bool_vec %source
9576 // %splat = splat /*val=*/VL
9577 // %tz = step_vector
  // %v = vp.select %cond, /*true=*/%tz, /*false=*/%splat
9579 // %r = vp.reduce.umin %v
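  //
  // For example, with %cond = <0,0,1,0> and an EVL of 4: %splat = <4,4,4,4>
  // and %tz = <0,1,2,3>, so %v = <4,4,2,4> and the UMIN reduction yields 2,
  // the index of the first active element.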
9580 SDLoc DL(N);
9581 SDValue Source = N->getOperand(Num: 0);
9582 SDValue Mask = N->getOperand(Num: 1);
9583 SDValue EVL = N->getOperand(Num: 2);
9584 EVT SrcVT = Source.getValueType();
9585 EVT ResVT = N->getValueType(ResNo: 0);
9586 EVT ResVecVT =
9587 EVT::getVectorVT(Context&: *DAG.getContext(), VT: ResVT, EC: SrcVT.getVectorElementCount());
9588
9589 // Convert to boolean vector.
9590 if (SrcVT.getScalarType() != MVT::i1) {
9591 SDValue AllZero = DAG.getConstant(Val: 0, DL, VT: SrcVT);
9592 SrcVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i1,
9593 EC: SrcVT.getVectorElementCount());
9594 Source = DAG.getNode(Opcode: ISD::VP_SETCC, DL, VT: SrcVT, N1: Source, N2: AllZero,
9595 N3: DAG.getCondCode(Cond: ISD::SETNE), N4: Mask, N5: EVL);
9596 }
9597
9598 SDValue ExtEVL = DAG.getZExtOrTrunc(Op: EVL, DL, VT: ResVT);
9599 SDValue Splat = DAG.getSplat(VT: ResVecVT, DL, Op: ExtEVL);
9600 SDValue StepVec = DAG.getStepVector(DL, ResVT: ResVecVT);
9601 SDValue Select =
9602 DAG.getNode(Opcode: ISD::VP_SELECT, DL, VT: ResVecVT, N1: Source, N2: StepVec, N3: Splat, N4: EVL);
9603 return DAG.getNode(Opcode: ISD::VP_REDUCE_UMIN, DL, VT: ResVT, N1: ExtEVL, N2: Select, N3: Mask, N4: EVL);
9604}
9605
9606SDValue TargetLowering::expandVectorFindLastActive(SDNode *N,
9607 SelectionDAG &DAG) const {
9608 SDLoc DL(N);
9609 SDValue Mask = N->getOperand(Num: 0);
9610 EVT MaskVT = Mask.getValueType();
9611 EVT BoolVT = MaskVT.getScalarType();
9612
9613 // Find a suitable type for a stepvector.
9614 ConstantRange VScaleRange(1, /*isFullSet=*/true); // Fixed length default.
9615 if (MaskVT.isScalableVector())
9616 VScaleRange = getVScaleRange(F: &DAG.getMachineFunction().getFunction(), BitWidth: 64);
9617 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9618 unsigned EltWidth = TLI.getBitWidthForCttzElements(
9619 RetTy: BoolVT.getTypeForEVT(Context&: *DAG.getContext()), EC: MaskVT.getVectorElementCount(),
9620 /*ZeroIsPoison=*/true, VScaleRange: &VScaleRange);
9621 EVT StepVT = MVT::getIntegerVT(BitWidth: EltWidth);
9622 EVT StepVecVT = MaskVT.changeVectorElementType(EltVT: StepVT);
9623
  // If promotion is required to make the type legal, do it here; integer
  // promotion within LegalizeVectorOps looks for types of the same overall
  // size but with a smaller number of larger elements, not the usual larger
  // overall size with the same number of larger elements.
9628 if (TLI.getTypeAction(VT: StepVecVT.getSimpleVT()) ==
9629 TargetLowering::TypePromoteInteger) {
9630 StepVecVT = TLI.getTypeToTransformTo(Context&: *DAG.getContext(), VT: StepVecVT);
9631 StepVT = StepVecVT.getVectorElementType();
9632 }
9633
9634 // Zero out lanes with inactive elements, then find the highest remaining
9635 // value from the stepvector.
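  // For example, with mask <1,0,1,0> the stepvector is <0,1,2,3>, the select
  // gives <0,0,2,0>, and the UMAX reduction yields 2, the index of the last
  // active lane. (Note that an all-false mask also yields 0, the same result
  // as a mask whose only active lane is lane 0.)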
9636 SDValue Zeroes = DAG.getConstant(Val: 0, DL, VT: StepVecVT);
9637 SDValue StepVec = DAG.getStepVector(DL, ResVT: StepVecVT);
9638 SDValue ActiveElts = DAG.getSelect(DL, VT: StepVecVT, Cond: Mask, LHS: StepVec, RHS: Zeroes);
9639 SDValue HighestIdx = DAG.getNode(Opcode: ISD::VECREDUCE_UMAX, DL, VT: StepVT, Operand: ActiveElts);
9640 return DAG.getZExtOrTrunc(Op: HighestIdx, DL, VT: N->getValueType(ResNo: 0));
9641}
9642
9643SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
9644 bool IsNegative) const {
9645 SDLoc dl(N);
9646 EVT VT = N->getValueType(ResNo: 0);
9647 SDValue Op = N->getOperand(Num: 0);
9648
9649 // abs(x) -> smax(x,sub(0,x))
9650 if (!IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
9651 isOperationLegal(Op: ISD::SMAX, VT)) {
9652 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
9653 Op = DAG.getFreeze(V: Op);
9654 return DAG.getNode(Opcode: ISD::SMAX, DL: dl, VT, N1: Op,
9655 N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
9656 }
9657
9658 // abs(x) -> umin(x,sub(0,x))
9659 if (!IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
9660 isOperationLegal(Op: ISD::UMIN, VT)) {
9661 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
9662 Op = DAG.getFreeze(V: Op);
9663 return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT, N1: Op,
9664 N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
9665 }
9666
9667 // 0 - abs(x) -> smin(x, sub(0,x))
9668 if (IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
9669 isOperationLegal(Op: ISD::SMIN, VT)) {
9670 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
9671 Op = DAG.getFreeze(V: Op);
9672 return DAG.getNode(Opcode: ISD::SMIN, DL: dl, VT, N1: Op,
9673 N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
9674 }
9675
9676 // Only expand vector types if we have the appropriate vector operations.
9677 if (VT.isVector() &&
9678 (!isOperationLegalOrCustom(Op: ISD::SRA, VT) ||
9679 (!IsNegative && !isOperationLegalOrCustom(Op: ISD::ADD, VT)) ||
9680 (IsNegative && !isOperationLegalOrCustom(Op: ISD::SUB, VT)) ||
9681 !isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT)))
9682 return SDValue();
9683
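  // The generic expansion: Y = sra(X, BW - 1) is all ones when X is negative
  // and zero otherwise, so sub(xor(X, Y), Y) conditionally negates X. For
  // example, for X = -5 in i8: Y = 0xFF, xor(X, Y) = 4, and 4 - (-1) = 5.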
9684 Op = DAG.getFreeze(V: Op);
9685 SDValue Shift = DAG.getNode(
9686 Opcode: ISD::SRA, DL: dl, VT, N1: Op,
9687 N2: DAG.getShiftAmountConstant(Val: VT.getScalarSizeInBits() - 1, VT, DL: dl));
9688 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Op, N2: Shift);
9689
9690 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
9691 if (!IsNegative)
9692 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Xor, N2: Shift);
9693
9694 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
9695 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Shift, N2: Xor);
9696}
9697
9698SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
9699 SDLoc dl(N);
9700 EVT VT = N->getValueType(ResNo: 0);
9701 SDValue LHS = DAG.getFreeze(V: N->getOperand(Num: 0));
9702 SDValue RHS = DAG.getFreeze(V: N->getOperand(Num: 1));
9703 bool IsSigned = N->getOpcode() == ISD::ABDS;
9704
9705 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
9706 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
9707 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
9708 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
9709 if (isOperationLegal(Op: MaxOpc, VT) && isOperationLegal(Op: MinOpc, VT)) {
9710 SDValue Max = DAG.getNode(Opcode: MaxOpc, DL: dl, VT, N1: LHS, N2: RHS);
9711 SDValue Min = DAG.getNode(Opcode: MinOpc, DL: dl, VT, N1: LHS, N2: RHS);
9712 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Max, N2: Min);
9713 }
9714
9715 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
9716 if (!IsSigned && isOperationLegal(Op: ISD::USUBSAT, VT))
9717 return DAG.getNode(Opcode: ISD::OR, DL: dl, VT,
9718 N1: DAG.getNode(Opcode: ISD::USUBSAT, DL: dl, VT, N1: LHS, N2: RHS),
9719 N2: DAG.getNode(Opcode: ISD::USUBSAT, DL: dl, VT, N1: RHS, N2: LHS));
9720
  // If the subtract doesn't overflow, then just use abs(sub()).
9722 // NOTE: don't use frozen operands for value tracking.
9723 bool IsNonNegative = DAG.SignBitIsZero(Op: N->getOperand(Num: 1)) &&
9724 DAG.SignBitIsZero(Op: N->getOperand(Num: 0));
9725
9726 if (DAG.willNotOverflowSub(IsSigned: IsSigned || IsNonNegative, N0: N->getOperand(Num: 0),
9727 N1: N->getOperand(Num: 1)))
9728 return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT,
9729 Operand: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS));
9730
9731 if (DAG.willNotOverflowSub(IsSigned: IsSigned || IsNonNegative, N0: N->getOperand(Num: 1),
9732 N1: N->getOperand(Num: 0)))
9733 return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT,
9734 Operand: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: RHS, N2: LHS));
9735
9736 EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
9737 ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
9738 SDValue Cmp = DAG.getSetCC(DL: dl, VT: CCVT, LHS, RHS, Cond: CC);
9739
9740 // Branchless expansion iff cmp result is allbits:
9741 // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
9742 // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
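  // With an all-bits mask M this is a branchless conditional negate:
  // sub(M, xor(M, D)) is D when M is all ones and -D when M is zero.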
9743 if (CCVT == VT && getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
9744 SDValue Diff = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS);
9745 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Diff, N2: Cmp);
9746 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Cmp, N2: Xor);
9747 }
9748
9749 // Similar to the branchless expansion, use the (sign-extended) usubo overflow
9750 // flag if the (scalar) type is illegal as this is more likely to legalize
9751 // cleanly:
9752 // abdu(lhs, rhs) -> sub(xor(sub(lhs, rhs), uof(lhs, rhs)), uof(lhs, rhs))
9753 if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT)) {
9754 SDValue USubO =
9755 DAG.getNode(Opcode: ISD::USUBO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::i1), Ops: {LHS, RHS});
9756 SDValue Cmp = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT, Operand: USubO.getValue(R: 1));
9757 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: USubO.getValue(R: 0), N2: Cmp);
9758 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Xor, N2: Cmp);
9759 }
9760
9761 // FIXME: Should really try to split the vector in case it's legal on a
9762 // subvector.
9763 if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
9764 return DAG.UnrollVectorOp(N);
9765
9766 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9767 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9768 return DAG.getSelect(DL: dl, VT, Cond: Cmp, LHS: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS),
9769 RHS: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: RHS, N2: LHS));
9770}
9771
9772SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
9773 SDLoc dl(N);
9774 EVT VT = N->getValueType(ResNo: 0);
9775 SDValue LHS = N->getOperand(Num: 0);
9776 SDValue RHS = N->getOperand(Num: 1);
9777
9778 unsigned Opc = N->getOpcode();
9779 bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
9780 bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
9781 unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
9782 unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
9783 unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
9784 unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9785 assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS ||
9786 Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
9787 "Unknown AVG node");
9788
9789 // If the operands are already extended, we can add+shift.
9790 bool IsExt =
9791 (IsSigned && DAG.ComputeNumSignBits(Op: LHS) >= 2 &&
9792 DAG.ComputeNumSignBits(Op: RHS) >= 2) ||
9793 (!IsSigned && DAG.computeKnownBits(Op: LHS).countMinLeadingZeros() >= 1 &&
9794 DAG.computeKnownBits(Op: RHS).countMinLeadingZeros() >= 1);
9795 if (IsExt) {
9796 SDValue Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: LHS, N2: RHS);
9797 if (!IsFloor)
9798 Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Sum, N2: DAG.getConstant(Val: 1, DL: dl, VT));
9799 return DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: Sum,
9800 N2: DAG.getShiftAmountConstant(Val: 1, VT, DL: dl));
9801 }
9802
9803 // For scalars, see if we can efficiently extend/truncate to use add+shift.
9804 if (VT.isScalarInteger()) {
9805 unsigned BW = VT.getScalarSizeInBits();
9806 EVT ExtVT = VT.getIntegerVT(Context&: *DAG.getContext(), BitWidth: 2 * BW);
9807 if (isTypeLegal(VT: ExtVT) && isTruncateFree(FromVT: ExtVT, ToVT: VT)) {
9808 LHS = DAG.getNode(Opcode: ExtOpc, DL: dl, VT: ExtVT, Operand: LHS);
9809 RHS = DAG.getNode(Opcode: ExtOpc, DL: dl, VT: ExtVT, Operand: RHS);
9810 SDValue Avg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ExtVT, N1: LHS, N2: RHS);
9811 if (!IsFloor)
9812 Avg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ExtVT, N1: Avg,
9813 N2: DAG.getConstant(Val: 1, DL: dl, VT: ExtVT));
9814 // Just use SRL as we will be truncating away the extended sign bits.
9815 Avg = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ExtVT, N1: Avg,
9816 N2: DAG.getShiftAmountConstant(Val: 1, VT: ExtVT, DL: dl));
9817 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Avg);
9818 }
9819 }
9820
9821 // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
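  // The i1 overflow result of UADDO is the carry out of the add, i.e. the
  // top bit of the true (BW + 1)-bit sum, so shifting the sum right by one
  // and OR-ing the carry into the new top bit reconstitutes the average.
  // ANY_EXTEND of the carry suffices because every extended bit above bit 0
  // is shifted out of range by the SHL.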
9822 if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT)) {
9823 SDValue UAddWithOverflow =
9824 DAG.getNode(Opcode: ISD::UADDO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::i1), Ops: {RHS, LHS});
9825
9826 SDValue Sum = UAddWithOverflow.getValue(R: 0);
9827 SDValue Overflow = UAddWithOverflow.getValue(R: 1);
9828
    // Right shift the sum by 1.
9830 SDValue LShrVal = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Sum,
9831 N2: DAG.getShiftAmountConstant(Val: 1, VT, DL: dl));
9832
9833 SDValue ZeroExtOverflow = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: Overflow);
9834 SDValue OverflowShl = DAG.getNode(
9835 Opcode: ISD::SHL, DL: dl, VT, N1: ZeroExtOverflow,
9836 N2: DAG.getShiftAmountConstant(Val: VT.getScalarSizeInBits() - 1, VT, DL: dl));
9837
9838 return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: LShrVal, N2: OverflowShl);
9839 }
9840
9841 // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
9842 // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
9843 // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
9844 // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
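  //
  // These follow from a + b == (a & b) + (a | b) == 2 * (a & b) + (a ^ b):
  // the floor average is (a & b) + ((a ^ b) >> 1) and the ceil average is
  // (a | b) - ((a ^ b) >> 1), neither of which can overflow.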
9845 LHS = DAG.getFreeze(V: LHS);
9846 RHS = DAG.getFreeze(V: RHS);
9847 SDValue Sign = DAG.getNode(Opcode: SignOpc, DL: dl, VT, N1: LHS, N2: RHS);
9848 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: LHS, N2: RHS);
9849 SDValue Shift =
9850 DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: Xor, N2: DAG.getShiftAmountConstant(Val: 1, VT, DL: dl));
9851 return DAG.getNode(Opcode: SumOpc, DL: dl, VT, N1: Sign, N2: Shift);
9852}
9853
9854SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
9855 SDLoc dl(N);
9856 EVT VT = N->getValueType(ResNo: 0);
9857 SDValue Op = N->getOperand(Num: 0);
9858
9859 if (!VT.isSimple())
9860 return SDValue();
9861
9862 EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9863 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9864 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9865 default:
9866 return SDValue();
9867 case MVT::i16:
9868 // Use a rotate by 8. This can be further expanded if necessary.
9869 return DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
9870 case MVT::i32:
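    // Bytes [b3 b2 b1 b0] (b3 most significant) become [b0 b1 b2 b3]:
    // Tmp4 puts b0 in byte 3, Tmp3 puts b1 in byte 2, Tmp2 puts b2 in
    // byte 1, and Tmp1 puts b3 in byte 0; the ORs then merge the pieces.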
9871 Tmp4 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
9872 Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
9873 N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT));
9874 Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
9875 Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
9876 Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT));
9877 Tmp1 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
9878 Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp3);
9879 Tmp2 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp1);
9880 return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp2);
9881 case MVT::i64:
9882 Tmp8 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT));
9883 Tmp7 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
9884 N2: DAG.getConstant(Val: 255ULL<<8, DL: dl, VT));
9885 Tmp7 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp7, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT));
9886 Tmp6 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
9887 N2: DAG.getConstant(Val: 255ULL<<16, DL: dl, VT));
9888 Tmp6 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp6, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
9889 Tmp5 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
9890 N2: DAG.getConstant(Val: 255ULL<<24, DL: dl, VT));
9891 Tmp5 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp5, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
9892 Tmp4 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
9893 Tmp4 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp4,
9894 N2: DAG.getConstant(Val: 255ULL<<24, DL: dl, VT));
9895 Tmp3 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
9896 Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp3,
9897 N2: DAG.getConstant(Val: 255ULL<<16, DL: dl, VT));
9898 Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT));
9899 Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2,
9900 N2: DAG.getConstant(Val: 255ULL<<8, DL: dl, VT));
9901 Tmp1 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT));
9902 Tmp8 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp7);
9903 Tmp6 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp6, N2: Tmp5);
9904 Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp3);
9905 Tmp2 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp1);
9906 Tmp8 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp6);
9907 Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp2);
9908 return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp4);
9909 }
9910}
9911
9912SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
9913 SDLoc dl(N);
9914 EVT VT = N->getValueType(ResNo: 0);
9915 SDValue Op = N->getOperand(Num: 0);
9916 SDValue Mask = N->getOperand(Num: 1);
9917 SDValue EVL = N->getOperand(Num: 2);
9918
9919 if (!VT.isSimple())
9920 return SDValue();
9921
9922 EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9923 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9924 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9925 default:
9926 return SDValue();
9927 case MVT::i16:
9928 Tmp1 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
9929 N3: Mask, N4: EVL);
9930 Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
9931 N3: Mask, N4: EVL);
9932 return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp1, N2: Tmp2, N3: Mask, N4: EVL);
9933 case MVT::i32:
9934 Tmp4 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
9935 N3: Mask, N4: EVL);
9936 Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT),
9937 N3: Mask, N4: EVL);
9938 Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
9939 N3: Mask, N4: EVL);
9940 Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
9941 N3: Mask, N4: EVL);
9942 Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
9943 N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT), N3: Mask, N4: EVL);
9944 Tmp1 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
9945 N3: Mask, N4: EVL);
9946 Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp3, N3: Mask, N4: EVL);
9947 Tmp2 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp1, N3: Mask, N4: EVL);
9948 return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp2, N3: Mask, N4: EVL);
9949 case MVT::i64:
9950 Tmp8 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT),
9951 N3: Mask, N4: EVL);
9952 Tmp7 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
9953 N2: DAG.getConstant(Val: 255ULL << 8, DL: dl, VT), N3: Mask, N4: EVL);
9954 Tmp7 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp7, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT),
9955 N3: Mask, N4: EVL);
9956 Tmp6 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
9957 N2: DAG.getConstant(Val: 255ULL << 16, DL: dl, VT), N3: Mask, N4: EVL);
9958 Tmp6 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp6, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
9959 N3: Mask, N4: EVL);
9960 Tmp5 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
9961 N2: DAG.getConstant(Val: 255ULL << 24, DL: dl, VT), N3: Mask, N4: EVL);
9962 Tmp5 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp5, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
9963 N3: Mask, N4: EVL);
9964 Tmp4 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
9965 N3: Mask, N4: EVL);
9966 Tmp4 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp4,
9967 N2: DAG.getConstant(Val: 255ULL << 24, DL: dl, VT), N3: Mask, N4: EVL);
9968 Tmp3 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
9969 N3: Mask, N4: EVL);
9970 Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp3,
9971 N2: DAG.getConstant(Val: 255ULL << 16, DL: dl, VT), N3: Mask, N4: EVL);
9972 Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT),
9973 N3: Mask, N4: EVL);
9974 Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
9975 N2: DAG.getConstant(Val: 255ULL << 8, DL: dl, VT), N3: Mask, N4: EVL);
9976 Tmp1 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT),
9977 N3: Mask, N4: EVL);
9978 Tmp8 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp7, N3: Mask, N4: EVL);
9979 Tmp6 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp6, N2: Tmp5, N3: Mask, N4: EVL);
9980 Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp3, N3: Mask, N4: EVL);
9981 Tmp2 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp1, N3: Mask, N4: EVL);
9982 Tmp8 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp6, N3: Mask, N4: EVL);
9983 Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp2, N3: Mask, N4: EVL);
9984 return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp4, N3: Mask, N4: EVL);
9985 }
9986}
9987
9988SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
9989 SDLoc dl(N);
9990 EVT VT = N->getValueType(ResNo: 0);
9991 SDValue Op = N->getOperand(Num: 0);
9992 EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9993 unsigned Sz = VT.getScalarSizeInBits();
9994
9995 SDValue Tmp, Tmp2, Tmp3;
9996
  // If we can, perform BSWAP first and then mask+swap the i4 pairs, then the
  // i2 pairs, and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
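  //
  // A worked i8 example for 0b10110010: the i4 swap gives 0b00101011, the
  // i2 swap gives 0b10001110, and the i1 swap gives 0b01001101, the fully
  // reversed value (no BSWAP is needed for a single byte).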
10000 if (Sz >= 8 && isPowerOf2_32(Value: Sz)) {
10001 // Create the masks - repeating the pattern every byte.
10002 APInt Mask4 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x0F));
10003 APInt Mask2 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x33));
10004 APInt Mask1 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x55));
10005
10006 // BSWAP if the type is wider than a single byte.
10007 Tmp = (Sz > 8 ? DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT, Operand: Op) : Op);
10008
10009 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
10010 Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT));
10011 Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask4, DL: dl, VT));
10012 Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask4, DL: dl, VT));
10013 Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT));
10014 Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
10015
10016 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
10017 Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT));
10018 Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask2, DL: dl, VT));
10019 Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask2, DL: dl, VT));
10020 Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT));
10021 Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
10022
10023 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
10024 Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT));
10025 Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask1, DL: dl, VT));
10026 Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask1, DL: dl, VT));
10027 Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT));
10028 Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
10029 return Tmp;
10030 }
10031
10032 Tmp = DAG.getConstant(Val: 0, DL: dl, VT);
10033 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
10034 if (I < J)
10035 Tmp2 =
10036 DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: J - I, DL: dl, VT: SHVT));
10037 else
10038 Tmp2 =
10039 DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: I - J, DL: dl, VT: SHVT));
10040
10041 APInt Shift = APInt::getOneBitSet(numBits: Sz, BitNo: J);
10042 Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Shift, DL: dl, VT));
10043 Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp, N2: Tmp2);
10044 }
10045
10046 return Tmp;
10047}
10048
10049SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
10050 assert(N->getOpcode() == ISD::VP_BITREVERSE);
10051
10052 SDLoc dl(N);
10053 EVT VT = N->getValueType(ResNo: 0);
10054 SDValue Op = N->getOperand(Num: 0);
10055 SDValue Mask = N->getOperand(Num: 1);
10056 SDValue EVL = N->getOperand(Num: 2);
10057 EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
10058 unsigned Sz = VT.getScalarSizeInBits();
10059
10060 SDValue Tmp, Tmp2, Tmp3;
10061
  // If we can, perform BSWAP first and then mask+swap the i4 pairs, then the
  // i2 pairs, and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
10065 if (Sz >= 8 && isPowerOf2_32(Value: Sz)) {
10066 // Create the masks - repeating the pattern every byte.
10067 APInt Mask4 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x0F));
10068 APInt Mask2 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x33));
10069 APInt Mask1 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x55));
10070
10071 // BSWAP if the type is wider than a single byte.
10072 Tmp = (Sz > 8 ? DAG.getNode(Opcode: ISD::VP_BSWAP, DL: dl, VT, N1: Op, N2: Mask, N3: EVL) : Op);
10073
10074 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
10075 Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT),
10076 N3: Mask, N4: EVL);
10077 Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
10078 N2: DAG.getConstant(Val: Mask4, DL: dl, VT), N3: Mask, N4: EVL);
10079 Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask4, DL: dl, VT),
10080 N3: Mask, N4: EVL);
10081 Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT),
10082 N3: Mask, N4: EVL);
10083 Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);
10084
10085 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
10086 Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT),
10087 N3: Mask, N4: EVL);
10088 Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
10089 N2: DAG.getConstant(Val: Mask2, DL: dl, VT), N3: Mask, N4: EVL);
10090 Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask2, DL: dl, VT),
10091 N3: Mask, N4: EVL);
10092 Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT),
10093 N3: Mask, N4: EVL);
10094 Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);
10095
10096 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
10097 Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT),
10098 N3: Mask, N4: EVL);
10099 Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
10100 N2: DAG.getConstant(Val: Mask1, DL: dl, VT), N3: Mask, N4: EVL);
10101 Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask1, DL: dl, VT),
10102 N3: Mask, N4: EVL);
10103 Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT),
10104 N3: Mask, N4: EVL);
10105 Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);
10106 return Tmp;
10107 }
10108 return SDValue();
10109}
10110
10111std::pair<SDValue, SDValue>
10112TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
10113 SelectionDAG &DAG) const {
10114 SDLoc SL(LD);
10115 SDValue Chain = LD->getChain();
10116 SDValue BasePTR = LD->getBasePtr();
10117 EVT SrcVT = LD->getMemoryVT();
10118 EVT DstVT = LD->getValueType(ResNo: 0);
10119 ISD::LoadExtType ExtType = LD->getExtensionType();
10120
10121 if (SrcVT.isScalableVector())
10122 report_fatal_error(reason: "Cannot scalarize scalable vector loads");
10123
10124 unsigned NumElem = SrcVT.getVectorNumElements();
10125
10126 EVT SrcEltVT = SrcVT.getScalarType();
10127 EVT DstEltVT = DstVT.getScalarType();
10128
  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various pieces of code depend on it, e.g. the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // byte-sized elements must therefore be stored as an integer built out of
  // the extracted vector elements.
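  // For example, a <4 x i4> load becomes a single i16 load; on a
  // little-endian target, element Idx is then extracted by shifting right by
  // Idx * 4 and masking with 0xF.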
10135 if (!SrcEltVT.isByteSized()) {
10136 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
10137 EVT LoadVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumLoadBits);
10138
10139 unsigned NumSrcBits = SrcVT.getSizeInBits();
10140 EVT SrcIntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumSrcBits);
10141
10142 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
10143 SDValue SrcEltBitMask = DAG.getConstant(
10144 Val: APInt::getLowBitsSet(numBits: NumLoadBits, loBitsSet: SrcEltBits), DL: SL, VT: LoadVT);
10145
10146 // Load the whole vector and avoid masking off the top bits as it makes
10147 // the codegen worse.
10148 SDValue Load =
10149 DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl: SL, VT: LoadVT, Chain, Ptr: BasePTR,
10150 PtrInfo: LD->getPointerInfo(), MemVT: SrcIntVT, Alignment: LD->getBaseAlign(),
10151 MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
10152
10153 SmallVector<SDValue, 8> Vals;
10154 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10155 unsigned ShiftIntoIdx =
10156 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
10157 SDValue ShiftAmount = DAG.getShiftAmountConstant(
10158 Val: ShiftIntoIdx * SrcEltVT.getSizeInBits(), VT: LoadVT, DL: SL);
10159 SDValue ShiftedElt = DAG.getNode(Opcode: ISD::SRL, DL: SL, VT: LoadVT, N1: Load, N2: ShiftAmount);
10160 SDValue Elt =
10161 DAG.getNode(Opcode: ISD::AND, DL: SL, VT: LoadVT, N1: ShiftedElt, N2: SrcEltBitMask);
10162 SDValue Scalar = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SL, VT: SrcEltVT, Operand: Elt);
10163
10164 if (ExtType != ISD::NON_EXTLOAD) {
10165 unsigned ExtendOp = ISD::getExtForLoadExtType(IsFP: false, ExtType);
10166 Scalar = DAG.getNode(Opcode: ExtendOp, DL: SL, VT: DstEltVT, Operand: Scalar);
10167 }
10168
10169 Vals.push_back(Elt: Scalar);
10170 }
10171
10172 SDValue Value = DAG.getBuildVector(VT: DstVT, DL: SL, Ops: Vals);
10173 return std::make_pair(x&: Value, y: Load.getValue(R: 1));
10174 }
10175
10176 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
10177 assert(SrcEltVT.isByteSized());
10178
10179 SmallVector<SDValue, 8> Vals;
10180 SmallVector<SDValue, 8> LoadChains;
10181
10182 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10183 SDValue ScalarLoad = DAG.getExtLoad(
10184 ExtType, dl: SL, VT: DstEltVT, Chain, Ptr: BasePTR,
10185 PtrInfo: LD->getPointerInfo().getWithOffset(O: Idx * Stride), MemVT: SrcEltVT,
10186 Alignment: LD->getBaseAlign(), MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
10187
10188 BasePTR = DAG.getObjectPtrOffset(SL, Ptr: BasePTR, Offset: TypeSize::getFixed(ExactSize: Stride));
10189
10190 Vals.push_back(Elt: ScalarLoad.getValue(R: 0));
10191 LoadChains.push_back(Elt: ScalarLoad.getValue(R: 1));
10192 }
10193
10194 SDValue NewChain = DAG.getNode(Opcode: ISD::TokenFactor, DL: SL, VT: MVT::Other, Ops: LoadChains);
10195 SDValue Value = DAG.getBuildVector(VT: DstVT, DL: SL, Ops: Vals);
10196
10197 return std::make_pair(x&: Value, y&: NewChain);
10198}
10199
10200SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
10201 SelectionDAG &DAG) const {
10202 SDLoc SL(ST);
10203
10204 SDValue Chain = ST->getChain();
10205 SDValue BasePtr = ST->getBasePtr();
10206 SDValue Value = ST->getValue();
10207 EVT StVT = ST->getMemoryVT();
10208
10209 if (StVT.isScalableVector())
10210 report_fatal_error(reason: "Cannot scalarize scalable vector stores");
10211
  // The type of the data we want to save.
10213 EVT RegVT = Value.getValueType();
10214 EVT RegSclVT = RegVT.getScalarType();
10215
10216 // The type of data as saved in memory.
10217 EVT MemSclVT = StVT.getScalarType();
10218
10219 unsigned NumElem = StVT.getVectorNumElements();
10220
  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various pieces of code depend on it, e.g. the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // byte-sized elements must therefore be stored as an integer built out of
  // the extracted vector elements.
10227 if (!MemSclVT.isByteSized()) {
10228 unsigned NumBits = StVT.getSizeInBits();
10229 EVT IntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumBits);
10230
10231 SDValue CurrVal = DAG.getConstant(Val: 0, DL: SL, VT: IntVT);
10232
10233 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10234 SDValue Elt = DAG.getExtractVectorElt(DL: SL, VT: RegSclVT, Vec: Value, Idx);
10235 SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SL, VT: MemSclVT, Operand: Elt);
10236 SDValue ExtElt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SL, VT: IntVT, Operand: Trunc);
10237 unsigned ShiftIntoIdx =
10238 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
10239 SDValue ShiftAmount =
10240 DAG.getConstant(Val: ShiftIntoIdx * MemSclVT.getSizeInBits(), DL: SL, VT: IntVT);
10241 SDValue ShiftedElt =
10242 DAG.getNode(Opcode: ISD::SHL, DL: SL, VT: IntVT, N1: ExtElt, N2: ShiftAmount);
10243 CurrVal = DAG.getNode(Opcode: ISD::OR, DL: SL, VT: IntVT, N1: CurrVal, N2: ShiftedElt);
10244 }
10245
10246 return DAG.getStore(Chain, dl: SL, Val: CurrVal, Ptr: BasePtr, PtrInfo: ST->getPointerInfo(),
10247 Alignment: ST->getBaseAlign(), MMOFlags: ST->getMemOperand()->getFlags(),
10248 AAInfo: ST->getAAInfo());
10249 }
10250
  // Store stride in bytes.
10252 unsigned Stride = MemSclVT.getSizeInBits() / 8;
10253 assert(Stride && "Zero stride!");
10254 // Extract each of the elements from the original vector and save them into
10255 // memory individually.
10256 SmallVector<SDValue, 8> Stores;
10257 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10258 SDValue Elt = DAG.getExtractVectorElt(DL: SL, VT: RegSclVT, Vec: Value, Idx);
10259
10260 SDValue Ptr =
10261 DAG.getObjectPtrOffset(SL, Ptr: BasePtr, Offset: TypeSize::getFixed(ExactSize: Idx * Stride));
10262
10263 // This scalar TruncStore may be illegal, but we legalize it later.
10264 SDValue Store = DAG.getTruncStore(
10265 Chain, dl: SL, Val: Elt, Ptr, PtrInfo: ST->getPointerInfo().getWithOffset(O: Idx * Stride),
10266 SVT: MemSclVT, Alignment: ST->getBaseAlign(), MMOFlags: ST->getMemOperand()->getFlags(),
10267 AAInfo: ST->getAAInfo());
10268
10269 Stores.push_back(Elt: Store);
10270 }
10271
10272 return DAG.getNode(Opcode: ISD::TokenFactor, DL: SL, VT: MVT::Other, Ops: Stores);
10273}
10274
10275std::pair<SDValue, SDValue>
10276TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
10277 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
10278 "unaligned indexed loads not implemented!");
10279 SDValue Chain = LD->getChain();
10280 SDValue Ptr = LD->getBasePtr();
10281 EVT VT = LD->getValueType(ResNo: 0);
10282 EVT LoadedVT = LD->getMemoryVT();
10283 SDLoc dl(LD);
10284 auto &MF = DAG.getMachineFunction();
10285
10286 if (VT.isFloatingPoint() || VT.isVector()) {
10287 EVT intVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: LoadedVT.getSizeInBits());
10288 if (isTypeLegal(VT: intVT) && isTypeLegal(VT: LoadedVT)) {
10289 if (!isOperationLegalOrCustom(Op: ISD::LOAD, VT: intVT) &&
10290 LoadedVT.isVector()) {
10291 // Scalarize the load and let the individual components be handled.
10292 return scalarizeVectorLoad(LD, DAG);
10293 }
10294
10295 // Expand to a (misaligned) integer load of the same size,
10296 // then bitconvert to floating point or vector.
10297 SDValue newLoad = DAG.getLoad(VT: intVT, dl, Chain, Ptr,
10298 MMO: LD->getMemOperand());
10299 SDValue Result = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: LoadedVT, Operand: newLoad);
10300 if (LoadedVT != VT)
10301 Result = DAG.getNode(Opcode: VT.isFloatingPoint() ? ISD::FP_EXTEND :
10302 ISD::ANY_EXTEND, DL: dl, VT, Operand: Result);
10303
10304 return std::make_pair(x&: Result, y: newLoad.getValue(R: 1));
10305 }
10306
    // Copy the value to an (aligned) stack slot using (unaligned) integer
    // loads and stores, then do an (aligned) load from the stack slot.
10309 MVT RegVT = getRegisterType(Context&: *DAG.getContext(), VT: intVT);
10310 unsigned LoadedBytes = LoadedVT.getStoreSize();
10311 unsigned RegBytes = RegVT.getSizeInBits() / 8;
10312 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
10313
10314 // Make sure the stack slot is also aligned for the register type.
10315 SDValue StackBase = DAG.CreateStackTemporary(VT1: LoadedVT, VT2: RegVT);
10316 auto FrameIndex = cast<FrameIndexSDNode>(Val: StackBase.getNode())->getIndex();
10317 SmallVector<SDValue, 8> Stores;
10318 SDValue StackPtr = StackBase;
10319 unsigned Offset = 0;
10320
10321 EVT PtrVT = Ptr.getValueType();
10322 EVT StackPtrVT = StackPtr.getValueType();
10323
10324 SDValue PtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: PtrVT);
10325 SDValue StackPtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: StackPtrVT);
10326
    // Do all but one of the copies using the full register width.
10328 for (unsigned i = 1; i < NumRegs; i++) {
10329 // Load one integer register's worth from the original location.
10330 SDValue Load = DAG.getLoad(
10331 VT: RegVT, dl, Chain, Ptr, PtrInfo: LD->getPointerInfo().getWithOffset(O: Offset),
10332 Alignment: LD->getBaseAlign(), MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
10333 // Follow the load with a store to the stack slot. Remember the store.
10334 Stores.push_back(Elt: DAG.getStore(
10335 Chain: Load.getValue(R: 1), dl, Val: Load, Ptr: StackPtr,
10336 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset)));
10337 // Increment the pointers.
10338 Offset += RegBytes;
10339
10340 Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: PtrIncrement);
10341 StackPtr = DAG.getObjectPtrOffset(SL: dl, Ptr: StackPtr, Offset: StackPtrIncrement);
10342 }
10343
10344 // The last copy may be partial. Do an extending load.
10345 EVT MemVT = EVT::getIntegerVT(Context&: *DAG.getContext(),
10346 BitWidth: 8 * (LoadedBytes - Offset));
10347 SDValue Load = DAG.getExtLoad(
10348 ExtType: ISD::EXTLOAD, dl, VT: RegVT, Chain, Ptr,
10349 PtrInfo: LD->getPointerInfo().getWithOffset(O: Offset), MemVT, Alignment: LD->getBaseAlign(),
10350 MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
10351 // Follow the load with a store to the stack slot. Remember the store.
10352 // On big-endian machines this requires a truncating store to ensure
10353 // that the bits end up in the right place.
10354 Stores.push_back(Elt: DAG.getTruncStore(
10355 Chain: Load.getValue(R: 1), dl, Val: Load, Ptr: StackPtr,
10356 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset), SVT: MemVT));
10357
10358 // The order of the stores doesn't matter - say it with a TokenFactor.
10359 SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: Stores);
10360
10361 // Finally, perform the original load only redirected to the stack slot.
10362 Load = DAG.getExtLoad(ExtType: LD->getExtensionType(), dl, VT, Chain: TF, Ptr: StackBase,
10363 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset: 0),
10364 MemVT: LoadedVT);
10365
10366 // Callers expect a MERGE_VALUES node.
10367 return std::make_pair(x&: Load, y&: TF);
10368 }
10369
10370 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
10371 "Unaligned load of unsupported type.");
10372
10373 // Compute the new VT that is half the size of the old one. This is an
10374 // integer MVT.
10375 unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT =
      EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumBits / 2);
  NumBits >>= 1;
10379
10380 Align Alignment = LD->getBaseAlign();
10381 unsigned IncrementSize = NumBits / 8;
10382 ISD::LoadExtType HiExtType = LD->getExtensionType();
10383
10384 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
10385 if (HiExtType == ISD::NON_EXTLOAD)
10386 HiExtType = ISD::ZEXTLOAD;
10387
  // Load the value in two parts.
10389 SDValue Lo, Hi;
10390 if (DAG.getDataLayout().isLittleEndian()) {
10391 Lo = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT, Chain, Ptr, PtrInfo: LD->getPointerInfo(),
10392 MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
10393 AAInfo: LD->getAAInfo());
10394
10395 Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
10396 Hi = DAG.getExtLoad(ExtType: HiExtType, dl, VT, Chain, Ptr,
10397 PtrInfo: LD->getPointerInfo().getWithOffset(O: IncrementSize),
10398 MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
10399 AAInfo: LD->getAAInfo());
10400 } else {
10401 Hi = DAG.getExtLoad(ExtType: HiExtType, dl, VT, Chain, Ptr, PtrInfo: LD->getPointerInfo(),
10402 MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
10403 AAInfo: LD->getAAInfo());
10404
10405 Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
10406 Lo = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
10407 PtrInfo: LD->getPointerInfo().getWithOffset(O: IncrementSize),
10408 MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
10409 AAInfo: LD->getAAInfo());
10410 }
10411
  // Aggregate the two parts.
10413 SDValue ShiftAmount = DAG.getShiftAmountConstant(Val: NumBits, VT, DL: dl);
10414 SDValue Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Hi, N2: ShiftAmount);
10415 Result = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Result, N2: Lo);
10416
10417 SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Lo.getValue(R: 1),
10418 N2: Hi.getValue(R: 1));
10419
10420 return std::make_pair(x&: Result, y&: TF);
10421}
10422
10423SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
10424 SelectionDAG &DAG) const {
10425 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
10426 "unaligned indexed stores not implemented!");
10427 SDValue Chain = ST->getChain();
10428 SDValue Ptr = ST->getBasePtr();
10429 SDValue Val = ST->getValue();
10430 EVT VT = Val.getValueType();
10431 Align Alignment = ST->getBaseAlign();
10432 auto &MF = DAG.getMachineFunction();
10433 EVT StoreMemVT = ST->getMemoryVT();
10434
10435 SDLoc dl(ST);
10436 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
10437 EVT intVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: VT.getSizeInBits());
10438 if (isTypeLegal(VT: intVT)) {
10439 if (!isOperationLegalOrCustom(Op: ISD::STORE, VT: intVT) &&
10440 StoreMemVT.isVector()) {
10441 // Scalarize the store and let the individual components be handled.
10442 SDValue Result = scalarizeVectorStore(ST, DAG);
10443 return Result;
10444 }
10445 // Expand to a bitconvert of the value to the integer type of the
10446 // same size, then a (misaligned) int store.
10447 // FIXME: Does not handle truncating floating point stores!
10448 SDValue Result = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: intVT, Operand: Val);
10449 Result = DAG.getStore(Chain, dl, Val: Result, Ptr, PtrInfo: ST->getPointerInfo(),
10450 Alignment, MMOFlags: ST->getMemOperand()->getFlags());
10451 return Result;
10452 }
    // Do an (aligned) store to a stack slot, then copy from the stack slot
    // to the final destination using (unaligned) integer loads and stores.
10455 MVT RegVT = getRegisterType(
10456 Context&: *DAG.getContext(),
10457 VT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: StoreMemVT.getSizeInBits()));
10458 EVT PtrVT = Ptr.getValueType();
10459 unsigned StoredBytes = StoreMemVT.getStoreSize();
10460 unsigned RegBytes = RegVT.getSizeInBits() / 8;
10461 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
10462
10463 // Make sure the stack slot is also aligned for the register type.
10464 SDValue StackPtr = DAG.CreateStackTemporary(VT1: StoreMemVT, VT2: RegVT);
10465 auto FrameIndex = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
10466
10467 // Perform the original store, only redirected to the stack slot.
10468 SDValue Store = DAG.getTruncStore(
10469 Chain, dl, Val, Ptr: StackPtr,
10470 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset: 0), SVT: StoreMemVT);
10471
10472 EVT StackPtrVT = StackPtr.getValueType();
10473
10474 SDValue PtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: PtrVT);
10475 SDValue StackPtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: StackPtrVT);
10476 SmallVector<SDValue, 8> Stores;
10477 unsigned Offset = 0;
10478
    // Do all but one of the copies using the full register width.
10480 for (unsigned i = 1; i < NumRegs; i++) {
10481 // Load one integer register's worth from the stack slot.
10482 SDValue Load = DAG.getLoad(
10483 VT: RegVT, dl, Chain: Store, Ptr: StackPtr,
10484 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset));
10485 // Store it to the final location. Remember the store.
10486 Stores.push_back(Elt: DAG.getStore(Chain: Load.getValue(R: 1), dl, Val: Load, Ptr,
10487 PtrInfo: ST->getPointerInfo().getWithOffset(O: Offset),
10488 Alignment: ST->getBaseAlign(),
10489 MMOFlags: ST->getMemOperand()->getFlags()));
10490 // Increment the pointers.
10491 Offset += RegBytes;
10492 StackPtr = DAG.getObjectPtrOffset(SL: dl, Ptr: StackPtr, Offset: StackPtrIncrement);
10493 Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: PtrIncrement);
10494 }
10495
10496 // The last store may be partial. Do a truncating store. On big-endian
10497 // machines this requires an extending load from the stack slot to ensure
10498 // that the bits are in the right place.
10499 EVT LoadMemVT =
10500 EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: 8 * (StoredBytes - Offset));
10501
10502 // Load from the stack slot.
10503 SDValue Load = DAG.getExtLoad(
10504 ExtType: ISD::EXTLOAD, dl, VT: RegVT, Chain: Store, Ptr: StackPtr,
10505 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset), MemVT: LoadMemVT);
10506
10507 Stores.push_back(Elt: DAG.getTruncStore(
10508 Chain: Load.getValue(R: 1), dl, Val: Load, Ptr,
10509 PtrInfo: ST->getPointerInfo().getWithOffset(O: Offset), SVT: LoadMemVT,
10510 Alignment: ST->getBaseAlign(), MMOFlags: ST->getMemOperand()->getFlags(), AAInfo: ST->getAAInfo()));
10511 // The order of the stores doesn't matter - say it with a TokenFactor.
10512 SDValue Result = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: Stores);
10513 return Result;
10514 }
10515
10516 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
10517 "Unaligned store of unknown type.");
  // Get the half-sized VT.
10519 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(Context&: *DAG.getContext());
10520 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
10521 unsigned IncrementSize = NumBits / 8;
10522
  // Divide the stored value into two parts.
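  // For example, an i32 store that is only 2-byte aligned is split into two
  // i16 truncating stores; on a little-endian target the low half is stored
  // at Ptr and the high half at Ptr + IncrementSize.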
10524 SDValue ShiftAmount =
10525 DAG.getShiftAmountConstant(Val: NumBits, VT: Val.getValueType(), DL: dl);
10526 SDValue Lo = Val;
10527 // If Val is a constant, replace the upper bits with 0. The SRL will constant
10528 // fold and not use the upper bits. A smaller constant may be easier to
10529 // materialize.
10530 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Lo); C && !C->isOpaque())
10531 Lo = DAG.getNode(
10532 Opcode: ISD::AND, DL: dl, VT, N1: Lo,
10533 N2: DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VT.getSizeInBits(), loBitsSet: NumBits), DL: dl,
10534 VT));
10535 SDValue Hi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Val, N2: ShiftAmount);
10536
10537 // Store the two parts
10538 SDValue Store1, Store2;
10539 Store1 = DAG.getTruncStore(Chain, dl,
10540 Val: DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
10541 Ptr, PtrInfo: ST->getPointerInfo(), SVT: NewStoredVT, Alignment,
10542 MMOFlags: ST->getMemOperand()->getFlags());
10543
10544 Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
10545 Store2 = DAG.getTruncStore(
10546 Chain, dl, Val: DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
10547 PtrInfo: ST->getPointerInfo().getWithOffset(O: IncrementSize), SVT: NewStoredVT, Alignment,
10548 MMOFlags: ST->getMemOperand()->getFlags(), AAInfo: ST->getAAInfo());
10549
10550 SDValue Result =
10551 DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Store1, N2: Store2);
10552 return Result;
10553}
10554
10555SDValue
10556TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
10557 const SDLoc &DL, EVT DataVT,
10558 SelectionDAG &DAG,
10559 bool IsCompressedMemory) const {
10560 SDValue Increment;
10561 EVT AddrVT = Addr.getValueType();
10562 EVT MaskVT = Mask.getValueType();
10563 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
10564 "Incompatible types of Data and Mask");
10565 if (IsCompressedMemory) {
10566 if (DataVT.isScalableVector())
10567 report_fatal_error(
10568 reason: "Cannot currently handle compressed memory with scalable vectors");
    // Increment the pointer according to the number of '1's in the mask.
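    // For example, for <8 x i32> data and a mask with three bits set, the
    // increment is popcount(mask) * 4 == 12 bytes.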
10570 EVT MaskIntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: MaskVT.getSizeInBits());
10571 SDValue MaskInIntReg = DAG.getBitcast(VT: MaskIntVT, V: Mask);
10572 if (MaskIntVT.getSizeInBits() < 32) {
10573 MaskInIntReg = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i32, Operand: MaskInIntReg);
10574 MaskIntVT = MVT::i32;
10575 }
10576
10577 // Count '1's with POPCNT.
10578 Increment = DAG.getNode(Opcode: ISD::CTPOP, DL, VT: MaskIntVT, Operand: MaskInIntReg);
10579 Increment = DAG.getZExtOrTrunc(Op: Increment, DL, VT: AddrVT);
    // Scale is the element size in bytes.
10581 SDValue Scale = DAG.getConstant(Val: DataVT.getScalarSizeInBits() / 8, DL,
10582 VT: AddrVT);
10583 Increment = DAG.getNode(Opcode: ISD::MUL, DL, VT: AddrVT, N1: Increment, N2: Scale);
10584 } else if (DataVT.isScalableVector()) {
10585 Increment = DAG.getVScale(DL, VT: AddrVT,
10586 MulImm: APInt(AddrVT.getFixedSizeInBits(),
10587 DataVT.getStoreSize().getKnownMinValue()));
10588 } else
10589 Increment = DAG.getConstant(Val: DataVT.getStoreSize(), DL, VT: AddrVT);
10590
10591 return DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: Addr, N2: Increment);
10592}
10593
10594static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
10595 EVT VecVT, const SDLoc &dl,
10596 ElementCount SubEC) {
10597 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
10598 "Cannot index a scalable vector within a fixed-width vector");
10599
10600 unsigned NElts = VecVT.getVectorMinNumElements();
10601 unsigned NumSubElts = SubEC.getKnownMinValue();
10602 EVT IdxVT = Idx.getValueType();
10603
10604 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
    // If this is a constant index and we know the value plus the number of
    // elements in the subvector minus one is less than the minimum number of
    // elements, then it's safe to return Idx.
10608 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Val&: Idx))
10609 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
10610 return Idx;
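    // Otherwise clamp Idx to the last valid starting index, which is
    // vscale * NElts - NumSubElts; e.g. when indexing a single element within
    // <vscale x 4 x i32>, the clamp is umin(Idx, vscale * 4 - 1).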
10611 SDValue VS =
10612 DAG.getVScale(DL: dl, VT: IdxVT, MulImm: APInt(IdxVT.getFixedSizeInBits(), NElts));
10613 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
10614 SDValue Sub = DAG.getNode(Opcode: SubOpcode, DL: dl, VT: IdxVT, N1: VS,
10615 N2: DAG.getConstant(Val: NumSubElts, DL: dl, VT: IdxVT));
10616 return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT: IdxVT, N1: Idx, N2: Sub);
10617 }
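  // For a power-of-2 fixed-length vector, a single-element index can be
  // clamped with a simple mask; e.g. NElts == 8 gives Idx & 7.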
10618 if (isPowerOf2_32(Value: NElts) && NumSubElts == 1) {
10619 APInt Imm = APInt::getLowBitsSet(numBits: IdxVT.getSizeInBits(), loBitsSet: Log2_32(Value: NElts));
10620 return DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IdxVT, N1: Idx,
10621 N2: DAG.getConstant(Val: Imm, DL: dl, VT: IdxVT));
10622 }
10623 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
10624 return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT: IdxVT, N1: Idx,
10625 N2: DAG.getConstant(Val: MaxIndex, DL: dl, VT: IdxVT));
10626}
10627
10628SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
10629 SDValue VecPtr, EVT VecVT,
10630 SDValue Index) const {
10631 return getVectorSubVecPointer(
10632 DAG, VecPtr, VecVT,
10633 SubVecVT: EVT::getVectorVT(Context&: *DAG.getContext(), VT: VecVT.getVectorElementType(), NumElements: 1),
10634 Index);
10635}
10636
10637SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
10638 SDValue VecPtr, EVT VecVT,
10639 EVT SubVecVT,
10640 SDValue Index) const {
10641 SDLoc dl(Index);
10642 // Make sure the index type is big enough to compute in.
10643 Index = DAG.getZExtOrTrunc(Op: Index, DL: dl, VT: VecPtr.getValueType());
10644
10645 EVT EltVT = VecVT.getVectorElementType();
10646
10647 // Calculate the element offset and add it to the pointer.
10648 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
10649 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
10650 "Converting bits to bytes lost precision");
10651 assert(SubVecVT.getVectorElementType() == EltVT &&
10652 "Sub-vector must be a vector with matching element type");
10653 Index = clampDynamicVectorIndex(DAG, Idx: Index, VecVT, dl,
10654 SubEC: SubVecVT.getVectorElementCount());
10655
10656 EVT IdxVT = Index.getValueType();
10657 if (SubVecVT.isScalableVector())
10658 Index =
10659 DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IdxVT, N1: Index,
10660 N2: DAG.getVScale(DL: dl, VT: IdxVT, MulImm: APInt(IdxVT.getSizeInBits(), 1)));
10661
10662 Index = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IdxVT, N1: Index,
10663 N2: DAG.getConstant(Val: EltSize, DL: dl, VT: IdxVT));
10664 return DAG.getMemBasePlusOffset(Base: VecPtr, Offset: Index, DL: dl);
10665}
10666
10667//===----------------------------------------------------------------------===//
10668// Implementation of Emulated TLS Model
10669//===----------------------------------------------------------------------===//
10670
10671SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
10672 SelectionDAG &DAG) const {
  // Access to the address of TLS variable xyz is lowered to a function call:
10674 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
10675 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
10676 PointerType *VoidPtrType = PointerType::get(C&: *DAG.getContext(), AddressSpace: 0);
10677 SDLoc dl(GA);
10678
10679 ArgListTy Args;
10680 ArgListEntry Entry;
10681 const GlobalValue *GV =
10682 cast<GlobalValue>(Val: GA->getGlobal()->stripPointerCastsAndAliases());
10683 SmallString<32> NameString("__emutls_v.");
10684 NameString += GV->getName();
10685 StringRef EmuTlsVarName(NameString);
10686 const GlobalVariable *EmuTlsVar =
10687 GV->getParent()->getNamedGlobal(Name: EmuTlsVarName);
  assert(EmuTlsVar && "Cannot find EmuTlsVar");
10689 Entry.Node = DAG.getGlobalAddress(GV: EmuTlsVar, DL: dl, VT: PtrVT);
10690 Entry.Ty = VoidPtrType;
10691 Args.push_back(x: Entry);
10692
10693 SDValue EmuTlsGetAddr = DAG.getExternalSymbol(Sym: "__emutls_get_address", VT: PtrVT);
10694
10695 TargetLowering::CallLoweringInfo CLI(DAG);
10696 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
10697 CLI.setLibCallee(CC: CallingConv::C, ResultType: VoidPtrType, Target: EmuTlsGetAddr, ArgsList: std::move(Args));
10698 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
10699
  // TLSADDR will be codegen'ed as a call. Inform MFI that the function has
  // calls. At least for X86 targets; maybe good for other targets too?
10702 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10703 MFI.setAdjustsStack(true); // Is this only for X86 target?
10704 MFI.setHasCalls(true);
10705
10706 assert((GA->getOffset() == 0) &&
10707 "Emulated TLS must have zero offset in GlobalAddressSDNode");
10708 return CallResult.first;
10709}
10710
10711SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
10712 SelectionDAG &DAG) const {
10713 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
10714 if (!isCtlzFast())
10715 return SDValue();
10716 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get();
10717 SDLoc dl(Op);
10718 if (isNullConstant(V: Op.getOperand(i: 1)) && CC == ISD::SETEQ) {
10719 EVT VT = Op.getOperand(i: 0).getValueType();
10720 SDValue Zext = Op.getOperand(i: 0);
10721 if (VT.bitsLT(VT: MVT::i32)) {
10722 VT = MVT::i32;
10723 Zext = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Op.getOperand(i: 0));
10724 }
10725 unsigned Log2b = Log2_32(Value: VT.getSizeInBits());
10726 SDValue Clz = DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Zext);
10727 SDValue Scc = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Clz,
10728 N2: DAG.getConstant(Val: Log2b, DL: dl, VT: MVT::i32));
10729 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32, Operand: Scc);
10730 }
10731 return SDValue();
10732}
10733
10734SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
10735 SDValue Op0 = Node->getOperand(Num: 0);
10736 SDValue Op1 = Node->getOperand(Num: 1);
10737 EVT VT = Op0.getValueType();
10738 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10739 unsigned Opcode = Node->getOpcode();
10740 SDLoc DL(Node);
10741
10742 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
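  // e.g. x == 0 gives 0 - (-1) == 1, and any x != 0 gives x - 0 == x.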
10743 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(V: Op1, AllowUndefs: true) && BoolVT == VT &&
10744 getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
10745 Op0 = DAG.getFreeze(V: Op0);
10746 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
10747 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0,
10748 N2: DAG.getSetCC(DL, VT, LHS: Op0, RHS: Zero, Cond: ISD::SETEQ));
10749 }
10750
10751 // umin(x,y) -> sub(x,usubsat(x,y))
10752 // TODO: Missing freeze(Op0)?
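  // usubsat(x,y) == umax(x,y) - y, so x - usubsat(x,y) == umin(x,y);
  // e.g. x = 3, y = 5: 3 - usubsat(3,5) == 3 - 0 == 3.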
10753 if (Opcode == ISD::UMIN && isOperationLegal(Op: ISD::SUB, VT) &&
10754 isOperationLegal(Op: ISD::USUBSAT, VT)) {
10755 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0,
10756 N2: DAG.getNode(Opcode: ISD::USUBSAT, DL, VT, N1: Op0, N2: Op1));
10757 }
10758
10759 // umax(x,y) -> add(x,usubsat(y,x))
10760 // TODO: Missing freeze(Op0)?
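  // usubsat(y,x) == umax(x,y) - x, so x + usubsat(y,x) == umax(x,y);
  // e.g. x = 3, y = 5: 3 + usubsat(5,3) == 3 + 2 == 5.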
10761 if (Opcode == ISD::UMAX && isOperationLegal(Op: ISD::ADD, VT) &&
10762 isOperationLegal(Op: ISD::USUBSAT, VT)) {
10763 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0,
10764 N2: DAG.getNode(Opcode: ISD::USUBSAT, DL, VT, N1: Op1, N2: Op0));
10765 }
10766
10767 // FIXME: Should really try to split the vector in case it's legal on a
10768 // subvector.
10769 if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
10770 return DAG.UnrollVectorOp(N: Node);
10771
10772 // Attempt to find an existing SETCC node that we can reuse.
10773 // TODO: Do we need a generic doesSETCCNodeExist?
10774 // TODO: Missing freeze(Op0)/freeze(Op1)?
10775 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
10776 ISD::CondCode PrefCommuteCC,
10777 ISD::CondCode AltCommuteCC) {
10778 SDVTList BoolVTList = DAG.getVTList(VT: BoolVT);
10779 for (ISD::CondCode CC : {PrefCC, AltCC}) {
10780 if (DAG.doesNodeExist(Opcode: ISD::SETCC, VTList: BoolVTList,
10781 Ops: {Op0, Op1, DAG.getCondCode(Cond: CC)})) {
10782 SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: CC);
10783 return DAG.getSelect(DL, VT, Cond, LHS: Op0, RHS: Op1);
10784 }
10785 }
10786 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
10787 if (DAG.doesNodeExist(Opcode: ISD::SETCC, VTList: BoolVTList,
10788 Ops: {Op0, Op1, DAG.getCondCode(Cond: CC)})) {
10789 SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: CC);
10790 return DAG.getSelect(DL, VT, Cond, LHS: Op1, RHS: Op0);
10791 }
10792 }
10793 SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: PrefCC);
10794 return DAG.getSelect(DL, VT, Cond, LHS: Op0, RHS: Op1);
10795 };
10796
10797 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
10798 // -> Y = (A < B) ? B : A
10799 // -> Y = (A >= B) ? A : B
10800 // -> Y = (A <= B) ? B : A
10801 switch (Opcode) {
10802 case ISD::SMAX:
10803 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
10804 case ISD::SMIN:
10805 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
10806 case ISD::UMAX:
10807 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
10808 case ISD::UMIN:
10809 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
10810 }
10811
10812 llvm_unreachable("How did we get here?");
10813}
10814
10815SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
10816 unsigned Opcode = Node->getOpcode();
10817 SDValue LHS = Node->getOperand(Num: 0);
10818 SDValue RHS = Node->getOperand(Num: 1);
10819 EVT VT = LHS.getValueType();
10820 SDLoc dl(Node);
10821
10822 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10823 assert(VT.isInteger() && "Expected operands to be integers");
10824
10825 // usub.sat(a, b) -> umax(a, b) - b
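  // umax pins the minuend to at least b, so the SUB cannot wrap below zero;
  // e.g. a = 3, b = 5: umax(3,5) - 5 == 0, the saturated result.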
10826 if (Opcode == ISD::USUBSAT && isOperationLegal(Op: ISD::UMAX, VT)) {
10827 SDValue Max = DAG.getNode(Opcode: ISD::UMAX, DL: dl, VT, N1: LHS, N2: RHS);
10828 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Max, N2: RHS);
10829 }
10830
10831 // uadd.sat(a, b) -> umin(a, ~b) + b
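  // ~b is the largest value that can be added to b without wrapping, so
  // umin(a, ~b) + b equals a + b when the sum fits and all-ones otherwise;
  // e.g. for i8, a = 200, b = 100: umin(200, ~100) + 100 == 155 + 100 == 255.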
10832 if (Opcode == ISD::UADDSAT && isOperationLegal(Op: ISD::UMIN, VT)) {
10833 SDValue InvRHS = DAG.getNOT(DL: dl, Val: RHS, VT);
10834 SDValue Min = DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT, N1: LHS, N2: InvRHS);
10835 return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Min, N2: RHS);
10836 }
10837
10838 unsigned OverflowOp;
10839 switch (Opcode) {
10840 case ISD::SADDSAT:
10841 OverflowOp = ISD::SADDO;
10842 break;
10843 case ISD::UADDSAT:
10844 OverflowOp = ISD::UADDO;
10845 break;
10846 case ISD::SSUBSAT:
10847 OverflowOp = ISD::SSUBO;
10848 break;
10849 case ISD::USUBSAT:
10850 OverflowOp = ISD::USUBO;
10851 break;
10852 default:
10853 llvm_unreachable("Expected method to receive signed or unsigned saturation "
10854 "addition or subtraction node.");
10855 }
10856
10857 // FIXME: Should really try to split the vector in case it's legal on a
10858 // subvector.
10859 if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
10860 return DAG.UnrollVectorOp(N: Node);
10861
10862 unsigned BitWidth = LHS.getScalarValueSizeInBits();
10863 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10864 SDValue Result = DAG.getNode(Opcode: OverflowOp, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
10865 SDValue SumDiff = Result.getValue(R: 0);
10866 SDValue Overflow = Result.getValue(R: 1);
10867 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
10868 SDValue AllOnes = DAG.getAllOnesConstant(DL: dl, VT);
10869
10870 if (Opcode == ISD::UADDSAT) {
10871 if (getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
10872 // (LHS + RHS) | OverflowMask
10873 SDValue OverflowMask = DAG.getSExtOrTrunc(Op: Overflow, DL: dl, VT);
10874 return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: SumDiff, N2: OverflowMask);
10875 }
10876 // Overflow ? 0xffff.... : (LHS + RHS)
10877 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: AllOnes, RHS: SumDiff);
10878 }
10879
10880 if (Opcode == ISD::USUBSAT) {
10881 if (getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
10882 // (LHS - RHS) & ~OverflowMask
10883 SDValue OverflowMask = DAG.getSExtOrTrunc(Op: Overflow, DL: dl, VT);
10884 SDValue Not = DAG.getNOT(DL: dl, Val: OverflowMask, VT);
10885 return DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: SumDiff, N2: Not);
10886 }
10887 // Overflow ? 0 : (LHS - RHS)
10888 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Zero, RHS: SumDiff);
10889 }
10890
10891 if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
10892 APInt MinVal = APInt::getSignedMinValue(numBits: BitWidth);
10893 APInt MaxVal = APInt::getSignedMaxValue(numBits: BitWidth);
10894
10895 KnownBits KnownLHS = DAG.computeKnownBits(Op: LHS);
10896 KnownBits KnownRHS = DAG.computeKnownBits(Op: RHS);
10897
10898 // If either of the operand signs are known, then they are guaranteed to
10899 // only saturate in one direction. If non-negative they will saturate
10900 // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
10901 //
10902 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
10903 // sign of 'y' has to be flipped.
10904
10905 bool LHSIsNonNegative = KnownLHS.isNonNegative();
10906 bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
10907 : KnownRHS.isNegative();
10908 if (LHSIsNonNegative || RHSIsNonNegative) {
10909 SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
10910 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMax, RHS: SumDiff);
10911 }
10912
10913 bool LHSIsNegative = KnownLHS.isNegative();
10914 bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
10915 : KnownRHS.isNonNegative();
10916 if (LHSIsNegative || RHSIsNegative) {
10917 SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
10918 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMin, RHS: SumDiff);
10919 }
10920 }
10921
  // Overflow ? (SumDiff >> (BW - 1)) ^ MinVal : SumDiff
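  // On signed overflow SumDiff wraps to the opposite sign of the true result,
  // so SumDiff >> (BW - 1) is all-zeros when we must saturate to MinVal and
  // all-ones when we must saturate to MaxVal; XORing with MinVal produces the
  // correct bound in both cases.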
10923 APInt MinVal = APInt::getSignedMinValue(numBits: BitWidth);
10924 SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
10925 SDValue Shift = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: SumDiff,
10926 N2: DAG.getConstant(Val: BitWidth - 1, DL: dl, VT));
10927 Result = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Shift, N2: SatMin);
10928 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Result, RHS: SumDiff);
10929}
10930
10931SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const {
10932 unsigned Opcode = Node->getOpcode();
10933 SDValue LHS = Node->getOperand(Num: 0);
10934 SDValue RHS = Node->getOperand(Num: 1);
10935 EVT VT = LHS.getValueType();
10936 EVT ResVT = Node->getValueType(ResNo: 0);
10937 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10938 SDLoc dl(Node);
10939
10940 auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
10941 auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
10942 SDValue IsLT = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS, Cond: LTPredicate);
10943 SDValue IsGT = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS, Cond: GTPredicate);
10944
10945 // We can't perform arithmetic on i1 values. Extending them would
10946 // probably result in worse codegen, so let's just use two selects instead.
10947 // Some targets are also just better off using selects rather than subtraction
10948 // because one of the conditions can be merged with one of the selects.
  // And finally, if we don't know the contents of the high bits of a boolean
  // value, we can't perform any arithmetic either.
10951 if (shouldExpandCmpUsingSelects(VT) || BoolVT.getScalarSizeInBits() == 1 ||
10952 getBooleanContents(Type: BoolVT) == UndefinedBooleanContent) {
10953 SDValue SelectZeroOrOne =
10954 DAG.getSelect(DL: dl, VT: ResVT, Cond: IsGT, LHS: DAG.getConstant(Val: 1, DL: dl, VT: ResVT),
10955 RHS: DAG.getConstant(Val: 0, DL: dl, VT: ResVT));
10956 return DAG.getSelect(DL: dl, VT: ResVT, Cond: IsLT, LHS: DAG.getAllOnesConstant(DL: dl, VT: ResVT),
10957 RHS: SelectZeroOrOne);
10958 }
10959
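  // Otherwise compute the result by subtracting the two booleans. With 0/1
  // booleans, IsGT - IsLT directly yields 1, 0 or -1; with 0/-1 booleans the
  // operands are swapped first so that "less" gives (-1) - 0 == -1 and
  // "greater" gives 0 - (-1) == 1.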
10960 if (getBooleanContents(Type: BoolVT) == ZeroOrNegativeOneBooleanContent)
10961 std::swap(a&: IsGT, b&: IsLT);
10962 return DAG.getSExtOrTrunc(Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: BoolVT, N1: IsGT, N2: IsLT), DL: dl,
10963 VT: ResVT);
10964}
10965
10966SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
10967 unsigned Opcode = Node->getOpcode();
10968 bool IsSigned = Opcode == ISD::SSHLSAT;
10969 SDValue LHS = Node->getOperand(Num: 0);
10970 SDValue RHS = Node->getOperand(Num: 1);
10971 EVT VT = LHS.getValueType();
10972 SDLoc dl(Node);
10973
10974 assert((Node->getOpcode() == ISD::SSHLSAT ||
10975 Node->getOpcode() == ISD::USHLSAT) &&
10976 "Expected a SHLSAT opcode");
10977 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10978 assert(VT.isInteger() && "Expected operands to be integers");
10979
10980 if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
10981 return DAG.UnrollVectorOp(N: Node);
10982
10983 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
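  // e.g. unsigned i8: 0x60 << 2 wraps to 0x80, and 0x80 >> 2 == 0x20 != 0x60,
  // so the result saturates to 0xFF.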
10984
10985 unsigned BW = VT.getScalarSizeInBits();
10986 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10987 SDValue Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS, N2: RHS);
10988 SDValue Orig =
10989 DAG.getNode(Opcode: IsSigned ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: Result, N2: RHS);
10990
10991 SDValue SatVal;
10992 if (IsSigned) {
10993 SDValue SatMin = DAG.getConstant(Val: APInt::getSignedMinValue(numBits: BW), DL: dl, VT);
10994 SDValue SatMax = DAG.getConstant(Val: APInt::getSignedMaxValue(numBits: BW), DL: dl, VT);
10995 SDValue Cond =
10996 DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: DAG.getConstant(Val: 0, DL: dl, VT), Cond: ISD::SETLT);
10997 SatVal = DAG.getSelect(DL: dl, VT, Cond, LHS: SatMin, RHS: SatMax);
10998 } else {
10999 SatVal = DAG.getConstant(Val: APInt::getMaxValue(numBits: BW), DL: dl, VT);
11000 }
11001 SDValue Cond = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: Orig, Cond: ISD::SETNE);
11002 return DAG.getSelect(DL: dl, VT, Cond, LHS: SatVal, RHS: Result);
11003}
11004
11005void TargetLowering::forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl,
11006 bool Signed, SDValue &Lo, SDValue &Hi,
11007 SDValue LHS, SDValue RHS,
11008 SDValue HiLHS, SDValue HiRHS) const {
11009 EVT VT = LHS.getValueType();
11010 assert(RHS.getValueType() == VT && "Mismatching operand types");
11011
11012 assert((HiLHS && HiRHS) || (!HiLHS && !HiRHS));
11013 assert((!Signed || !HiLHS) &&
         "Signed flag should only be set when HiLHS and HiRHS are null");
11015
11016 // We'll expand the multiplication by brute force because we have no other
11017 // options. This is a trivially-generalized version of the code from
11018 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
11019 // 4.3.1). If Signed is set, we can use arithmetic right shifts to propagate
11020 // sign bits while calculating the Hi half.
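  // Writing LHS = LH * 2^HalfBits + LL and RHS = RH * 2^HalfBits + RL, the
  // full product is
  //   LL*RL + (LH*RL + LL*RH) * 2^HalfBits + LH*RH * 2^(2*HalfBits),
  // and the code below accumulates the partial products while propagating
  // the carries between the two result halves.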
11021 unsigned Bits = VT.getSizeInBits();
11022 unsigned HalfBits = Bits / 2;
11023 SDValue Mask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: Bits, loBitsSet: HalfBits), DL: dl, VT);
11024 SDValue LL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: LHS, N2: Mask);
11025 SDValue RL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: RHS, N2: Mask);
11026
11027 SDValue T = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LL, N2: RL);
11028 SDValue TL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: T, N2: Mask);
11029
11030 SDValue Shift = DAG.getShiftAmountConstant(Val: HalfBits, VT, DL: dl);
11031 // This is always an unsigned shift.
11032 SDValue TH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: T, N2: Shift);
11033
11034 unsigned ShiftOpc = Signed ? ISD::SRA : ISD::SRL;
11035 SDValue LH = DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: LHS, N2: Shift);
11036 SDValue RH = DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: RHS, N2: Shift);
11037
11038 SDValue U =
11039 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LH, N2: RL), N2: TH);
11040 SDValue UL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: U, N2: Mask);
11041 SDValue UH = DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: U, N2: Shift);
11042
11043 SDValue V =
11044 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LL, N2: RH), N2: UL);
11045 SDValue VH = DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: V, N2: Shift);
11046
11047 Lo = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: TL,
11048 N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: V, N2: Shift));
11049
11050 Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LH, N2: RH),
11051 N2: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: UH, N2: VH));
11052
11053 // If HiLHS and HiRHS are set, multiply them by the opposite low part and add
11054 // the products to Hi.
11055 if (HiLHS) {
11056 Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Hi,
11057 N2: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT,
11058 N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: HiRHS, N2: LHS),
11059 N2: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: RHS, N2: HiLHS)));
11060 }
11061}
11062
11063void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
11064 bool Signed, const SDValue LHS,
11065 const SDValue RHS, SDValue &Lo,
11066 SDValue &Hi) const {
11067 EVT VT = LHS.getValueType();
11068 assert(RHS.getValueType() == VT && "Mismatching operand types");
11069 EVT WideVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: VT.getSizeInBits() * 2);
11070 // We can fall back to a libcall with an illegal type for the MUL if we
11071 // have a libcall big enough.
11072 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
11073 if (WideVT == MVT::i16)
11074 LC = RTLIB::MUL_I16;
11075 else if (WideVT == MVT::i32)
11076 LC = RTLIB::MUL_I32;
11077 else if (WideVT == MVT::i64)
11078 LC = RTLIB::MUL_I64;
11079 else if (WideVT == MVT::i128)
11080 LC = RTLIB::MUL_I128;
11081
11082 if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(Call: LC)) {
11083 forceExpandMultiply(DAG, dl, Signed, Lo, Hi, LHS, RHS);
11084 return;
11085 }
11086
11087 SDValue HiLHS, HiRHS;
11088 if (Signed) {
    // The high part is obtained by SRA'ing all but one of the bits of the
    // low part.
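    // The shift replicates the sign bit, so HiLHS/HiRHS become 0 for
    // non-negative inputs and -1 for negative ones, exactly the high halves
    // of the sign-extended operands.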
11091 unsigned LoSize = VT.getFixedSizeInBits();
11092 SDValue Shift = DAG.getShiftAmountConstant(Val: LoSize - 1, VT, DL: dl);
11093 HiLHS = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: LHS, N2: Shift);
11094 HiRHS = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: RHS, N2: Shift);
11095 } else {
11096 HiLHS = DAG.getConstant(Val: 0, DL: dl, VT);
11097 HiRHS = DAG.getConstant(Val: 0, DL: dl, VT);
11098 }
11099
11100 // Attempt a libcall.
11101 SDValue Ret;
11102 TargetLowering::MakeLibCallOptions CallOptions;
11103 CallOptions.setIsSigned(Signed);
11104 CallOptions.setIsPostTypeLegalization(true);
11105 if (shouldSplitFunctionArgumentsAsLittleEndian(DL: DAG.getDataLayout())) {
11106 // Halves of WideVT are packed into registers in different order
11107 // depending on platform endianness. This is usually handled by
11108 // the C calling convention, but we can't defer to it in
11109 // the legalizer.
11110 SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
11111 Ret = makeLibCall(DAG, LC, RetVT: WideVT, Ops: Args, CallOptions, dl).first;
11112 } else {
11113 SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
11114 Ret = makeLibCall(DAG, LC, RetVT: WideVT, Ops: Args, CallOptions, dl).first;
11115 }
11116 assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
         "Ret value is a collection of constituent nodes holding the result.");
11118 if (DAG.getDataLayout().isLittleEndian()) {
11119 // Same as above.
11120 Lo = Ret.getOperand(i: 0);
11121 Hi = Ret.getOperand(i: 1);
11122 } else {
11123 Lo = Ret.getOperand(i: 1);
11124 Hi = Ret.getOperand(i: 0);
11125 }
11126}
11127
11128SDValue
11129TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
11130 assert((Node->getOpcode() == ISD::SMULFIX ||
11131 Node->getOpcode() == ISD::UMULFIX ||
11132 Node->getOpcode() == ISD::SMULFIXSAT ||
11133 Node->getOpcode() == ISD::UMULFIXSAT) &&
11134 "Expected a fixed point multiplication opcode");
11135
11136 SDLoc dl(Node);
11137 SDValue LHS = Node->getOperand(Num: 0);
11138 SDValue RHS = Node->getOperand(Num: 1);
11139 EVT VT = LHS.getValueType();
11140 unsigned Scale = Node->getConstantOperandVal(Num: 2);
11141 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
11142 Node->getOpcode() == ISD::UMULFIXSAT);
11143 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
11144 Node->getOpcode() == ISD::SMULFIXSAT);
11145 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
11146 unsigned VTSize = VT.getScalarSizeInBits();
11147
11148 if (!Scale) {
11149 // [us]mul.fix(a, b, 0) -> mul(a, b)
11150 if (!Saturating) {
11151 if (isOperationLegalOrCustom(Op: ISD::MUL, VT))
11152 return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
11153 } else if (Signed && isOperationLegalOrCustom(Op: ISD::SMULO, VT)) {
11154 SDValue Result =
11155 DAG.getNode(Opcode: ISD::SMULO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
11156 SDValue Product = Result.getValue(R: 0);
11157 SDValue Overflow = Result.getValue(R: 1);
11158 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
11159
11160 APInt MinVal = APInt::getSignedMinValue(numBits: VTSize);
11161 APInt MaxVal = APInt::getSignedMaxValue(numBits: VTSize);
11162 SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
11163 SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
      // XOR the inputs; if the resulting sign bit is 0 the product will be
      // positive, else negative.
11166 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: LHS, N2: RHS);
11167 SDValue ProdNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Xor, RHS: Zero, Cond: ISD::SETLT);
11168 Result = DAG.getSelect(DL: dl, VT, Cond: ProdNeg, LHS: SatMin, RHS: SatMax);
11169 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Result, RHS: Product);
11170 } else if (!Signed && isOperationLegalOrCustom(Op: ISD::UMULO, VT)) {
11171 SDValue Result =
11172 DAG.getNode(Opcode: ISD::UMULO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
11173 SDValue Product = Result.getValue(R: 0);
11174 SDValue Overflow = Result.getValue(R: 1);
11175
11176 APInt MaxVal = APInt::getMaxValue(numBits: VTSize);
11177 SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
11178 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMax, RHS: Product);
11179 }
11180 }
11181
11182 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
11183 "Expected scale to be less than the number of bits if signed or at "
11184 "most the number of bits if unsigned.");
11185 assert(LHS.getValueType() == RHS.getValueType() &&
11186 "Expected both operands to be the same type");
11187
11188 // Get the upper and lower bits of the result.
11189 SDValue Lo, Hi;
11190 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
11191 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
11192 EVT WideVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: VTSize * 2);
11193 if (VT.isVector())
11194 WideVT =
11195 EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT, EC: VT.getVectorElementCount());
11196 if (isOperationLegalOrCustom(Op: LoHiOp, VT)) {
11197 SDValue Result = DAG.getNode(Opcode: LoHiOp, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: LHS, N2: RHS);
11198 Lo = Result.getValue(R: 0);
11199 Hi = Result.getValue(R: 1);
11200 } else if (isOperationLegalOrCustom(Op: HiOp, VT)) {
11201 Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
11202 Hi = DAG.getNode(Opcode: HiOp, DL: dl, VT, N1: LHS, N2: RHS);
11203 } else if (isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT)) {
11204 // Try for a multiplication using a wider type.
11205 unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
11206 SDValue LHSExt = DAG.getNode(Opcode: Ext, DL: dl, VT: WideVT, Operand: LHS);
11207 SDValue RHSExt = DAG.getNode(Opcode: Ext, DL: dl, VT: WideVT, Operand: RHS);
11208 SDValue Res = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: LHSExt, N2: RHSExt);
11209 Lo = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Res);
11210 SDValue Shifted =
11211 DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: WideVT, N1: Res,
11212 N2: DAG.getShiftAmountConstant(Val: VTSize, VT: WideVT, DL: dl));
11213 Hi = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Shifted);
11214 } else if (VT.isVector()) {
11215 return SDValue();
11216 } else {
11217 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
11218 }
11219
11220 if (Scale == VTSize)
    // Result is just the top half since we'd be shifting by the width of the
    // operand. Overflow is impossible, so this works for both UMULFIX and
    // UMULFIXSAT.
11224 return Hi;
11225
11226 // The result will need to be shifted right by the scale since both operands
11227 // are scaled. The result is given to us in 2 halves, so we only want part of
11228 // both in the result.
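  // FSHR(Hi, Lo, Scale) selects bits [VTSize + Scale - 1 : Scale] of the
  // double-width concatenation Hi:Lo, which is exactly the scaled product.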
11229 SDValue Result = DAG.getNode(Opcode: ISD::FSHR, DL: dl, VT, N1: Hi, N2: Lo,
11230 N3: DAG.getShiftAmountConstant(Val: Scale, VT, DL: dl));
11231 if (!Saturating)
11232 return Result;
11233
11234 if (!Signed) {
11235 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
11236 // widened multiplication) aren't all zeroes.
11237
11238 // Saturate to max if ((Hi >> Scale) != 0),
11239 // which is the same as if (Hi > ((1 << Scale) - 1))
11240 APInt MaxVal = APInt::getMaxValue(numBits: VTSize);
11241 SDValue LowMask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VTSize, loBitsSet: Scale),
11242 DL: dl, VT);
11243 Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: LowMask,
11244 True: DAG.getConstant(Val: MaxVal, DL: dl, VT), False: Result,
11245 Cond: ISD::SETUGT);
11246
11247 return Result;
11248 }
11249
11250 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
11251 // widened multiplication) aren't all ones or all zeroes.
11252
11253 SDValue SatMin = DAG.getConstant(Val: APInt::getSignedMinValue(numBits: VTSize), DL: dl, VT);
11254 SDValue SatMax = DAG.getConstant(Val: APInt::getSignedMaxValue(numBits: VTSize), DL: dl, VT);
11255
11256 if (Scale == 0) {
11257 SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Lo,
11258 N2: DAG.getShiftAmountConstant(Val: VTSize - 1, VT, DL: dl));
11259 SDValue Overflow = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Hi, RHS: Sign, Cond: ISD::SETNE);
    // Saturate to SatMin if the wide product is negative, and to SatMax if
    // the wide product is positive ...
11262 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
11263 SDValue ResultIfOverflow = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: Zero, True: SatMin, False: SatMax,
11264 Cond: ISD::SETLT);
11265 // ... but only if we overflowed.
11266 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: ResultIfOverflow, RHS: Result);
11267 }
11268
  // We handled Scale == 0 above, so all the bits to examine are in Hi.
11270
11271 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
11272 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
11273 SDValue LowMask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VTSize, loBitsSet: Scale - 1),
11274 DL: dl, VT);
11275 Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: LowMask, True: SatMax, False: Result, Cond: ISD::SETGT);
  // Saturate to min if ((Hi >> (Scale - 1)) < -1),
  // which is the same as if (Hi < (-1 << (Scale - 1))).
11278 SDValue HighMask =
11279 DAG.getConstant(Val: APInt::getHighBitsSet(numBits: VTSize, hiBitsSet: VTSize - Scale + 1),
11280 DL: dl, VT);
11281 Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: HighMask, True: SatMin, False: Result, Cond: ISD::SETLT);
11282 return Result;
11283}
11284
11285SDValue
11286TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
11287 SDValue LHS, SDValue RHS,
11288 unsigned Scale, SelectionDAG &DAG) const {
11289 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
11290 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
11291 "Expected a fixed point division opcode");
11292
11293 EVT VT = LHS.getValueType();
11294 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
11295 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
11296 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
11297
11298 // If there is enough room in the type to upscale the LHS or downscale the
11299 // RHS before the division, we can perform it in this type without having to
11300 // resize. For signed operations, the LHS headroom is the number of
  // redundant sign bits, and for unsigned ones it is the number of leading
  // zeroes. The headroom for the RHS is the number of trailing zeroes.
11303 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(Op: LHS) - 1
11304 : DAG.computeKnownBits(Op: LHS).countMinLeadingZeros();
11305 unsigned RHSTrail = DAG.computeKnownBits(Op: RHS).countMinTrailingZeros();
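  // For example, an i8 UDIVFIX with Scale == 3 needs LHSLead + RHSTrail >= 3;
  // with LHSLead == 2 and RHSTrail == 1, the LHS is shifted up by 2 and the
  // RHS down by 1, preserving the quotient's scaling of 2^3.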
11306
11307 // For signed saturating operations, we need to be able to detect true integer
11308 // division overflow; that is, when you have MIN / -EPS. However, this
11309 // is undefined behavior and if we emit divisions that could take such
11310 // values it may cause undesired behavior (arithmetic exceptions on x86, for
11311 // example).
11312 // Avoid this by requiring an extra bit so that we never get this case.
11313 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
11314 // signed saturating division, we need to emit a whopping 32-bit division.
11315 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
11316 return SDValue();
11317
11318 unsigned LHSShift = std::min(a: LHSLead, b: Scale);
11319 unsigned RHSShift = Scale - LHSShift;
11320
11321 // At this point, we know that if we shift the LHS up by LHSShift and the
11322 // RHS down by RHSShift, we can emit a regular division with a final scaling
11323 // factor of Scale.
11324
11325 if (LHSShift)
11326 LHS = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS,
11327 N2: DAG.getShiftAmountConstant(Val: LHSShift, VT, DL: dl));
11328 if (RHSShift)
11329 RHS = DAG.getNode(Opcode: Signed ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: RHS,
11330 N2: DAG.getShiftAmountConstant(Val: RHSShift, VT, DL: dl));
11331
11332 SDValue Quot;
11333 if (Signed) {
11334 // For signed operations, if the resulting quotient is negative and the
11335 // remainder is nonzero, subtract 1 from the quotient to round towards
11336 // negative infinity.
11337 SDValue Rem;
11338 // FIXME: Ideally we would always produce an SDIVREM here, but if the
11339 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
11340 // we couldn't just form a libcall, but the type legalizer doesn't do it.
11341 if (isTypeLegal(VT) &&
11342 isOperationLegalOrCustom(Op: ISD::SDIVREM, VT)) {
11343 Quot = DAG.getNode(Opcode: ISD::SDIVREM, DL: dl,
11344 VTList: DAG.getVTList(VT1: VT, VT2: VT),
11345 N1: LHS, N2: RHS);
11346 Rem = Quot.getValue(R: 1);
11347 Quot = Quot.getValue(R: 0);
11348 } else {
11349 Quot = DAG.getNode(Opcode: ISD::SDIV, DL: dl, VT,
11350 N1: LHS, N2: RHS);
11351 Rem = DAG.getNode(Opcode: ISD::SREM, DL: dl, VT,
11352 N1: LHS, N2: RHS);
11353 }
11354 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
11355 SDValue RemNonZero = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Rem, RHS: Zero, Cond: ISD::SETNE);
11356 SDValue LHSNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: Zero, Cond: ISD::SETLT);
11357 SDValue RHSNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: RHS, RHS: Zero, Cond: ISD::SETLT);
11358 SDValue QuotNeg = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: BoolVT, N1: LHSNeg, N2: RHSNeg);
11359 SDValue Sub1 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Quot,
11360 N2: DAG.getConstant(Val: 1, DL: dl, VT));
11361 Quot = DAG.getSelect(DL: dl, VT,
11362 Cond: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: BoolVT, N1: RemNonZero, N2: QuotNeg),
11363 LHS: Sub1, RHS: Quot);
11364 } else
11365 Quot = DAG.getNode(Opcode: ISD::UDIV, DL: dl, VT,
11366 N1: LHS, N2: RHS);
11367
11368 return Quot;
11369}
11370
11371void TargetLowering::expandUADDSUBO(
11372 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11373 SDLoc dl(Node);
11374 SDValue LHS = Node->getOperand(Num: 0);
11375 SDValue RHS = Node->getOperand(Num: 1);
11376 bool IsAdd = Node->getOpcode() == ISD::UADDO;
11377
11378 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
11379 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
11380 if (isOperationLegalOrCustom(Op: OpcCarry, VT: Node->getValueType(ResNo: 0))) {
11381 SDValue CarryIn = DAG.getConstant(Val: 0, DL: dl, VT: Node->getValueType(ResNo: 1));
11382 SDValue NodeCarry = DAG.getNode(Opcode: OpcCarry, DL: dl, VTList: Node->getVTList(),
11383 Ops: { LHS, RHS, CarryIn });
11384 Result = SDValue(NodeCarry.getNode(), 0);
11385 Overflow = SDValue(NodeCarry.getNode(), 1);
11386 return;
11387 }
11388
11389 Result = DAG.getNode(Opcode: IsAdd ? ISD::ADD : ISD::SUB, DL: dl,
11390 VT: LHS.getValueType(), N1: LHS, N2: RHS);
11391
11392 EVT ResultType = Node->getValueType(ResNo: 1);
11393 EVT SetCCType = getSetCCResultType(
11394 DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: 0));
11395 SDValue SetCC;
11396 if (IsAdd && isOneConstant(V: RHS)) {
    // Special case: uaddo X, 1 overflows if X + 1 is 0. This potentially
    // reduces the live range of X. We assume comparing with 0 is cheap.
11399 // The general case (X + C) < C is not necessarily beneficial. Although we
11400 // reduce the live range of X, we may introduce the materialization of
11401 // constant C.
11402 SetCC =
11403 DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Result,
11404 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Node->getValueType(ResNo: 0)), Cond: ISD::SETEQ);
11405 } else if (IsAdd && isAllOnesConstant(V: RHS)) {
11406 // Special case: uaddo X, -1 overflows if X != 0.
11407 SetCC =
11408 DAG.getSetCC(DL: dl, VT: SetCCType, LHS,
11409 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Node->getValueType(ResNo: 0)), Cond: ISD::SETNE);
11410 } else {
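    // Unsigned addition overflows iff Result < LHS, and unsigned subtraction
    // borrows iff Result > LHS.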
11411 ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
11412 SetCC = DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Result, RHS: LHS, Cond: CC);
11413 }
11414 Overflow = DAG.getBoolExtOrTrunc(Op: SetCC, SL: dl, VT: ResultType, OpVT: ResultType);
11415}
11416
11417void TargetLowering::expandSADDSUBO(
11418 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11419 SDLoc dl(Node);
11420 SDValue LHS = Node->getOperand(Num: 0);
11421 SDValue RHS = Node->getOperand(Num: 1);
11422 bool IsAdd = Node->getOpcode() == ISD::SADDO;
11423
11424 Result = DAG.getNode(Opcode: IsAdd ? ISD::ADD : ISD::SUB, DL: dl,
11425 VT: LHS.getValueType(), N1: LHS, N2: RHS);
11426
11427 EVT ResultType = Node->getValueType(ResNo: 1);
11428 EVT OType = getSetCCResultType(
11429 DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: 0));
11430
11431 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
11432 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
11433 if (isOperationLegal(Op: OpcSat, VT: LHS.getValueType())) {
11434 SDValue Sat = DAG.getNode(Opcode: OpcSat, DL: dl, VT: LHS.getValueType(), N1: LHS, N2: RHS);
11435 SDValue SetCC = DAG.getSetCC(DL: dl, VT: OType, LHS: Result, RHS: Sat, Cond: ISD::SETNE);
11436 Overflow = DAG.getBoolExtOrTrunc(Op: SetCC, SL: dl, VT: ResultType, OpVT: ResultType);
11437 return;
11438 }
11439
11440 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: LHS.getValueType());
11441
11442 // For an addition, the result should be less than one of the operands (LHS)
11443 // if and only if the other operand (RHS) is negative, otherwise there will
11444 // be overflow.
11445 // For a subtraction, the result should be less than one of the operands
11446 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11447 // otherwise there will be overflow.
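  // Overflow is therefore the XOR of the two conditions; e.g. for i8,
  // 100 + 100 wraps to -56: "Result < LHS" is true while "RHS < 0" is false,
  // so overflow is reported.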
11448 SDValue ResultLowerThanLHS = DAG.getSetCC(DL: dl, VT: OType, LHS: Result, RHS: LHS, Cond: ISD::SETLT);
11449 SDValue ConditionRHS =
11450 DAG.getSetCC(DL: dl, VT: OType, LHS: RHS, RHS: Zero, Cond: IsAdd ? ISD::SETLT : ISD::SETGT);
11451
11452 Overflow = DAG.getBoolExtOrTrunc(
11453 Op: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OType, N1: ConditionRHS, N2: ResultLowerThanLHS), SL: dl,
11454 VT: ResultType, OpVT: ResultType);
11455}
11456
11457bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
11458 SDValue &Overflow, SelectionDAG &DAG) const {
11459 SDLoc dl(Node);
11460 EVT VT = Node->getValueType(ResNo: 0);
11461 EVT SetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
11462 SDValue LHS = Node->getOperand(Num: 0);
11463 SDValue RHS = Node->getOperand(Num: 1);
11464 bool isSigned = Node->getOpcode() == ISD::SMULO;
11465
11466 // For power-of-two multiplications we can use a simpler shift expansion.
11467 if (ConstantSDNode *RHSC = isConstOrConstSplat(N: RHS)) {
11468 const APInt &C = RHSC->getAPIntValue();
11469 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
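    // e.g. for unsigned i8, X = 0x50 and S = 2: X << 2 wraps to 0x40, and
    // 0x40 >> 2 == 0x10 != 0x50, so overflow is reported.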
11470 if (C.isPowerOf2()) {
11471 // smulo(x, signed_min) is same as umulo(x, signed_min).
11472 bool UseArithShift = isSigned && !C.isMinSignedValue();
11473 SDValue ShiftAmt = DAG.getShiftAmountConstant(Val: C.logBase2(), VT, DL: dl);
11474 Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS, N2: ShiftAmt);
11475 Overflow = DAG.getSetCC(DL: dl, VT: SetCCVT,
11476 LHS: DAG.getNode(Opcode: UseArithShift ? ISD::SRA : ISD::SRL,
11477 DL: dl, VT, N1: Result, N2: ShiftAmt),
11478 RHS: LHS, Cond: ISD::SETNE);
11479 return true;
11480 }
11481 }
11482
11483 EVT WideVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: VT.getScalarSizeInBits() * 2);
11484 if (VT.isVector())
11485 WideVT =
11486 EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT, EC: VT.getVectorElementCount());
11487
11488 SDValue BottomHalf;
11489 SDValue TopHalf;
11490 static const unsigned Ops[2][3] =
11491 { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
11492 { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
11493 if (isOperationLegalOrCustom(Op: Ops[isSigned][0], VT)) {
11494 BottomHalf = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
11495 TopHalf = DAG.getNode(Opcode: Ops[isSigned][0], DL: dl, VT, N1: LHS, N2: RHS);
11496 } else if (isOperationLegalOrCustom(Op: Ops[isSigned][1], VT)) {
11497 BottomHalf = DAG.getNode(Opcode: Ops[isSigned][1], DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: LHS,
11498 N2: RHS);
11499 TopHalf = BottomHalf.getValue(R: 1);
11500 } else if (isTypeLegal(VT: WideVT)) {
11501 LHS = DAG.getNode(Opcode: Ops[isSigned][2], DL: dl, VT: WideVT, Operand: LHS);
11502 RHS = DAG.getNode(Opcode: Ops[isSigned][2], DL: dl, VT: WideVT, Operand: RHS);
11503 SDValue Mul = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: LHS, N2: RHS);
11504 BottomHalf = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Mul);
11505 SDValue ShiftAmt =
11506 DAG.getShiftAmountConstant(Val: VT.getScalarSizeInBits(), VT: WideVT, DL: dl);
11507 TopHalf = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT,
11508 Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideVT, N1: Mul, N2: ShiftAmt));
11509 } else {
11510 if (VT.isVector())
11511 return false;
11512
11513 forceExpandWideMUL(DAG, dl, Signed: isSigned, LHS, RHS, Lo&: BottomHalf, Hi&: TopHalf);
11514 }
11515
11516 Result = BottomHalf;
11517 if (isSigned) {
11518 SDValue ShiftAmt = DAG.getShiftAmountConstant(
11519 Val: VT.getScalarSizeInBits() - 1, VT: BottomHalf.getValueType(), DL: dl);
11520 SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: BottomHalf, N2: ShiftAmt);
11521 Overflow = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: TopHalf, RHS: Sign, Cond: ISD::SETNE);
11522 } else {
11523 Overflow = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: TopHalf,
11524 RHS: DAG.getConstant(Val: 0, DL: dl, VT), Cond: ISD::SETNE);
11525 }
11526
11527 // Truncate the result if SetCC returns a larger type than needed.
11528 EVT RType = Node->getValueType(ResNo: 1);
11529 if (RType.bitsLT(VT: Overflow.getValueType()))
11530 Overflow = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: RType, Operand: Overflow);
11531
11532 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
11533 "Unexpected result type for S/UMULO legalization");
11534 return true;
11535}
11536
11537SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
11538 SDLoc dl(Node);
11539 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(VecReduceOpcode: Node->getOpcode());
11540 SDValue Op = Node->getOperand(Num: 0);
11541 EVT VT = Op.getValueType();
11542
11543 // Try to use a shuffle reduction for power of two vectors.
11544 if (VT.isPow2VectorType()) {
11545 while (VT.getVectorElementCount().isKnownMultipleOf(RHS: 2)) {
11546 EVT HalfVT = VT.getHalfNumVectorElementsVT(Context&: *DAG.getContext());
11547 if (!isOperationLegalOrCustom(Op: BaseOpcode, VT: HalfVT))
11548 break;
11549
11550 SDValue Lo, Hi;
11551 std::tie(args&: Lo, args&: Hi) = DAG.SplitVector(N: Op, DL: dl);
11552 Op = DAG.getNode(Opcode: BaseOpcode, DL: dl, VT: HalfVT, N1: Lo, N2: Hi, Flags: Node->getFlags());
11553 VT = HalfVT;
11554
11555 // Stop if splitting is enough to make the reduction legal.
11556 if (isOperationLegalOrCustom(Op: Node->getOpcode(), VT: HalfVT))
11557 return DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VT: Node->getValueType(ResNo: 0), Operand: Op,
11558 Flags: Node->getFlags());
11559 }
11560 }
11561
11562 if (VT.isScalableVector())
11563 reportFatalInternalError(
11564 reason: "Expanding reductions for scalable vectors is undefined.");
11565
11566 EVT EltVT = VT.getVectorElementType();
11567 unsigned NumElts = VT.getVectorNumElements();
11568
11569 SmallVector<SDValue, 8> Ops;
11570 DAG.ExtractVectorElements(Op, Args&: Ops, Start: 0, Count: NumElts);
11571
11572 SDValue Res = Ops[0];
11573 for (unsigned i = 1; i < NumElts; i++)
11574 Res = DAG.getNode(Opcode: BaseOpcode, DL: dl, VT: EltVT, N1: Res, N2: Ops[i], Flags: Node->getFlags());
11575
11576 // Result type may be wider than element type.
11577 if (EltVT != Node->getValueType(ResNo: 0))
11578 Res = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: Node->getValueType(ResNo: 0), Operand: Res);
11579 return Res;
11580}
11581
11582SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
11583 SDLoc dl(Node);
11584 SDValue AccOp = Node->getOperand(Num: 0);
11585 SDValue VecOp = Node->getOperand(Num: 1);
11586 SDNodeFlags Flags = Node->getFlags();
11587
11588 EVT VT = VecOp.getValueType();
11589 EVT EltVT = VT.getVectorElementType();
11590
11591 if (VT.isScalableVector())
11592 report_fatal_error(
11593 reason: "Expanding reductions for scalable vectors is undefined.");
11594
11595 unsigned NumElts = VT.getVectorNumElements();
11596
11597 SmallVector<SDValue, 8> Ops;
11598 DAG.ExtractVectorElements(Op: VecOp, Args&: Ops, Start: 0, Count: NumElts);
11599
11600 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(VecReduceOpcode: Node->getOpcode());
11601
11602 SDValue Res = AccOp;
11603 for (unsigned i = 0; i < NumElts; i++)
11604 Res = DAG.getNode(Opcode: BaseOpcode, DL: dl, VT: EltVT, N1: Res, N2: Ops[i], Flags);
11605
11606 return Res;
11607}
11608
11609bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
11610 SelectionDAG &DAG) const {
11611 EVT VT = Node->getValueType(ResNo: 0);
11612 SDLoc dl(Node);
11613 bool isSigned = Node->getOpcode() == ISD::SREM;
11614 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
11615 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
11616 SDValue Dividend = Node->getOperand(Num: 0);
11617 SDValue Divisor = Node->getOperand(Num: 1);
11618 if (isOperationLegalOrCustom(Op: DivRemOpc, VT)) {
11619 SDVTList VTs = DAG.getVTList(VT1: VT, VT2: VT);
11620 Result = DAG.getNode(Opcode: DivRemOpc, DL: dl, VTList: VTs, N1: Dividend, N2: Divisor).getValue(R: 1);
11621 return true;
11622 }
11623 if (isOperationLegalOrCustom(Op: DivOpc, VT)) {
11624 // X % Y -> X-X/Y*Y
11625 SDValue Divide = DAG.getNode(Opcode: DivOpc, DL: dl, VT, N1: Dividend, N2: Divisor);
11626 SDValue Mul = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Divide, N2: Divisor);
11627 Result = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Dividend, N2: Mul);
11628 return true;
11629 }
11630 return false;
11631}
11632
11633SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
11634 SelectionDAG &DAG) const {
11635 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
11636 SDLoc dl(SDValue(Node, 0));
11637 SDValue Src = Node->getOperand(Num: 0);
11638
11639 // DstVT is the result type, while SatVT is the size to which we saturate
11640 EVT SrcVT = Src.getValueType();
11641 EVT DstVT = Node->getValueType(ResNo: 0);
11642
11643 EVT SatVT = cast<VTSDNode>(Val: Node->getOperand(Num: 1))->getVT();
11644 unsigned SatWidth = SatVT.getScalarSizeInBits();
11645 unsigned DstWidth = DstVT.getScalarSizeInBits();
11646 assert(SatWidth <= DstWidth &&
         "Expected saturation width no larger than result width");
11648
11649 // Determine minimum and maximum integer values and their corresponding
11650 // floating-point values.
11651 APInt MinInt, MaxInt;
11652 if (IsSigned) {
11653 MinInt = APInt::getSignedMinValue(numBits: SatWidth).sext(width: DstWidth);
11654 MaxInt = APInt::getSignedMaxValue(numBits: SatWidth).sext(width: DstWidth);
11655 } else {
11656 MinInt = APInt::getMinValue(numBits: SatWidth).zext(width: DstWidth);
11657 MaxInt = APInt::getMaxValue(numBits: SatWidth).zext(width: DstWidth);
11658 }
11659
11660 // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
11661 // libcall emission cannot handle this. Large result types will fail.
11662 if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
11663 Src = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f32, Operand: Src);
11664 SrcVT = Src.getValueType();
11665 }
11666
11667 const fltSemantics &Sem = SrcVT.getFltSemantics();
11668 APFloat MinFloat(Sem);
11669 APFloat MaxFloat(Sem);
11670
11671 APFloat::opStatus MinStatus =
11672 MinFloat.convertFromAPInt(Input: MinInt, IsSigned, RM: APFloat::rmTowardZero);
11673 APFloat::opStatus MaxStatus =
11674 MaxFloat.convertFromAPInt(Input: MaxInt, IsSigned, RM: APFloat::rmTowardZero);
11675 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
11676 !(MaxStatus & APFloat::opStatus::opInexact);
11677
11678 SDValue MinFloatNode = DAG.getConstantFP(Val: MinFloat, DL: dl, VT: SrcVT);
11679 SDValue MaxFloatNode = DAG.getConstantFP(Val: MaxFloat, DL: dl, VT: SrcVT);
11680
11681 // If the integer bounds are exactly representable as floats and min/max are
11682 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
11683 // of comparisons and selects.
11684 bool MinMaxLegal = isOperationLegal(Op: ISD::FMINNUM, VT: SrcVT) &&
11685 isOperationLegal(Op: ISD::FMAXNUM, VT: SrcVT);
11686 if (AreExactFloatBounds && MinMaxLegal) {
11687 SDValue Clamped = Src;
11688
11689 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
11690 Clamped = DAG.getNode(Opcode: ISD::FMAXNUM, DL: dl, VT: SrcVT, N1: Clamped, N2: MinFloatNode);
11691 // Clamp by MaxFloat from above. NaN cannot occur.
11692 Clamped = DAG.getNode(Opcode: ISD::FMINNUM, DL: dl, VT: SrcVT, N1: Clamped, N2: MaxFloatNode);
11693 // Convert clamped value to integer.
11694 SDValue FpToInt = DAG.getNode(Opcode: IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
11695 DL: dl, VT: DstVT, Operand: Clamped);
11696
11697 // In the unsigned case we're done, because we mapped NaN to MinFloat,
11698 // which will cast to zero.
11699 if (!IsSigned)
11700 return FpToInt;
11701
11702 // Otherwise, select 0 if Src is NaN.
11703 SDValue ZeroInt = DAG.getConstant(Val: 0, DL: dl, VT: DstVT);
11704 EVT SetCCVT =
11705 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: SrcVT);
11706 SDValue IsNan = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Src, Cond: ISD::CondCode::SETUO);
11707 return DAG.getSelect(DL: dl, VT: DstVT, Cond: IsNan, LHS: ZeroInt, RHS: FpToInt);
11708 }
11709
11710 SDValue MinIntNode = DAG.getConstant(Val: MinInt, DL: dl, VT: DstVT);
11711 SDValue MaxIntNode = DAG.getConstant(Val: MaxInt, DL: dl, VT: DstVT);
11712
11713 // Result of direct conversion. The assumption here is that the operation is
11714 // non-trapping and it's fine to apply it to an out-of-range value if we
11715 // select it away later.
11716 SDValue FpToInt =
11717 DAG.getNode(Opcode: IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, DL: dl, VT: DstVT, Operand: Src);
11718
11719 SDValue Select = FpToInt;
11720
11721 EVT SetCCVT =
11722 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: SrcVT);
11723
11724 // If Src ULT MinFloat, select MinInt. In particular, this also selects
11725 // MinInt if Src is NaN.
11726 SDValue ULT = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: MinFloatNode, Cond: ISD::SETULT);
11727 Select = DAG.getSelect(DL: dl, VT: DstVT, Cond: ULT, LHS: MinIntNode, RHS: Select);
11728 // If Src OGT MaxFloat, select MaxInt.
11729 SDValue OGT = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: MaxFloatNode, Cond: ISD::SETOGT);
11730 Select = DAG.getSelect(DL: dl, VT: DstVT, Cond: OGT, LHS: MaxIntNode, RHS: Select);
11731
11732 // In the unsigned case we are done, because we mapped NaN to MinInt, which
11733 // is already zero.
11734 if (!IsSigned)
11735 return Select;
11736
11737 // Otherwise, select 0 if Src is NaN.
11738 SDValue ZeroInt = DAG.getConstant(Val: 0, DL: dl, VT: DstVT);
11739 SDValue IsNan = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Src, Cond: ISD::CondCode::SETUO);
11740 return DAG.getSelect(DL: dl, VT: DstVT, Cond: IsNan, LHS: ZeroInt, RHS: Select);
11741}
11742
11743SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue Op,
11744 const SDLoc &dl,
11745 SelectionDAG &DAG) const {
11746 EVT OperandVT = Op.getValueType();
11747 if (OperandVT.getScalarType() == ResultVT.getScalarType())
11748 return Op;
11749 EVT ResultIntVT = ResultVT.changeTypeToInteger();
11750 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11751 // can induce double-rounding which may alter the results. We can
11752 // correct for this using a trick explained in: Boldo, Sylvie, and
11753 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11754 // World Congress. 2005.
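  // Rounding the intermediate result to odd keeps the sticky information in
  // the low mantissa bit, so the second, narrower rounding cannot pick the
  // wrong direction (the classic double-rounding hazard).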
11755 SDValue Narrow = DAG.getFPExtendOrRound(Op, DL: dl, VT: ResultVT);
11756 SDValue NarrowAsWide = DAG.getFPExtendOrRound(Op: Narrow, DL: dl, VT: OperandVT);
11757
11758 // We can keep the narrow value as-is if narrowing was exact (no
11759 // rounding error), the wide value was NaN (the narrow value is also
11760 // NaN and should be preserved) or if we rounded to the odd value.
11761 SDValue NarrowBits = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: ResultIntVT, Operand: Narrow);
11762 SDValue One = DAG.getConstant(Val: 1, DL: dl, VT: ResultIntVT);
11763 SDValue NegativeOne = DAG.getAllOnesConstant(DL: dl, VT: ResultIntVT);
11764 SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ResultIntVT, N1: NarrowBits, N2: One);
11765 EVT ResultIntVTCCVT = getSetCCResultType(
11766 DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: And.getValueType());
11767 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: ResultIntVT);
11768 // If the low bit is set, the result is already odd and needs no adjustment.
11769 SDValue AlreadyOdd = DAG.getSetCC(DL: dl, VT: ResultIntVTCCVT, LHS: And, RHS: Zero, Cond: ISD::SETNE);
11770
11771 EVT WideSetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(),
11772 VT: Op.getValueType());
11773 // We keep results which are exact, odd or NaN.
11774 SDValue KeepNarrow =
11775 DAG.getSetCC(DL: dl, VT: WideSetCCVT, LHS: Op, RHS: NarrowAsWide, Cond: ISD::SETUEQ);
11776 KeepNarrow = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: WideSetCCVT, N1: KeepNarrow, N2: AlreadyOdd);
11777 // We effectively performed a round-down if AbsNarrow is smaller than
11778 // AbsWide.
11779 SDValue AbsWide = DAG.getNode(Opcode: ISD::FABS, DL: dl, VT: OperandVT, Operand: Op);
11780 SDValue AbsNarrowAsWide = DAG.getNode(Opcode: ISD::FABS, DL: dl, VT: OperandVT, Operand: NarrowAsWide);
11781 SDValue NarrowIsRd =
11782 DAG.getSetCC(DL: dl, VT: WideSetCCVT, LHS: AbsWide, RHS: AbsNarrowAsWide, Cond: ISD::SETOGT);
11783 // If the narrow value is odd or exact, pick it.
11784 // Otherwise, narrow is even and corresponds to either the rounded-up
11785 // or rounded-down value. If narrow is the rounded-down value, we want
11786 // the rounded-up value as it will be odd.
11787 SDValue Adjust = DAG.getSelect(DL: dl, VT: ResultIntVT, Cond: NarrowIsRd, LHS: One, RHS: NegativeOne);
11788 SDValue Adjusted = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ResultIntVT, N1: NarrowBits, N2: Adjust);
11789 Op = DAG.getSelect(DL: dl, VT: ResultIntVT, Cond: KeepNarrow, LHS: NarrowBits, RHS: Adjusted);
11790 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: ResultVT, Operand: Op);
11791}
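// A scalar model of the round-to-odd step above, for binary64 -> binary32
// (a sketch assuming IEEE semantics plus <cstring> and <cmath>; not part of
// the lowering):
//
//   float RoundToOdd(double Wide) {
//     float Narrow = (float)Wide;            // May round.
//     if ((double)Narrow == Wide || Narrow != Narrow)
//       return Narrow;                       // Exact or NaN: keep it.
//     uint32_t Bits;
//     std::memcpy(&Bits, &Narrow, sizeof(Bits));
//     if (Bits & 1)
//       return Narrow;                       // Already odd: keep it.
//     // Even and inexact: step one ulp back toward Wide; the neighbour of
//     // an even significand is odd.
//     Bits += (std::fabs((double)Narrow) < std::fabs(Wide)) ? 1 : -1;
//     std::memcpy(&Narrow, &Bits, sizeof(Bits));
//     return Narrow;
//   }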
11792
11793SDValue TargetLowering::expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const {
11794 assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
11795 SDValue Op = Node->getOperand(Num: 0);
11796 EVT VT = Node->getValueType(ResNo: 0);
11797 SDLoc dl(Node);
11798 if (VT.getScalarType() == MVT::bf16) {
11799 if (Node->getConstantOperandVal(Num: 1) == 1) {
11800 return DAG.getNode(Opcode: ISD::FP_TO_BF16, DL: dl, VT, Operand: Node->getOperand(Num: 0));
11801 }
11802 EVT OperandVT = Op.getValueType();
11803 SDValue IsNaN = DAG.getSetCC(
11804 DL: dl,
11805 VT: getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: OperandVT),
11806 LHS: Op, RHS: Op, Cond: ISD::SETUO);
11807
11808 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11809 // can induce double-rounding, which may alter the results. We can
11810 // correct for this using a trick explained in: Boldo, Sylvie, and
11811 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11812 // World Congress. 2005.
11813 EVT F32 = VT.isVector() ? VT.changeVectorElementType(EltVT: MVT::f32) : MVT::f32;
11814 EVT I32 = F32.changeTypeToInteger();
11815 Op = expandRoundInexactToOdd(ResultVT: F32, Op, dl, DAG);
11816 Op = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: I32, Operand: Op);
11817
11818 // Conversions should set NaN's quiet bit. This also prevents NaNs from
11819 // turning into infinities.
11820 SDValue NaN =
11821 DAG.getNode(Opcode: ISD::OR, DL: dl, VT: I32, N1: Op, N2: DAG.getConstant(Val: 0x400000, DL: dl, VT: I32));
11822
11823 // Factor in the contribution of the low 16 bits.
11824 SDValue One = DAG.getConstant(Val: 1, DL: dl, VT: I32);
11825 SDValue Lsb = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: I32, N1: Op,
11826 N2: DAG.getShiftAmountConstant(Val: 16, VT: I32, DL: dl));
11827 Lsb = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: I32, N1: Lsb, N2: One);
11828 SDValue RoundingBias =
11829 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: I32, N1: DAG.getConstant(Val: 0x7fff, DL: dl, VT: I32), N2: Lsb);
11830 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: I32, N1: Op, N2: RoundingBias);
11831
11832 // Don't round if we had a NaN; we don't want to turn 0x7fffffff into
11833 // 0x80000000.
11834 Op = DAG.getSelect(DL: dl, VT: I32, Cond: IsNaN, LHS: NaN, RHS: Add);
11835
11836 // Now that we have rounded, shift the bits into position.
11837 Op = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: I32, N1: Op,
11838 N2: DAG.getShiftAmountConstant(Val: 16, VT: I32, DL: dl));
11839 Op = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: I32, Operand: Op);
11840 EVT I16 = I32.isVector() ? I32.changeVectorElementType(EltVT: MVT::i16) : MVT::i16;
11841 Op = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: I16, Operand: Op);
11842 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: Op);
11843 }
11844 return SDValue();
11845}
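// The bf16 path above is the DAG form of the usual scalar bit trick; a
// minimal sketch (assumes IEEE binary32 and <cstring>):
//
//   uint16_t F32ToBF16(float F) {
//     uint32_t Bits;
//     std::memcpy(&Bits, &F, sizeof(Bits));
//     if (F != F)                          // NaN: only set the quiet bit;
//       return (Bits | 0x400000) >> 16;    // rounding could yield an inf.
//     uint32_t Lsb = (Bits >> 16) & 1;     // Round to nearest, ties to
//     return (Bits + 0x7fff + Lsb) >> 16;  // even on the kept 16 bits.
//   }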
11846
11847SDValue TargetLowering::expandVectorSplice(SDNode *Node,
11848 SelectionDAG &DAG) const {
11849 assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
11850 assert(Node->getValueType(0).isScalableVector() &&
11851 "Fixed length vector types expected to use SHUFFLE_VECTOR!");
11852
11853 EVT VT = Node->getValueType(ResNo: 0);
11854 SDValue V1 = Node->getOperand(Num: 0);
11855 SDValue V2 = Node->getOperand(Num: 1);
11856 int64_t Imm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2))->getSExtValue();
11857 SDLoc DL(Node);
11858
11859 // Expand through memory as follows:
11860 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
11861 // Store V1, Ptr
11862 // Store V2, Ptr + sizeof(V1)
11863 // If (Imm < 0)
11864 // TrailingElts = -Imm
11865 // Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
11866 // else
11867 // Ptr = Ptr + (Imm * sizeof(VT.Elt))
11868 // Res = Load Ptr
11869
11870 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
11871
11872 EVT MemVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: VT.getVectorElementType(),
11873 EC: VT.getVectorElementCount() * 2);
11874 SDValue StackPtr = DAG.CreateStackTemporary(Bytes: MemVT.getStoreSize(), Alignment);
11875 EVT PtrVT = StackPtr.getValueType();
11876 auto &MF = DAG.getMachineFunction();
11877 auto FrameIndex = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
11878 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FI: FrameIndex);
11879
11880 // Store the lo part of CONCAT_VECTORS(V1, V2)
11881 SDValue StoreV1 = DAG.getStore(Chain: DAG.getEntryNode(), dl: DL, Val: V1, Ptr: StackPtr, PtrInfo);
11882 // Store the hi part of CONCAT_VECTORS(V1, V2)
11883 SDValue OffsetToV2 = DAG.getVScale(
11884 DL, VT: PtrVT,
11885 MulImm: APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue()));
11886 SDValue StackPtr2 = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, N2: OffsetToV2);
11887 SDValue StoreV2 = DAG.getStore(Chain: StoreV1, dl: DL, Val: V2, Ptr: StackPtr2, PtrInfo);
11888
11889 if (Imm >= 0) {
11890 // Load back the required element. getVectorElementPointer takes care of
11891 // clamping the index if it's out-of-bounds.
11892 StackPtr = getVectorElementPointer(DAG, VecPtr: StackPtr, VecVT: VT, Index: Node->getOperand(Num: 2));
11893 // Load the spliced result
11894 return DAG.getLoad(VT, dl: DL, Chain: StoreV2, Ptr: StackPtr,
11895 PtrInfo: MachinePointerInfo::getUnknownStack(MF));
11896 }
11897
11898 uint64_t TrailingElts = -Imm;
11899
11900 // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
11901 TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
11902 SDValue TrailingBytes =
11903 DAG.getConstant(Val: TrailingElts * EltByteSize, DL, VT: PtrVT);
11904
11905 if (TrailingElts > VT.getVectorMinNumElements()) {
11906 SDValue VLBytes =
11907 DAG.getVScale(DL, VT: PtrVT,
11908 MulImm: APInt(PtrVT.getFixedSizeInBits(),
11909 VT.getStoreSize().getKnownMinValue()));
11910 TrailingBytes = DAG.getNode(Opcode: ISD::UMIN, DL, VT: PtrVT, N1: TrailingBytes, N2: VLBytes);
11911 }
11912
11913 // Calculate the start address of the spliced result.
11914 StackPtr2 = DAG.getNode(Opcode: ISD::SUB, DL, VT: PtrVT, N1: StackPtr2, N2: TrailingBytes);
11915
11916 // Load the spliced result
11917 return DAG.getLoad(VT, dl: DL, Chain: StoreV2, Ptr: StackPtr2,
11918 PtrInfo: MachinePointerInfo::getUnknownStack(MF));
11919}
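// For example, with VT = nxv4i32 and vscale = 1 (so four elements per
// operand), V1 = {a,b,c,d} and V2 = {e,f,g,h}:
//   splice(V1, V2,  1) --> {b,c,d,e}   (start at element 1 of V1:V2)
//   splice(V1, V2, -2) --> {c,d,e,f}   (last two of V1, then from V2)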
11920
11921SDValue TargetLowering::expandVECTOR_COMPRESS(SDNode *Node,
11922 SelectionDAG &DAG) const {
11923 SDLoc DL(Node);
11924 SDValue Vec = Node->getOperand(Num: 0);
11925 SDValue Mask = Node->getOperand(Num: 1);
11926 SDValue Passthru = Node->getOperand(Num: 2);
11927
11928 EVT VecVT = Vec.getValueType();
11929 EVT ScalarVT = VecVT.getScalarType();
11930 EVT MaskVT = Mask.getValueType();
11931 EVT MaskScalarVT = MaskVT.getScalarType();
11932
11933 // Needs to be handled by targets that have scalable vector types.
11934 if (VecVT.isScalableVector())
11935 report_fatal_error(reason: "Cannot expand masked_compress for scalable vectors.");
11936
11937 SDValue StackPtr = DAG.CreateStackTemporary(
11938 Bytes: VecVT.getStoreSize(), Alignment: DAG.getReducedAlign(VT: VecVT, /*UseABI=*/false));
11939 int FI = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
11940 MachinePointerInfo PtrInfo =
11941 MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI);
11942
11943 MVT PositionVT = getVectorIdxTy(DL: DAG.getDataLayout());
11944 SDValue Chain = DAG.getEntryNode();
11945 SDValue OutPos = DAG.getConstant(Val: 0, DL, VT: PositionVT);
11946
11947 bool HasPassthru = !Passthru.isUndef();
11948
11949 // If we have a passthru vector, store it on the stack, overwrite the matching
11950 // positions and then re-write the last element that was potentially
11951 // overwritten even though mask[i] = false.
11952 if (HasPassthru)
11953 Chain = DAG.getStore(Chain, dl: DL, Val: Passthru, Ptr: StackPtr, PtrInfo);
11954
11955 SDValue LastWriteVal;
11956 APInt PassthruSplatVal;
11957 bool IsSplatPassthru =
11958 ISD::isConstantSplatVector(N: Passthru.getNode(), SplatValue&: PassthruSplatVal);
11959
11960 if (IsSplatPassthru) {
11961 // As we do not know which position we wrote to last, we cannot simply
11962 // access that index from the passthru vector. So we first check if passthru
11963 // is a splat vector, to use any element ...
11964 LastWriteVal = DAG.getConstant(Val: PassthruSplatVal, DL, VT: ScalarVT);
11965 } else if (HasPassthru) {
11966 // ... if it is not a splat vector, we need to get the passthru value at
11967 // position = popcount(mask) and re-load it from the stack before it is
11968 // overwritten in the loop below.
11969 EVT PopcountVT = ScalarVT.changeTypeToInteger();
11970 SDValue Popcount = DAG.getNode(
11971 Opcode: ISD::TRUNCATE, DL, VT: MaskVT.changeVectorElementType(EltVT: MVT::i1), Operand: Mask);
11972 Popcount =
11973 DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL,
11974 VT: MaskVT.changeVectorElementType(EltVT: PopcountVT), Operand: Popcount);
11975 Popcount = DAG.getNode(Opcode: ISD::VECREDUCE_ADD, DL, VT: PopcountVT, Operand: Popcount);
11976 SDValue LastElmtPtr =
11977 getVectorElementPointer(DAG, VecPtr: StackPtr, VecVT, Index: Popcount);
11978 LastWriteVal = DAG.getLoad(
11979 VT: ScalarVT, dl: DL, Chain, Ptr: LastElmtPtr,
11980 PtrInfo: MachinePointerInfo::getUnknownStack(MF&: DAG.getMachineFunction()));
11981 Chain = LastWriteVal.getValue(R: 1);
11982 }
11983
11984 unsigned NumElms = VecVT.getVectorNumElements();
11985 for (unsigned I = 0; I < NumElms; I++) {
11986 SDValue ValI = DAG.getExtractVectorElt(DL, VT: ScalarVT, Vec, Idx: I);
11987 SDValue OutPtr = getVectorElementPointer(DAG, VecPtr: StackPtr, VecVT, Index: OutPos);
11988 Chain = DAG.getStore(
11989 Chain, dl: DL, Val: ValI, Ptr: OutPtr,
11990 PtrInfo: MachinePointerInfo::getUnknownStack(MF&: DAG.getMachineFunction()));
11991
11992 // Get the mask value and add it to the current output position. This
11993 // either increments by 1 if MaskI is true or adds 0 otherwise.
11994 // Freeze in case we have poison/undef mask entries.
11995 SDValue MaskI =
11996 DAG.getFreeze(V: DAG.getExtractVectorElt(DL, VT: MaskScalarVT, Vec: Mask, Idx: I));
11998 MaskI = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i1, Operand: MaskI);
11999 MaskI = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: PositionVT, Operand: MaskI);
12000 OutPos = DAG.getNode(Opcode: ISD::ADD, DL, VT: PositionVT, N1: OutPos, N2: MaskI);
12001
12002 if (HasPassthru && I == NumElms - 1) {
12003 SDValue EndOfVector =
12004 DAG.getConstant(Val: VecVT.getVectorNumElements() - 1, DL, VT: PositionVT);
12005 SDValue AllLanesSelected =
12006 DAG.getSetCC(DL, VT: MVT::i1, LHS: OutPos, RHS: EndOfVector, Cond: ISD::CondCode::SETUGT);
12007 OutPos = DAG.getNode(Opcode: ISD::UMIN, DL, VT: PositionVT, N1: OutPos, N2: EndOfVector);
12008 OutPtr = getVectorElementPointer(DAG, VecPtr: StackPtr, VecVT, Index: OutPos);
12009
12010 // Re-write the last ValI if all lanes were selected. Otherwise,
12011 // overwrite that last write with the passthru value.
12012 LastWriteVal = DAG.getSelect(DL, VT: ScalarVT, Cond: AllLanesSelected, LHS: ValI,
12013 RHS: LastWriteVal, Flags: SDNodeFlags::Unpredictable);
12014 Chain = DAG.getStore(
12015 Chain, dl: DL, Val: LastWriteVal, Ptr: OutPtr,
12016 PtrInfo: MachinePointerInfo::getUnknownStack(MF&: DAG.getMachineFunction()));
12017 }
12018 }
12019
12020 return DAG.getLoad(VT: VecVT, dl: DL, Chain, Ptr: StackPtr, PtrInfo);
12021}
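// The store loop above is the DAG equivalent of this scalar model (a
// sketch, not part of the expansion):
//
//   Result = Passthru;
//   Pos = 0;
//   for (unsigned I = 0; I < NumElms; ++I)
//     if (Mask[I])
//       Result[Pos++] = Vec[I];
//
// Because each iteration stores Vec[I] unconditionally before bumping Pos,
// the slot at position popcount(Mask) can be left holding Vec[NumElms-1]
// when the last mask lane is false; the final guarded store restores
// LastWriteVal (the passthru element) there unless all lanes were selected.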
12022
12023SDValue TargetLowering::expandPartialReduceMLA(SDNode *N,
12024 SelectionDAG &DAG) const {
12025 SDLoc DL(N);
12026 SDValue Acc = N->getOperand(Num: 0);
12027 SDValue MulLHS = N->getOperand(Num: 1);
12028 SDValue MulRHS = N->getOperand(Num: 2);
12029 EVT AccVT = Acc.getValueType();
12030 EVT MulOpVT = MulLHS.getValueType();
12031
12032 EVT ExtMulOpVT =
12033 EVT::getVectorVT(Context&: *DAG.getContext(), VT: AccVT.getVectorElementType(),
12034 EC: MulOpVT.getVectorElementCount());
12035
12036 unsigned ExtOpcLHS = N->getOpcode() == ISD::PARTIAL_REDUCE_UMLA
12037 ? ISD::ZERO_EXTEND
12038 : ISD::SIGN_EXTEND;
12039 unsigned ExtOpcRHS = N->getOpcode() == ISD::PARTIAL_REDUCE_SMLA
12040 ? ISD::SIGN_EXTEND
12041 : ISD::ZERO_EXTEND;
12042
12043 if (ExtMulOpVT != MulOpVT) {
12044 MulLHS = DAG.getNode(Opcode: ExtOpcLHS, DL, VT: ExtMulOpVT, Operand: MulLHS);
12045 MulRHS = DAG.getNode(Opcode: ExtOpcRHS, DL, VT: ExtMulOpVT, Operand: MulRHS);
12046 }
12047 SDValue Input = MulLHS;
12048 APInt ConstantOne;
12049 if (!ISD::isConstantSplatVector(N: MulRHS.getNode(), SplatValue&: ConstantOne) ||
12050 !ConstantOne.isOne())
12051 Input = DAG.getNode(Opcode: ISD::MUL, DL, VT: ExtMulOpVT, N1: MulLHS, N2: MulRHS);
12052
12053 unsigned Stride = AccVT.getVectorMinNumElements();
12054 unsigned ScaleFactor = MulOpVT.getVectorMinNumElements() / Stride;
12055
12056 // Collect all of the subvectors
12057 std::deque<SDValue> Subvectors = {Acc};
12058 for (unsigned I = 0; I < ScaleFactor; I++)
12059 Subvectors.push_back(x: DAG.getExtractSubvector(DL, VT: AccVT, Vec: Input, Idx: I * Stride));
12060
12061 // Flatten the subvector tree
12062 while (Subvectors.size() > 1) {
12063 Subvectors.push_back(
12064 x: DAG.getNode(Opcode: ISD::ADD, DL, VT: AccVT, Ops: {Subvectors[0], Subvectors[1]}));
12065 Subvectors.pop_front();
12066 Subvectors.pop_front();
12067 }
12068
12069 assert(Subvectors.size() == 1 &&
12070 "There should only be one subvector after tree flattening");
12071
12072 return Subvectors[0];
12073}
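// With Stride = AccVT's element count and ScaleFactor subvectors, the tree
// above computes, for each accumulator lane j:
//
//   Result[j] = Acc[j] + sum_{K=0..ScaleFactor-1} Input[K*Stride + j]
//
// where Input = ext(MulLHS) * ext(MulRHS), or just ext(MulLHS) when MulRHS
// is a splat of one.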
12074
12075bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
12076 SDValue &LHS, SDValue &RHS,
12077 SDValue &CC, SDValue Mask,
12078 SDValue EVL, bool &NeedInvert,
12079 const SDLoc &dl, SDValue &Chain,
12080 bool IsSignaling) const {
12081 MVT OpVT = LHS.getSimpleValueType();
12082 ISD::CondCode CCCode = cast<CondCodeSDNode>(Val&: CC)->get();
12083 NeedInvert = false;
12084 assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
12085 bool IsNonVP = !EVL;
12086 switch (getCondCodeAction(CC: CCCode, VT: OpVT)) {
12087 default:
12088 llvm_unreachable("Unknown condition code action!");
12089 case TargetLowering::Legal:
12090 // Nothing to do.
12091 break;
12092 case TargetLowering::Expand: {
12093 ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(Operation: CCCode);
12094 if (isCondCodeLegalOrCustom(CC: InvCC, VT: OpVT)) {
12095 std::swap(a&: LHS, b&: RHS);
12096 CC = DAG.getCondCode(Cond: InvCC);
12097 return true;
12098 }
12099 // Swapping operands didn't work. Try inverting the condition.
12100 bool NeedSwap = false;
12101 InvCC = getSetCCInverse(Operation: CCCode, Type: OpVT);
12102 if (!isCondCodeLegalOrCustom(CC: InvCC, VT: OpVT)) {
12103 // If inverting the condition is not enough, try swapping operands
12104 // on top of it.
12105 InvCC = ISD::getSetCCSwappedOperands(Operation: InvCC);
12106 NeedSwap = true;
12107 }
12108 if (isCondCodeLegalOrCustom(CC: InvCC, VT: OpVT)) {
12109 CC = DAG.getCondCode(Cond: InvCC);
12110 NeedInvert = true;
12111 if (NeedSwap)
12112 std::swap(a&: LHS, b&: RHS);
12113 return true;
12114 }
12115
12116 // Special case: expand i1 comparisons using logical operations.
12117 if (OpVT == MVT::i1) {
12118 SDValue Ret;
12119 switch (CCCode) {
12120 default:
12121 llvm_unreachable("Unknown integer setcc!");
12122 case ISD::SETEQ: // X == Y --> ~(X ^ Y)
12123 Ret = DAG.getNOT(DL: dl, Val: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i1, N1: LHS, N2: RHS),
12124 VT: MVT::i1);
12125 break;
12126 case ISD::SETNE: // X != Y --> (X ^ Y)
12127 Ret = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i1, N1: LHS, N2: RHS);
12128 break;
12129 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
12130 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
12131 Ret = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i1, N1: RHS,
12132 N2: DAG.getNOT(DL: dl, Val: LHS, VT: MVT::i1));
12133 break;
12134 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
12135 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
12136 Ret = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i1, N1: LHS,
12137 N2: DAG.getNOT(DL: dl, Val: RHS, VT: MVT::i1));
12138 break;
12139 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
12140 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
12141 Ret = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: MVT::i1, N1: RHS,
12142 N2: DAG.getNOT(DL: dl, Val: LHS, VT: MVT::i1));
12143 break;
12144 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
12145 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
12146 Ret = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: MVT::i1, N1: LHS,
12147 N2: DAG.getNOT(DL: dl, Val: RHS, VT: MVT::i1));
12148 break;
12149 }
12150
12151 LHS = DAG.getZExtOrTrunc(Op: Ret, DL: dl, VT);
12152 RHS = SDValue();
12153 CC = SDValue();
12154 return true;
12155 }
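// As a sanity check of one row of the table above: for i1, the bit pattern
// 1 reads as -1 when signed, so X >s Y holds only for X = 0, Y = 1, which
// is exactly ~X & Y:
//
//   X Y | X >s Y | ~X & Y
//   0 0 |   0    |   0
//   0 1 |   1    |   1      (0 >s -1)
//   1 0 |   0    |   0
//   1 1 |   0    |   0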
12156
12157 ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
12158 unsigned Opc = 0;
12159 switch (CCCode) {
12160 default:
12161 llvm_unreachable("Don't know how to expand this condition!");
12162 case ISD::SETUO:
12163 if (isCondCodeLegal(CC: ISD::SETUNE, VT: OpVT)) {
12164 CC1 = ISD::SETUNE;
12165 CC2 = ISD::SETUNE;
12166 Opc = ISD::OR;
12167 break;
12168 }
12169 assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
12170 "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
12171 NeedInvert = true;
12172 [[fallthrough]];
12173 case ISD::SETO:
12174 assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
12175 "If SETO is expanded, SETOEQ must be legal!");
12176 CC1 = ISD::SETOEQ;
12177 CC2 = ISD::SETOEQ;
12178 Opc = ISD::AND;
12179 break;
12180 case ISD::SETONE:
12181 case ISD::SETUEQ:
12182 // If the SETUO or SETO CC isn't legal, we might be able to use
12183 // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
12184 // of SETOGT/SETOLT to be legal; the other can be emulated by swapping
12185 // the operands.
12186 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
12187 if (!isCondCodeLegal(CC: CC2, VT: OpVT) && (isCondCodeLegal(CC: ISD::SETOGT, VT: OpVT) ||
12188 isCondCodeLegal(CC: ISD::SETOLT, VT: OpVT))) {
12189 CC1 = ISD::SETOGT;
12190 CC2 = ISD::SETOLT;
12191 Opc = ISD::OR;
12192 NeedInvert = ((unsigned)CCCode & 0x8U);
12193 break;
12194 }
12195 [[fallthrough]];
12196 case ISD::SETOEQ:
12197 case ISD::SETOGT:
12198 case ISD::SETOGE:
12199 case ISD::SETOLT:
12200 case ISD::SETOLE:
12201 case ISD::SETUNE:
12202 case ISD::SETUGT:
12203 case ISD::SETUGE:
12204 case ISD::SETULT:
12205 case ISD::SETULE:
12206 // If we are floating point, assign and break, otherwise fall through.
12207 if (!OpVT.isInteger()) {
12208 // We can use the 4th bit to tell if we are the unordered
12209 // or ordered version of the opcode.
12210 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
12211 Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
12212 CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
12213 break;
12214 }
12215 // Fallthrough if we are unsigned integer.
12216 [[fallthrough]];
12217 case ISD::SETLE:
12218 case ISD::SETGT:
12219 case ISD::SETGE:
12220 case ISD::SETLT:
12221 case ISD::SETNE:
12222 case ISD::SETEQ:
12223 // If all combinations of inverting the condition and swapping operands
12224 // didn't work then we have no means to expand the condition.
12225 llvm_unreachable("Don't know how to expand this condition!");
12226 }
12227
12228 SDValue SetCC1, SetCC2;
12229 if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
12230 // If we aren't the ordered or unordered operation,
12231 // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
12232 if (IsNonVP) {
12233 SetCC1 = DAG.getSetCC(DL: dl, VT, LHS, RHS, Cond: CC1, Chain, IsSignaling);
12234 SetCC2 = DAG.getSetCC(DL: dl, VT, LHS, RHS, Cond: CC2, Chain, IsSignaling);
12235 } else {
12236 SetCC1 = DAG.getSetCCVP(DL: dl, VT, LHS, RHS, Cond: CC1, Mask, EVL);
12237 SetCC2 = DAG.getSetCCVP(DL: dl, VT, LHS, RHS, Cond: CC2, Mask, EVL);
12238 }
12239 } else {
12240 // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
12241 if (IsNonVP) {
12242 SetCC1 = DAG.getSetCC(DL: dl, VT, LHS, RHS: LHS, Cond: CC1, Chain, IsSignaling);
12243 SetCC2 = DAG.getSetCC(DL: dl, VT, LHS: RHS, RHS, Cond: CC2, Chain, IsSignaling);
12244 } else {
12245 SetCC1 = DAG.getSetCCVP(DL: dl, VT, LHS, RHS: LHS, Cond: CC1, Mask, EVL);
12246 SetCC2 = DAG.getSetCCVP(DL: dl, VT, LHS: RHS, RHS, Cond: CC2, Mask, EVL);
12247 }
12248 }
12249 if (Chain)
12250 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: SetCC1.getValue(R: 1),
12251 N2: SetCC2.getValue(R: 1));
12252 if (IsNonVP)
12253 LHS = DAG.getNode(Opcode: Opc, DL: dl, VT, N1: SetCC1, N2: SetCC2);
12254 else {
12255 // Transform the binary opcode to the VP equivalent.
12256 assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
12257 Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
12258 LHS = DAG.getNode(Opcode: Opc, DL: dl, VT, N1: SetCC1, N2: SetCC2, N3: Mask, N4: EVL);
12259 }
12260 RHS = SDValue();
12261 CC = SDValue();
12262 return true;
12263 }
12264 }
12265 return false;
12266}
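// Worked example: on a target where only the ordered FP compares are
// legal, SETUEQ goes through the SETONE/SETUEQ case above and becomes
//
//   t1 = setcc LHS, RHS, setogt
//   t2 = setcc LHS, RHS, setolt
//   r  = or t1, t2               // with NeedInvert = true
//
// and the caller applies the inversion, since
// (LHS ueq RHS) == !((LHS ogt RHS) || (LHS olt RHS)).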
12267
12268SDValue TargetLowering::expandVectorNaryOpBySplitting(SDNode *Node,
12269 SelectionDAG &DAG) const {
12270 EVT VT = Node->getValueType(ResNo: 0);
12271 // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
12272 // split into two equal parts.
12273 if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(RHS: 2))
12274 return SDValue();
12275
12276 // Restrict expansion to cases where both parts can be concatenated.
12277 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
12278 if (LoVT != HiVT || !isTypeLegal(VT: LoVT))
12279 return SDValue();
12280
12281 SDLoc DL(Node);
12282 unsigned Opcode = Node->getOpcode();
12283
12284 // Don't expand if the result is likely to be unrolled anyway.
12285 if (!isOperationLegalOrCustomOrPromote(Op: Opcode, VT: LoVT))
12286 return SDValue();
12287
12288 SmallVector<SDValue, 4> LoOps, HiOps;
12289 for (const SDValue &V : Node->op_values()) {
12290 auto [Lo, Hi] = DAG.SplitVector(N: V, DL, LoVT, HiVT);
12291 LoOps.push_back(Elt: Lo);
12292 HiOps.push_back(Elt: Hi);
12293 }
12294
12295 SDValue SplitOpLo = DAG.getNode(Opcode, DL, VT: LoVT, Ops: LoOps);
12296 SDValue SplitOpHi = DAG.getNode(Opcode, DL, VT: HiVT, Ops: HiOps);
12297 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, N1: SplitOpLo, N2: SplitOpHi);
12298}
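// For example, an 8-element node OP x, y for which only the 4-element type
// is legal expands to:
//
//   (xlo, xhi) = split x;  (ylo, yhi) = split y
//   concat_vectors (OP xlo, ylo), (OP xhi, yhi)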
12299
12300SDValue TargetLowering::scalarizeExtractedVectorLoad(EVT ResultVT,
12301 const SDLoc &DL,
12302 EVT InVecVT, SDValue EltNo,
12303 LoadSDNode *OriginalLoad,
12304 SelectionDAG &DAG) const {
12305 assert(OriginalLoad->isSimple());
12306
12307 EVT VecEltVT = InVecVT.getVectorElementType();
12308
12309 // If the vector element type's size is not a whole number of bytes, we are
12310 // unable to correctly compute an address from which to load only the
12311 // extracted element as a scalar.
12312 if (!VecEltVT.isByteSized())
12313 return SDValue();
12314
12315 ISD::LoadExtType ExtTy =
12316 ResultVT.bitsGT(VT: VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
12317 if (!isOperationLegalOrCustom(Op: ISD::LOAD, VT: VecEltVT))
12318 return SDValue();
12319
12320 std::optional<unsigned> ByteOffset;
12321 Align Alignment = OriginalLoad->getAlign();
12322 MachinePointerInfo MPI;
12323 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(Val&: EltNo)) {
12324 int Elt = ConstEltNo->getZExtValue();
12325 ByteOffset = VecEltVT.getSizeInBits() * Elt / 8;
12326 MPI = OriginalLoad->getPointerInfo().getWithOffset(O: *ByteOffset);
12327 Alignment = commonAlignment(A: Alignment, Offset: *ByteOffset);
12328 } else {
12329 // Discard the pointer info except the address space because the memory
12330 // operand can't represent this new access since the offset is variable.
12331 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
12332 Alignment = commonAlignment(A: Alignment, Offset: VecEltVT.getSizeInBits() / 8);
12333 }
12334
12335 if (!shouldReduceLoadWidth(Load: OriginalLoad, ExtTy, NewVT: VecEltVT, ByteOffset))
12336 return SDValue();
12337
12338 unsigned IsFast = 0;
12339 if (!allowsMemoryAccess(Context&: *DAG.getContext(), DL: DAG.getDataLayout(), VT: VecEltVT,
12340 AddrSpace: OriginalLoad->getAddressSpace(), Alignment,
12341 Flags: OriginalLoad->getMemOperand()->getFlags(), Fast: &IsFast) ||
12342 !IsFast)
12343 return SDValue();
12344
12345 SDValue NewPtr =
12346 getVectorElementPointer(DAG, VecPtr: OriginalLoad->getBasePtr(), VecVT: InVecVT, Index: EltNo);
12347
12348 // We are replacing a vector load with a scalar load. The new load must have
12349 // identical memory op ordering to the original.
12350 SDValue Load;
12351 if (ResultVT.bitsGT(VT: VecEltVT)) {
12352 // If the result type of vextract is wider than the load, then issue an
12353 // extending load instead.
12354 ISD::LoadExtType ExtType = isLoadExtLegal(ExtType: ISD::ZEXTLOAD, ValVT: ResultVT, MemVT: VecEltVT)
12355 ? ISD::ZEXTLOAD
12356 : ISD::EXTLOAD;
12357 Load = DAG.getExtLoad(ExtType, dl: DL, VT: ResultVT, Chain: OriginalLoad->getChain(),
12358 Ptr: NewPtr, PtrInfo: MPI, MemVT: VecEltVT, Alignment,
12359 MMOFlags: OriginalLoad->getMemOperand()->getFlags(),
12360 AAInfo: OriginalLoad->getAAInfo());
12361 DAG.makeEquivalentMemoryOrdering(OldLoad: OriginalLoad, NewMemOp: Load);
12362 } else {
12363 // The result type is narrower or the same width as the vector element.
12364 Load = DAG.getLoad(VT: VecEltVT, dl: DL, Chain: OriginalLoad->getChain(), Ptr: NewPtr, PtrInfo: MPI,
12365 Alignment, MMOFlags: OriginalLoad->getMemOperand()->getFlags(),
12366 AAInfo: OriginalLoad->getAAInfo());
12367 DAG.makeEquivalentMemoryOrdering(OldLoad: OriginalLoad, NewMemOp: Load);
12368 if (ResultVT.bitsLT(VT: VecEltVT))
12369 Load = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: ResultVT, Operand: Load);
12370 else
12371 Load = DAG.getBitcast(VT: ResultVT, V: Load);
12372 }
12373
12374 return Load;
12375}
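// Net effect (sketch): for a constant index C,
//
//   (extract_elt (load Ptr), C)
//     --> load (Ptr + C * sizeof(VecEltVT))
//
// issued as an [s|z]ext load when ResultVT is wider than the element, or
// truncated/bitcast when it is narrower or the same width, with the chain
// updated so the scalar load keeps the original load's memory ordering.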
12376