//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/DivisionByConstantInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cctype>
#include <deque>
using namespace llvm;
using namespace llvm::SDPatternMatch;

/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm,
                               const TargetSubtargetInfo &STI)
    : TargetLoweringBase(tm, STI) {}

// Define the virtual destructor out-of-line for build efficiency.
TargetLowering::~TargetLowering() = default;

const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}

bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}

/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
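///
/// For example, in
/// \code
///   int g();
///   int f() { return g(); }
/// \endcode
/// the call to g() is in tail call position: its result feeds straight into
/// f's return and the return-value attributes of the two functions match.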
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                                          SDValue &Chain) const {
  const Function &F = DAG.getMachineFunction().getFunction();

  // First, check if tail calls have been disabled in this function.
  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
    return false;

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore the following attributes because they don't affect the
  // call sequence.
  AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
  for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
                           Attribute::DereferenceableOrNull, Attribute::NoAlias,
                           Attribute::NonNull, Attribute::NoUndef,
                           Attribute::Range, Attribute::NoFPClass})
    CallerAttrs.removeAttribute(Attr);

  if (CallerAttrs.hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.contains(Attribute::ZExt) ||
      CallerAttrs.contains(Attribute::SExt))
    return false;

  // Check if the only use is a function return node.
  return isUsedByReturnOnly(Node, Chain);
}

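// Check whether parameters to a call that are passed in callee-saved
// registers are the same values the caller itself received in those
// registers. A tail call's epilogue restores callee-saved registers before
// the jump, so an outgoing argument living in such a register can only be
// correct if it is the unmodified incoming value.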
bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
    const uint32_t *CallerPreservedMask,
    const SmallVectorImpl<CCValAssign> &ArgLocs,
    const SmallVectorImpl<SDValue> &OutVals) const {
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    const CCValAssign &ArgLoc = ArgLocs[I];
    if (!ArgLoc.isRegLoc())
      continue;
    MCRegister Reg = ArgLoc.getLocReg();
    // Only look at callee-saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
      continue;
    // Check that we pass the same value that the caller received in this
    // register. (We look for a CopyFromReg reading a virtual register that is
    // used for the function live-in value of register Reg.)
    SDValue Value = OutVals[I];
    if (Value->getOpcode() == ISD::AssertZext)
      Value = Value.getOperand(0);
    if (Value->getOpcode() != ISD::CopyFromReg)
      return false;
    Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
      return false;
  }
  return true;
}

/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgIdx);
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgIdx);
}

/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
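///
/// For example, softening an f32 addition typically becomes a call to the
/// compiler-rt/libgcc routine __addsf3, taking two i32 arguments and
/// returning an i32, which this helper builds and lowers via LowerCallTo.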
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl,
                            EVT RetVT, ArrayRef<SDValue> Ops,
                            MakeLibCallOptions CallOptions, const SDLoc &dl,
                            SDValue InChain) const {
  if (LibcallImpl == RTLIB::Unsupported)
    reportFatalInternalError("unsupported library call operation");

  if (!InChain)
    InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  Args.reserve(Ops.size());

  ArrayRef<Type *> OpsTypeOverrides = CallOptions.OpsTypeOverrides;
  for (unsigned i = 0; i < Ops.size(); ++i) {
    SDValue NewOp = Ops[i];
    Type *Ty = i < OpsTypeOverrides.size() && OpsTypeOverrides[i]
                   ? OpsTypeOverrides[i]
                   : NewOp.getValueType().getTypeForEVT(*DAG.getContext());
    TargetLowering::ArgListEntry Entry(NewOp, Ty);
    if (CallOptions.IsSoften)
      Entry.OrigTy =
          CallOptions.OpsVTBeforeSoften[i].getTypeForEVT(*DAG.getContext());

    Entry.IsSExt =
        shouldSignExtendTypeInLibCall(Entry.Ty, CallOptions.IsSigned);
    Entry.IsZExt = !Entry.IsSExt;

    if (CallOptions.IsSoften &&
        !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
      Entry.IsSExt = Entry.IsZExt = false;
    }
    Args.push_back(Entry);
  }

  SDValue Callee =
      DAG.getExternalSymbol(LibcallImpl, getPointerTy(DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  Type *OrigRetTy = RetTy;
  TargetLowering::CallLoweringInfo CLI(DAG);
  bool signExtend = shouldSignExtendTypeInLibCall(RetTy, CallOptions.IsSigned);
  bool zeroExtend = !signExtend;

  if (CallOptions.IsSoften) {
    OrigRetTy = CallOptions.RetVTBeforeSoften.getTypeForEVT(*DAG.getContext());
    if (!shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften))
      signExtend = zeroExtend = false;
  }

  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(getLibcallImplCallingConv(LibcallImpl), RetTy, OrigRetTy,
                    Callee, std::move(Args))
      .setNoReturn(CallOptions.DoesNotReturn)
      .setDiscardResult(!CallOptions.IsReturnValueUsed)
      .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
      .setSExtResult(signExtend)
      .setZExtResult(zeroExtend);
  return LowerCallTo(CLI);
}

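/// Determine the sequence of value types to use to lower a memory operation
/// inline, appending one entry to MemOps per load/store. For example, a
/// 15-byte memcpy on a target with legal i64 might be covered by
/// i64+i32+i16+i8 accesses, or by two overlapping i64 accesses when unaligned
/// overlapping stores are fast.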
bool TargetLowering::findOptimalMemOpLowering(
    LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
    const MemOp &Op, unsigned DstAS, unsigned SrcAS,
    const AttributeList &FuncAttributes, EVT *LargestVT) const {
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  EVT VT = getOptimalMemOpType(Context, Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::LAST_INTEGER_VALUETYPE;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector loads / stores for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing an unaligned and overlapping load / store (or a pair of them).
      unsigned Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}

/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
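///
/// For example, (setcc f32 a, b, setolt) is typically softened to a call to
/// the libgcc routine __ltsf2 followed by an integer comparison of the call's
/// result against zero.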
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS) const {
  SDValue Chain;
  return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
                             OldRHS, Chain);
}

void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to
  // libgcc not supporting them. We can update this code when libgcc provides
  // such functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 ||
          VT == MVT::ppcf128) &&
         "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons.
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target-specific return value for comparison libcalls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT);
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  RTLIB::LibcallImpl LC1Impl = getLibcallImpl(LC1);
  if (LC1Impl == RTLIB::Unsupported) {
    reportFatalUsageError(
        "no libcall available to soften floating-point compare");
  }

  CCCode = getSoftFloatCmpLibcallPredicate(LC1Impl);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    RTLIB::LibcallImpl LC2Impl = getLibcallImpl(LC2);
    if (LC2Impl == RTLIB::Unsupported) {
      reportFatalUsageError(
          "no libcall available to soften floating-point compare");
    }

    assert(CCCode == (ShouldInvertCC ? ISD::SETEQ : ISD::SETNE) &&
           "unordered call should be simple boolean");

    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    if (getBooleanContents(RetVT) == ZeroOrOneBooleanContent) {
      NewLHS = DAG.getNode(ISD::AssertZext, dl, RetVT, Call.first,
                           DAG.getValueType(MVT::i1));
    }

    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getSoftFloatCmpLibcallPredicate(LC2Impl);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    NewRHS = SDValue();
  }
}

/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
  // In non-PIC modes, just use the address of a block.
  if (!isPositionIndependent())
    return MachineJumpTableInfo::EK_BlockAddress;

  // Otherwise, use a label difference.
  return MachineJumpTableInfo::EK_LabelDifference32;
}

SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  return Table;
}

/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI,
                                             MCContext &Ctx) const {
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}

SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
                                               SDValue Addr, int JTI,
                                               SelectionDAG &DAG) const {
  SDValue Chain = Value;
  // Jump table debug info is only needed if CodeView is enabled.
  if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
    Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
  }
  return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
}

bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  const TargetMachine &TM = getTargetMachine();
  const GlobalValue *GV = GA->getGlobal();

  // If the address is not even local to this DSO we will have to load it from
  // a GOT and then add the offset.
  if (!TM.shouldAssumeDSOLocal(GV))
    return false;

  // If the code is position independent we will have to add a base register.
  if (isPositionIndependent())
    return false;

  // Otherwise we can do it.
  return true;
}

//===----------------------------------------------------------------------===//
// Optimization Methods
//===----------------------------------------------------------------------===//

/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
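///
/// For example, if Op is (and X, 0xFF00FF) and only the low 8 bits are
/// demanded, the mask can be shrunk to 0xFF.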
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Early-out if we've ended up calling an undemanded node; leave this to
  // constant folding.
  if (DemandedBits.isZero() || DemandedElts.isZero())
    return false;

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
      return false;

    if (!C.isSubsetOf(DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
                                      Op->getFlags());
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}

bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            TargetLoweringOpt &TLO) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}

/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
/// but it could be generalized for targets with other types of implicit
/// widening casts.
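///
/// For example, if only the low 16 bits of a 32-bit add are demanded:
///   (i32 (add x, y)) -> (i32 (any_extend (i16 (add (trunc x), (trunc y)))))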
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &DemandedBits,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  EVT VT = Op.getValueType();
  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (VT.isVector())
    return false;

  assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
         Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
         "ShrinkDemandedOp only supports operands that have the same size!");

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  unsigned DemandedSize = DemandedBits.getActiveBits();
  for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
       SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (isTruncateFree(Op, SmallVT) && isZExtFree(SmallVT, VT)) {
      // We found a type with free casts.

      // If the operation has the 'disjoint' flag, then the
      // operands on the new node are also disjoint.
      SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
                                                     : SDNodeFlags::None);
      unsigned Opcode = Op.getOpcode();
      if (Opcode == ISD::PTRADD) {
        // It isn't a ptradd anymore if it doesn't operate on the entire
        // pointer.
        Opcode = ISD::ADD;
      }
      SDValue X = DAG.getNode(
          Opcode, dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags);
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          const APInt &DemandedElts,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified =
      SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          KnownBits &Known,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();

  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
                              AssumeSingleUse);
}

// TODO: Under what circumstances can we create nodes? Constant folding?
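//
// Attempt to return an existing, simpler value that already computes the
// demanded bits/elts of Op, so uses can be rewired without cloning the whole
// expression. For example, for (and X, C) where every demanded bit is known
// set in C, X itself is returned since the mask cannot change those bits.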
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  EVT VT = Op.getValueType();

  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Ignore UNDEFs.
  if (Op.isUndef())
    return SDValue();

  // Not demanding any bits/elts from Op.
  if (DemandedBits == 0 || DemandedElts == 0)
    return DAG.getUNDEF(VT);

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = DemandedElts.getBitWidth();
  unsigned BitWidth = DemandedBits.getBitWidth();
  KnownBits LHSKnown, RHSKnown;
  switch (Op.getOpcode()) {
  case ISD::BITCAST: {
    if (VT.isScalableVector())
      return SDValue();

    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (SrcVT == DstVT)
      return Src;

    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
    if (NumSrcEltBits == NumDstEltBits)
      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedBits, DemandedElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);

    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
      unsigned Scale = NumDstEltBits / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
        unsigned BitOffset = EltOffset * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
        if (!Sub.isZero()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    // TODO - bigendian once we have test coverage.
    if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
      unsigned Scale = NumSrcEltBits / NumDstEltBits;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * NumDstEltBits;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    break;
  }
  case ISD::AND: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return Op.getOperand(1);
    break;
  }
  case ISD::OR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::XOR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::ADD: {
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    if (RHSKnown.isZero())
      return Op.getOperand(0);

    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    if (LHSKnown.isZero())
      return Op.getOperand(1);
    break;
  }
  case ISD::SHL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (std::optional<unsigned> MaxSA =
            DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = *MaxSA;
      unsigned NumSignBits =
          DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return Op0;
    }
    break;
  }
  case ISD::SRL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (std::optional<unsigned> MaxSA =
            DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = *MaxSA;
      // Must already be signbits in DemandedBits bounds, and can't demand any
      // shifted in zeroes.
      if (DemandedBits.countl_zero() >= ShAmt) {
        unsigned NumSignBits =
            DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
        if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
          return Op0;
      }
    }
    break;
  }
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return Op0;
    }
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    // If none of the extended bits are demanded, eliminate the sextinreg.
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExBits = ExVT.getScalarSizeInBits();
    if (DemandedBits.getActiveBits() <= ExBits &&
        shouldRemoveRedundantExtend(Op))
      return Op0;
    // If the input is already sign extended, just drop the extension.
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
    if (NumSignBits >= (BitWidth - ExBits + 1))
      return Op0;
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    if (VT.isScalableVector())
      return SDValue();

    // If we only want the lowest element and none of the extended bits, then
    // we can return the bitcasted source vector.
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (IsLE && DemandedElts == 1 &&
        DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
        DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
      return DAG.getBitcast(DstVT, Src);
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (VT.isScalableVector())
      return SDValue();

    // If we don't demand the inserted element, return the base vector.
    SDValue Vec = Op.getOperand(0);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    if (VT.isScalableVector())
      return SDValue();

    SDValue Vec = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    // If we don't demand the inserted subvector, return the base vector.
    if (DemandedSubElts == 0)
      return Vec;
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    assert(!VT.isScalableVector());
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // If all the demanded elts are from one operand and are inline,
    // then we can use the operand directly.
    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      AllUndef = false;
      IdentityLHS &= (M == (int)i);
      IdentityRHS &= ((M - NumElts) == i);
    }

    if (AllUndef)
      return DAG.getUNDEF(Op.getValueType());
    if (IdentityLHS)
      return Op.getOperand(0);
    if (IdentityRHS)
      return Op.getOperand(1);
    break;
  }
  default:
    // TODO: Probably okay to remove after audit; here to reduce change size
    // in initial enablement patch for scalable vectors
    if (VT.isScalableVector())
      return SDValue();

    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
      if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
              Op, DemandedBits, DemandedElts, DAG, Depth))
        return V;
    break;
  }
  return SDValue();
}

SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
    unsigned Depth) const {
  EVT VT = Op.getValueType();
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
    SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
    unsigned Depth) const {
  APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1), or
// ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
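//
// For example, with 8-bit A and B:
//   (i16 (srl (add (zext A), (zext B)), 1)) -> (i16 (zext (avgflooru A, B)))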
static SDValue combineShiftToAVG(SDValue Op,
                                 TargetLowering::TargetLoweringOpt &TLO,
                                 const TargetLowering &TLI,
                                 const APInt &DemandedBits,
                                 const APInt &DemandedElts, unsigned Depth) {
  assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  // Is the right shift using an immediate value of 1?
  ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
  if (!N1C || !N1C->isOne())
    return SDValue();

  // We are looking for an avgfloor
  // add(ext, ext)
  // or one of these as an avgceil
  // add(add(ext, ext), 1)
  // add(add(ext, 1), ext)
  // add(ext, add(ext, 1))
  SDValue Add = Op.getOperand(0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  SDValue ExtOpA = Add.getOperand(0);
  SDValue ExtOpB = Add.getOperand(1);
  SDValue Add2;
  auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
    ConstantSDNode *ConstOp;
    if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op3;
      Add2 = A;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op2;
      Add2 = A;
      return true;
    }
    return false;
  };
  bool IsCeil =
      (ExtOpA.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
      (ExtOpB.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));

  // If the shift is signed (sra):
  //  - Needs >= 2 sign bits for both operands.
  //  - Needs >= 2 zero bits.
  // If the shift is unsigned (srl):
  //  - Needs >= 1 zero bit for both operands.
  //  - Needs 1 demanded bit zero and >= 2 sign bits.
  SelectionDAG &DAG = TLO.DAG;
  unsigned ShiftOpc = Op.getOpcode();
  bool IsSigned = false;
  unsigned KnownBits;
  unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
  unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
  unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
  unsigned NumZeroA =
      DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZeroB =
      DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZero = std::min(NumZeroA, NumZeroB);

  switch (ShiftOpc) {
  default:
    llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
  case ISD::SRA: {
    if (NumZero >= 2 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  case ISD::SRL: {
    if (NumZero >= 1 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  }

  unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
                           : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);

  // Find the smallest power-2 type that is legal for this vector size and
  // operation, given the original type size and the number of known sign/zero
  // bits.
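  // For example, if VT is i32 and 24 bits are known zero, MinWidth is 8 and
  // an i8 average suffices.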
  EVT VT = Op.getValueType();
  unsigned MinWidth =
      std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
  if (NVT.getScalarSizeInBits() > VT.getScalarSizeInBits())
    return SDValue();
  if (VT.isVector())
    NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
  if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
    // If we could not transform, and (both) adds are nuw/nsw, we can use the
    // larger type size to do the transform.
    if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
      return SDValue();
    if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
                               Add.getOperand(1)) &&
        (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
                                         Add2.getOperand(1))))
      NVT = VT;
    else
      return SDValue();
  }

  // Don't create an AVGFLOOR node with a scalar constant unless it's legal as
  // this is likely to stop other folds (reassociation, value tracking etc.)
  if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
      (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
    return SDValue();

  SDLoc DL(Op);
  SDValue ResultAVG =
      DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
                  DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
  return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
}

/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
/// original and new nodes in Old and New. Otherwise, analyze the expression and
/// return a mask of Known bits for the expression (used to simplify the
/// caller). The Known bits may only be accurate for those bits in the
/// OriginalDemandedBits and OriginalDemandedElts.
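///
/// For example, if Op is (and X, 1) and only bit 0 is demanded, the 'and'
/// contributes nothing to the demanded bit and Op is replaced by X.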
bool TargetLowering::SimplifyDemandedBits(
    SDValue Op, const APInt &OriginalDemandedBits,
    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
    unsigned Depth, bool AssumeSingleUse) const {
  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
  assert(Op.getScalarValueSizeInBits() == BitWidth &&
         "Mask size mismatches value type size!");

  // Don't know anything.
  Known = KnownBits(BitWidth);

  EVT VT = Op.getValueType();
  bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = OriginalDemandedElts.getBitWidth();
  assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
         "Unexpected vector size");

  APInt DemandedBits = OriginalDemandedBits;
  APInt DemandedElts = OriginalDemandedElts;
  SDLoc dl(Op);

  // Undef operand.
  if (Op.isUndef())
    return false;

  // We can't simplify target constants.
  if (Op.getOpcode() == ISD::TargetConstant)
    return false;

  if (Op.getOpcode() == ISD::Constant) {
    // We know all of the bits for a constant!
    Known = KnownBits::makeConstant(Op->getAsAPIntVal());
    return false;
  }

  if (Op.getOpcode() == ISD::ConstantFP) {
    // We know all of the bits for a floating point constant!
    Known = KnownBits::makeConstant(
        cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
    return false;
  }

  // Other users may use these bits.
  bool HasMultiUse = false;
  if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
    if (Depth >= SelectionDAG::MaxRecursionDepth) {
      // Limit search depth.
      return false;
    }
    // Allow multiple uses, just set the DemandedBits/Elts to all bits.
    DemandedBits = APInt::getAllOnes(BitWidth);
    DemandedElts = APInt::getAllOnes(NumElts);
    HasMultiUse = true;
  } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
    // Not demanding any bits/elts from Op.
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
    // Limit search depth.
    return false;
  }

  KnownBits Known2;
  switch (Op.getOpcode()) {
  case ISD::SCALAR_TO_VECTOR: {
    if (VT.isScalableVector())
      return false;
    if (!DemandedElts[0])
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));

    KnownBits SrcKnown;
    SDValue Src = Op.getOperand(0);
    unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
    APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
    if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
      return true;

    // Upper elements are undef, so only get the knownbits if we just demand
    // the bottom element.
    if (DemandedElts == 1)
      Known = SrcKnown.anyextOrTrunc(BitWidth);
    break;
  }
  case ISD::BUILD_VECTOR:
    // Collect the known bits that are shared by every demanded element.
    // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    return false; // Don't fall through, will infinitely loop.
  case ISD::SPLAT_VECTOR: {
    SDValue Scl = Op.getOperand(0);
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
    KnownBits KnownScl;
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    // Implicitly truncate the bits to match the official semantics of
    // SPLAT_VECTOR.
    Known = KnownScl.trunc(BitWidth);
    break;
  }
  case ISD::LOAD: {
    auto *LD = cast<LoadSDNode>(Op);
    if (getTargetConstantFromLoad(LD)) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false; // Don't fall through, will infinitely loop.
    }
    if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
      // If this is a ZEXTLoad and we are looking at the loaded value.
      EVT MemVT = LD->getMemoryVT();
      unsigned MemBits = MemVT.getScalarSizeInBits();
      Known.Zero.setBitsFrom(MemBits);
      return false; // Don't fall through, will infinitely loop.
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (VT.isScalableVector())
      return false;
    SDValue Vec = Op.getOperand(0);
    SDValue Scl = Op.getOperand(1);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();

    // If index isn't constant, assume we need all vector elements AND the
    // inserted element.
    APInt DemandedVecElts(DemandedElts);
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
      unsigned Idx = CIdx->getZExtValue();
      DemandedVecElts.clearBit(Idx);

      // Inserted element is not required.
      if (!DemandedElts[Idx])
        return TLO.CombineTo(Op, Vec);
    }

    KnownBits KnownScl;
    unsigned NumSclBits = Scl.getScalarValueSizeInBits();
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    Known = KnownScl.anyextOrTrunc(BitWidth);

    KnownBits KnownVec;
    if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
                             Depth + 1))
      return true;

    if (!!DemandedVecElts)
      Known = Known.intersectWith(KnownVec);

    return false;
  }
  case ISD::INSERT_SUBVECTOR: {
    if (VT.isScalableVector())
      return false;
    // Demand any elements from the subvector and the remainder from the src
    // it's inserted into.
    SDValue Src = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    APInt DemandedSrcElts = DemandedElts;
    DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);

    KnownBits KnownSub, KnownSrc;
    if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
                             Depth + 1))
      return true;

    Known.setAllConflict();
    if (!!DemandedSubElts)
      Known = Known.intersectWith(KnownSub);
    if (!!DemandedSrcElts)
      Known = Known.intersectWith(KnownSrc);

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
        !DemandedSrcElts.isAllOnes()) {
      SDValue NewSub = SimplifyMultipleUseDemandedBits(
          Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
      SDValue NewSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (NewSub || NewSrc) {
        NewSub = NewSub ? NewSub : Sub;
        NewSrc = NewSrc ? NewSrc : Src;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
                                        Op.getOperand(2));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    if (VT.isScalableVector())
      return false;
    // Offset the demanded elts by the subvector index.
    SDValue Src = Op.getOperand(0);
    if (Src.getValueType().isScalableVector())
      break;
    uint64_t Idx = Op.getConstantOperandVal(1);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);

    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
                             Depth + 1))
      return true;

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
      SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (DemandedSrc) {
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
                                        Op.getOperand(1));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    if (VT.isScalableVector())
      return false;
    Known.setAllConflict();
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      APInt DemandedSubElts =
          DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
                               Known2, TLO, Depth + 1))
        return true;
      // Known bits are shared by every demanded subvector element.
      if (!!DemandedSubElts)
        Known = Known.intersectWith(Known2);
    }
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    assert(!VT.isScalableVector());
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // Collect demanded elements from the shuffle operands.
    APInt DemandedLHS, DemandedRHS;
    if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
                                DemandedRHS))
      break;

    if (!!DemandedLHS || !!DemandedRHS) {
      SDValue Op0 = Op.getOperand(0);
      SDValue Op1 = Op.getOperand(1);

      Known.setAllConflict();
      if (!!DemandedLHS) {
        if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = Known.intersectWith(Known2);
      }
      if (!!DemandedRHS) {
        if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = Known.intersectWith(Known2);
      }

      // Attempt to avoid multi-use ops if we don't need anything from them.
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::AND: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // If the RHS is a constant, check to see if the LHS would be zero without
    // using the bits from the RHS. Below, we use knowledge about the RHS to
    // simplify the LHS, here we're using information from the LHS to simplify
    // the RHS.
    if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
      // Do not increment Depth here; that can cause an infinite loop.
      KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
      // If the LHS already has zeros where RHSC does, this 'and' is dead.
      if ((LHSKnown.Zero & DemandedBits) ==
          (~RHSC->getAPIntValue() & DemandedBits))
        return TLO.CombineTo(Op, Op0);

      // If any of the set bits in the RHS are known zero on the LHS, shrink
      // the constant.
      if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
                                 DemandedElts, TLO))
        return true;

      // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
      // constant, but if this 'and' is only clearing bits that were just set by
      // the xor, then this 'and' can be eliminated by shrinking the mask of
      // the xor. For example, for a 32-bit X:
      // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
      if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
          LHSKnown.One == ~RHSC->getAPIntValue()) {
        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
        return TLO.CombineTo(Op, Xor);
      }
    }

    // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
    // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
    if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
        (Op0.getOperand(0).isUndef() ||
         ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
        Op0->hasOneUse()) {
      unsigned NumSubElts =
          Op0.getOperand(1).getValueType().getVectorNumElements();
      unsigned SubIdx = Op0.getConstantOperandVal(2);
      APInt DemandedSub =
          APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
      KnownBits KnownSubMask =
          TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
      if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
        SDValue NewAnd =
            TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
        SDValue NewInsert =
            TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
                            Op0.getOperand(1), Op0.getOperand(2));
        return TLO.CombineTo(Op, NewInsert);
      }
    }

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;

    // If all of the demanded bits are known one on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
      return TLO.CombineTo(Op, Op1);
    // If all of the demanded bits in the inputs are known zeros, return zero.
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
                               TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1533 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1534 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1535 return TLO.CombineTo(O: Op, N: NewOp);
1536 }
1537 }
1538
1539 Known &= Known2;
1540 break;
1541 }
1542 case ISD::OR: {
1543 SDValue Op0 = Op.getOperand(i: 0);
1544 SDValue Op1 = Op.getOperand(i: 1);
1545 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1546 Depth: Depth + 1)) {
1547 Op->dropFlags(Mask: SDNodeFlags::Disjoint);
1548 return true;
1549 }
1550
1551 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: ~Known.One & DemandedBits, OriginalDemandedElts: DemandedElts,
1552 Known&: Known2, TLO, Depth: Depth + 1)) {
1553 Op->dropFlags(Mask: SDNodeFlags::Disjoint);
1554 return true;
1555 }
1556
1557 // If all of the demanded bits are known zero on one side, return the other.
1558 // These bits cannot contribute to the result of the 'or'.
1559 if (DemandedBits.isSubsetOf(RHS: Known2.One | Known.Zero))
1560 return TLO.CombineTo(O: Op, N: Op0);
1561 if (DemandedBits.isSubsetOf(RHS: Known.One | Known2.Zero))
1562 return TLO.CombineTo(O: Op, N: Op1);
1563 // If the RHS is a constant, see if we can simplify it.
1564 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1565 return true;
1566 // If the operation can be done in a smaller type, do so.
1567 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1568 return true;
1569
1570 // Attempt to avoid multi-use ops if we don't need anything from them.
1571 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1572 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1573 Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1574 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1575 Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1576 if (DemandedOp0 || DemandedOp1) {
1577 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1578 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1579 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1580 return TLO.CombineTo(O: Op, N: NewOp);
1581 }
1582 }
1583
1584 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1585 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
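// A worked example with illustrative i8 constants, C1 = 0x0F, C2 = 0xF0:
// (or (and X, 0x0F), (and (or X, Y), 0xF0))
//   = (X & 0x0F) | (X & 0xF0) | (Y & 0xF0)
//   = (or (and X, 0xFF), (and Y, 0xF0)).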
1586 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1587 Op0->hasOneUse() && Op1->hasOneUse()) {
1588 // Attempt to match all commutations - m_c_Or would've been useful!
1589 for (int I = 0; I != 2; ++I) {
1590 SDValue X = Op.getOperand(i: I).getOperand(i: 0);
1591 SDValue C1 = Op.getOperand(i: I).getOperand(i: 1);
1592 SDValue Alt = Op.getOperand(i: 1 - I).getOperand(i: 0);
1593 SDValue C2 = Op.getOperand(i: 1 - I).getOperand(i: 1);
1594 if (Alt.getOpcode() == ISD::OR) {
1595 for (int J = 0; J != 2; ++J) {
1596 if (X == Alt.getOperand(i: J)) {
1597 SDValue Y = Alt.getOperand(i: 1 - J);
1598 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(Opcode: ISD::OR, DL: dl, VT,
1599 Ops: {C1, C2})) {
1600 SDValue MaskX = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: X, N2: C12);
1601 SDValue MaskY = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Y, N2: C2);
1602 return TLO.CombineTo(
1603 O: Op, N: TLO.DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: MaskX, N2: MaskY));
1604 }
1605 }
1606 }
1607 }
1608 }
1609 }
1610
1611 Known |= Known2;
1612 break;
1613 }
1614 case ISD::XOR: {
1615 SDValue Op0 = Op.getOperand(i: 0);
1616 SDValue Op1 = Op.getOperand(i: 1);
1617
1618 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1619 Depth: Depth + 1))
1620 return true;
1621 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
1622 Depth: Depth + 1))
1623 return true;
1624
1625 // If all of the demanded bits are known zero on one side, return the other.
1626 // These bits cannot contribute to the result of the 'xor'.
1627 if (DemandedBits.isSubsetOf(RHS: Known.Zero))
1628 return TLO.CombineTo(O: Op, N: Op0);
1629 if (DemandedBits.isSubsetOf(RHS: Known2.Zero))
1630 return TLO.CombineTo(O: Op, N: Op1);
1631 // If the operation can be done in a smaller type, do so.
1632 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1633 return true;
1634
1635 // If all of the unknown bits are known to be zero on one side or the other
1636 // turn this into an *inclusive* or.
1637 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1638 if (DemandedBits.isSubsetOf(RHS: Known.Zero | Known2.Zero))
1639 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Op0, N2: Op1));
1640
1641 ConstantSDNode *C = isConstOrConstSplat(N: Op1, DemandedElts);
1642 if (C) {
1643 // If one side is a constant, and all of the set bits in the constant are
1644 // also known set on the other side, turn this into an AND, as we know
1645 // the bits will be cleared.
1646 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1647 // NB: it is okay if more bits are known than are requested
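// Illustrative i8 example: if Op0 = (or X, 0xF0) then Known2.One = 0xF0,
// so for C = 0xF0 the xor merely clears bits known to be one:
// ((X | 0xF0) ^ 0xF0) == ((X | 0xF0) & 0x0F).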
1648 if (C->getAPIntValue() == Known2.One) {
1649 SDValue ANDC =
1650 TLO.DAG.getConstant(Val: ~C->getAPIntValue() & DemandedBits, DL: dl, VT);
1651 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op0, N2: ANDC));
1652 }
1653
1654 // If the RHS is a constant, see if we can change it. Don't alter a -1
1655 // constant because that's a 'not' op, and that is better for combining
1656 // and codegen.
1657 if (!C->isAllOnes() && DemandedBits.isSubsetOf(RHS: C->getAPIntValue())) {
1658 // We're flipping all demanded bits. Flip the undemanded bits too.
1659 SDValue New = TLO.DAG.getNOT(DL: dl, Val: Op0, VT);
1660 return TLO.CombineTo(O: Op, N: New);
1661 }
1662
1663 unsigned Op0Opcode = Op0.getOpcode();
1664 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1665 if (ConstantSDNode *ShiftC =
1666 isConstOrConstSplat(N: Op0.getOperand(i: 1), DemandedElts)) {
1667 // Don't crash on an oversized shift. We cannot guarantee that a
1668 // bogus shift has been simplified to undef.
1669 if (ShiftC->getAPIntValue().ult(RHS: BitWidth)) {
1670 uint64_t ShiftAmt = ShiftC->getZExtValue();
1671 APInt Ones = APInt::getAllOnes(numBits: BitWidth);
1672 Ones = Op0Opcode == ISD::SHL ? Ones.shl(shiftAmt: ShiftAmt)
1673 : Ones.lshr(shiftAmt: ShiftAmt);
1674 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1675 isDesirableToCommuteXorWithShift(N: Op.getNode())) {
1676 // If the xor constant is a demanded mask, do a 'not' before the
1677 // shift:
1678 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1679 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1680 SDValue Not = TLO.DAG.getNOT(DL: dl, Val: Op0.getOperand(i: 0), VT);
1681 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op0Opcode, DL: dl, VT, N1: Not,
1682 N2: Op0.getOperand(i: 1)));
1683 }
1684 }
1685 }
1686 }
1687 }
1688
1689 // If we can't turn this into a 'not', try to shrink the constant.
1690 if (!C || !C->isAllOnes())
1691 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1692 return true;
1693
1694 // Attempt to avoid multi-use ops if we don't need anything from them.
1695 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1696 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1697 Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1698 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1699 Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1700 if (DemandedOp0 || DemandedOp1) {
1701 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1702 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1703 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1704 return TLO.CombineTo(O: Op, N: NewOp);
1705 }
1706 }
1707
1708 Known ^= Known2;
1709 break;
1710 }
1711 case ISD::SELECT:
1712 if (SimplifyDemandedBits(Op: Op.getOperand(i: 2), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1713 Known, TLO, Depth: Depth + 1))
1714 return true;
1715 if (SimplifyDemandedBits(Op: Op.getOperand(i: 1), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1716 Known&: Known2, TLO, Depth: Depth + 1))
1717 return true;
1718
1719 // If the operands are constants, see if we can simplify them.
1720 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1721 return true;
1722
1723 // Only known if known in both the LHS and RHS.
1724 Known = Known.intersectWith(RHS: Known2);
1725 break;
1726 case ISD::VSELECT:
1727 if (SimplifyDemandedBits(Op: Op.getOperand(i: 2), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1728 Known, TLO, Depth: Depth + 1))
1729 return true;
1730 if (SimplifyDemandedBits(Op: Op.getOperand(i: 1), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1731 Known&: Known2, TLO, Depth: Depth + 1))
1732 return true;
1733
1734 // Only known if known in both the LHS and RHS.
1735 Known = Known.intersectWith(RHS: Known2);
1736 break;
1737 case ISD::SELECT_CC:
1738 if (SimplifyDemandedBits(Op: Op.getOperand(i: 3), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1739 Known, TLO, Depth: Depth + 1))
1740 return true;
1741 if (SimplifyDemandedBits(Op: Op.getOperand(i: 2), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1742 Known&: Known2, TLO, Depth: Depth + 1))
1743 return true;
1744
1745 // If the operands are constants, see if we can simplify them.
1746 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1747 return true;
1748
1749 // Only known if known in both the LHS and RHS.
1750 Known = Known.intersectWith(RHS: Known2);
1751 break;
1752 case ISD::SETCC: {
1753 SDValue Op0 = Op.getOperand(i: 0);
1754 SDValue Op1 = Op.getOperand(i: 1);
1755 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get();
1756 // If we're testing X < 0, X >= 0, X <= -1, or X > -1 (where X is an
1757 // integer), then we only need the sign bit of the operand to compute
1758 // the result.
1759 if (Op1.getValueType().isInteger() &&
1760 (((CC == ISD::SETLT || CC == ISD::SETGE) && isNullOrNullSplat(V: Op1)) ||
1761 ((CC == ISD::SETLE || CC == ISD::SETGT) &&
1762 isAllOnesOrAllOnesSplat(V: Op1)))) {
1763 KnownBits KnownOp0;
1764 if (SimplifyDemandedBits(
1765 Op: Op0, OriginalDemandedBits: APInt::getSignMask(BitWidth: Op0.getScalarValueSizeInBits()),
1766 OriginalDemandedElts: DemandedElts, Known&: KnownOp0, TLO, Depth: Depth + 1))
1767 return true;
1768 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1769 // width as the setcc result, and (3) the result of a setcc conforms to 0
1770 // or -1, we may be able to bypass the setcc.
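// For intuition: with 0/-1 booleans and matching widths, the sign bit of
// (setlt X, 0) equals the sign bit of X, so X itself suffices; the
// inverted forms (setge/setgt) need the NOT built below.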
1771 if (DemandedBits.isSignMask() &&
1772 Op0.getScalarValueSizeInBits() == BitWidth &&
1773 getBooleanContents(Type: Op0.getValueType()) ==
1774 BooleanContent::ZeroOrNegativeOneBooleanContent) {
1775 // If we remove a >= 0 or > -1 comparison (for integers), we need to
1776 // introduce a NOT operation.
1777 if (CC == ISD::SETGE || CC == ISD::SETGT) {
1778 SDLoc DL(Op);
1779 EVT VT = Op0.getValueType();
1780 SDValue NotOp0 = TLO.DAG.getNOT(DL, Val: Op0, VT);
1781 return TLO.CombineTo(O: Op, N: NotOp0);
1782 }
1783 return TLO.CombineTo(O: Op, N: Op0);
1784 }
1785 }
1786 if (getBooleanContents(Type: Op0.getValueType()) ==
1787 TargetLowering::ZeroOrOneBooleanContent &&
1788 BitWidth > 1)
1789 Known.Zero.setBitsFrom(1);
1790 break;
1791 }
1792 case ISD::SHL: {
1793 SDValue Op0 = Op.getOperand(i: 0);
1794 SDValue Op1 = Op.getOperand(i: 1);
1795 EVT ShiftVT = Op1.getValueType();
1796
1797 if (std::optional<unsigned> KnownSA =
1798 TLO.DAG.getValidShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
1799 unsigned ShAmt = *KnownSA;
1800 if (ShAmt == 0)
1801 return TLO.CombineTo(O: Op, N: Op0);
1802
1803 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1804 // single shift. We can do this if the bottom bits (which are shifted
1805 // out) are never demanded.
1806 // TODO - support non-uniform vector amounts.
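// Illustrative i32 examples: ((X >>u 2) << 5) with the low 5 result bits
// not demanded becomes (X << 3); conversely ((X >>u 5) << 2) becomes
// (X >>u 3). The sign of Diff below selects between the two.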
1807 if (Op0.getOpcode() == ISD::SRL) {
1808 if (!DemandedBits.intersects(RHS: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ShAmt))) {
1809 if (std::optional<unsigned> InnerSA =
1810 TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + 2)) {
1811 unsigned C1 = *InnerSA;
1812 unsigned Opc = ISD::SHL;
1813 int Diff = ShAmt - C1;
1814 if (Diff < 0) {
1815 Diff = -Diff;
1816 Opc = ISD::SRL;
1817 }
1818 SDValue NewSA = TLO.DAG.getConstant(Val: Diff, DL: dl, VT: ShiftVT);
1819 return TLO.CombineTo(
1820 O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, N1: Op0.getOperand(i: 0), N2: NewSA));
1821 }
1822 }
1823 }
1824
1825 // Convert (shl (anyext x), c) to (anyext (shl x, c)) if the high bits
1826 // are not demanded. This will likely allow the anyext to be folded away.
1827 // TODO - support non-uniform vector amounts.
1828 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1829 SDValue InnerOp = Op0.getOperand(i: 0);
1830 EVT InnerVT = InnerOp.getValueType();
1831 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1832 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1833 isTypeDesirableForOp(ISD::SHL, VT: InnerVT)) {
1834 SDValue NarrowShl = TLO.DAG.getNode(
1835 Opcode: ISD::SHL, DL: dl, VT: InnerVT, N1: InnerOp,
1836 N2: TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: InnerVT, DL: dl));
1837 return TLO.CombineTo(
1838 O: Op, N: TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: NarrowShl));
1839 }
1840
1841 // Repeat the SHL optimization above in cases where an extension
1842 // intervenes: (shl (anyext (shr x, c1)), c2) to
1843 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1844 // aren't demanded (as above) and that the shifted upper c1 bits of
1845 // x aren't demanded.
1846 // TODO - support non-uniform vector amounts.
1847 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1848 InnerOp.hasOneUse()) {
1849 if (std::optional<unsigned> SA2 = TLO.DAG.getValidShiftAmount(
1850 V: InnerOp, DemandedElts, Depth: Depth + 2)) {
1851 unsigned InnerShAmt = *SA2;
1852 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1853 DemandedBits.getActiveBits() <=
1854 (InnerBits - InnerShAmt + ShAmt) &&
1855 DemandedBits.countr_zero() >= ShAmt) {
1856 SDValue NewSA =
1857 TLO.DAG.getConstant(Val: ShAmt - InnerShAmt, DL: dl, VT: ShiftVT);
1858 SDValue NewExt = TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT,
1859 Operand: InnerOp.getOperand(i: 0));
1860 return TLO.CombineTo(
1861 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: NewExt, N2: NewSA));
1862 }
1863 }
1864 }
1865 }
1866
1867 APInt InDemandedMask = DemandedBits.lshr(shiftAmt: ShAmt);
1868 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
1869 Depth: Depth + 1)) {
1870 // Disable the nsw and nuw flags. We can no longer guarantee that we
1871 // won't wrap after simplification.
1872 Op->dropFlags(Mask: SDNodeFlags::NoWrap);
1873 return true;
1874 }
1875 Known <<= ShAmt;
1876 // Low bits known zero.
1877 Known.Zero.setLowBits(ShAmt);
1878
1879 // Attempt to avoid multi-use ops if we don't need anything from them.
1880 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1881 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1882 Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1883 if (DemandedOp0) {
1884 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: DemandedOp0, N2: Op1);
1885 return TLO.CombineTo(O: Op, N: NewOp);
1886 }
1887 }
1888
1889 // TODO: Can we merge this fold with the one below?
1890 // Try shrinking the operation as long as the shift amount will still be
1891 // in range.
1892 if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1893 Op.getNode()->hasOneUse()) {
1894 // Search for the smallest integer type with free casts to and from
1895 // Op's type. For expedience, just check power-of-2 integer types.
1896 unsigned DemandedSize = DemandedBits.getActiveBits();
1897 for (unsigned SmallVTBits = llvm::bit_ceil(Value: DemandedSize);
1898 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(A: SmallVTBits)) {
1899 EVT SmallVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: SmallVTBits);
1900 if (isNarrowingProfitable(N: Op.getNode(), SrcVT: VT, DestVT: SmallVT) &&
1901 isTypeDesirableForOp(ISD::SHL, VT: SmallVT) &&
1902 isTruncateFree(FromVT: VT, ToVT: SmallVT) && isZExtFree(FromTy: SmallVT, ToTy: VT) &&
1903 (!TLO.LegalOperations() || isOperationLegal(Op: ISD::SHL, VT: SmallVT))) {
1904 assert(DemandedSize <= SmallVTBits &&
1905 "Narrowed below demanded bits?");
1906 // We found a type with free casts.
1907 SDValue NarrowShl = TLO.DAG.getNode(
1908 Opcode: ISD::SHL, DL: dl, VT: SmallVT,
1909 N1: TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SmallVT, Operand: Op.getOperand(i: 0)),
1910 N2: TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: SmallVT, DL: dl));
1911 return TLO.CombineTo(
1912 O: Op, N: TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: NarrowShl));
1913 }
1914 }
1915 }
1916
1917 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1918 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1919 // Only do this if we demand the upper half so the knownbits are correct.
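// Sketch of the intent (i64 -> i32 assumed): when the demanded upper half
// of (shl i64 X, K) is known zero, i.e. the narrow shift would be NUW,
// the result equals (zext (shl (trunc X), K)) and can be done in i32.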
1920 unsigned HalfWidth = BitWidth / 2;
1921 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1922 DemandedBits.countLeadingOnes() >= HalfWidth) {
1923 EVT HalfVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: HalfWidth);
1924 if (isNarrowingProfitable(N: Op.getNode(), SrcVT: VT, DestVT: HalfVT) &&
1925 isTypeDesirableForOp(ISD::SHL, VT: HalfVT) &&
1926 isTruncateFree(FromVT: VT, ToVT: HalfVT) && isZExtFree(FromTy: HalfVT, ToTy: VT) &&
1927 (!TLO.LegalOperations() || isOperationLegal(Op: ISD::SHL, VT: HalfVT))) {
1928 // If we're demanding the upper bits at all, we must ensure
1929 // that the upper bits of the shift result are known to be zero,
1930 // which is equivalent to the narrow shift being NUW.
1931 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1932 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1933 SDNodeFlags Flags;
1934 Flags.setNoSignedWrap(IsNSW);
1935 Flags.setNoUnsignedWrap(IsNUW);
1936 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HalfVT, Operand: Op0);
1937 SDValue NewShiftAmt =
1938 TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: HalfVT, DL: dl);
1939 SDValue NewShift = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HalfVT, N1: NewOp,
1940 N2: NewShiftAmt, Flags);
1941 SDValue NewExt =
1942 TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: NewShift);
1943 return TLO.CombineTo(O: Op, N: NewExt);
1944 }
1945 }
1946 }
1947 } else {
1948 // This is a variable shift, so we can't shift the demand mask by a known
1949 // amount. But if we are not demanding high bits, then we are not
1950 // demanding those bits from the pre-shifted operand either.
1951 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1952 APInt DemandedFromOp(APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - CTLZ));
1953 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedFromOp, OriginalDemandedElts: DemandedElts, Known, TLO,
1954 Depth: Depth + 1)) {
1955 // Disable the nsw and nuw flags. We can no longer guarantee that we
1956 // won't wrap after simplification.
1957 Op->dropFlags(Mask: SDNodeFlags::NoWrap);
1958 return true;
1959 }
1960 Known.resetAll();
1961 }
1962 }
1963
1964 // If we are only demanding sign bits then we can use the shift source
1965 // directly.
1966 if (std::optional<unsigned> MaxSA =
1967 TLO.DAG.getValidMaximumShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
1968 unsigned ShAmt = *MaxSA;
1969 unsigned NumSignBits =
1970 TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1);
1971 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1972 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1973 return TLO.CombineTo(O: Op, N: Op0);
1974 }
1975 break;
1976 }
1977 case ISD::SRL: {
1978 SDValue Op0 = Op.getOperand(i: 0);
1979 SDValue Op1 = Op.getOperand(i: 1);
1980 EVT ShiftVT = Op1.getValueType();
1981
1982 if (std::optional<unsigned> KnownSA =
1983 TLO.DAG.getValidShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
1984 unsigned ShAmt = *KnownSA;
1985 if (ShAmt == 0)
1986 return TLO.CombineTo(O: Op, N: Op0);
1987
1988 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1989 // single shift. We can do this if the top bits (which are shifted out)
1990 // are never demanded.
1991 // TODO - support non-uniform vector amounts.
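// Illustrative i32 example: ((X << 3) >>u 5) with the top 5 result bits
// not demanded becomes (X >>u 2).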
1992 if (Op0.getOpcode() == ISD::SHL) {
1993 if (!DemandedBits.intersects(RHS: APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: ShAmt))) {
1994 if (std::optional<unsigned> InnerSA =
1995 TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + 2)) {
1996 unsigned C1 = *InnerSA;
1997 unsigned Opc = ISD::SRL;
1998 int Diff = ShAmt - C1;
1999 if (Diff < 0) {
2000 Diff = -Diff;
2001 Opc = ISD::SHL;
2002 }
2003 SDValue NewSA = TLO.DAG.getConstant(Val: Diff, DL: dl, VT: ShiftVT);
2004 return TLO.CombineTo(
2005 O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, N1: Op0.getOperand(i: 0), N2: NewSA));
2006 }
2007 }
2008 }
2009
2010 // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
2011 // single sra. We can do this if the top bits are never demanded.
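// Illustrative i32 example: ((X >>s 3) >>u 2) with the top 2 result bits
// not demanded becomes (X >>s 5); the sum is clamped to BitWidth - 1 so
// an oversized total still yields a valid sra amount.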
2012 if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
2013 if (!DemandedBits.intersects(RHS: APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: ShAmt))) {
2014 if (std::optional<unsigned> InnerSA =
2015 TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + 2)) {
2016 unsigned C1 = *InnerSA;
2017 // Clamp the combined shift amount if it exceeds the bit width.
2018 unsigned Combined = std::min(a: C1 + ShAmt, b: BitWidth - 1);
2019 SDValue NewSA = TLO.DAG.getConstant(Val: Combined, DL: dl, VT: ShiftVT);
2020 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRA, DL: dl, VT,
2021 N1: Op0.getOperand(i: 0), N2: NewSA));
2022 }
2023 }
2024 }
2025
2026 APInt InDemandedMask = (DemandedBits << ShAmt);
2027
2028 // If the shift is exact, then it does demand the low bits (and knows that
2029 // they are zero).
2030 if (Op->getFlags().hasExact())
2031 InDemandedMask.setLowBits(ShAmt);
2032
2033 // Narrow shift to lower half - similar to ShrinkDemandedOp.
2034 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
2035 if ((BitWidth % 2) == 0 && !VT.isVector()) {
2036 APInt HiBits = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth / 2);
2037 EVT HalfVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: BitWidth / 2);
2038 if (isNarrowingProfitable(N: Op.getNode(), SrcVT: VT, DestVT: HalfVT) &&
2039 isTypeDesirableForOp(ISD::SRL, VT: HalfVT) &&
2040 isTruncateFree(FromVT: VT, ToVT: HalfVT) && isZExtFree(FromTy: HalfVT, ToTy: VT) &&
2041 (!TLO.LegalOperations() || isOperationLegal(Op: ISD::SRL, VT: HalfVT)) &&
2042 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
2043 TLO.DAG.MaskedValueIsZero(Op: Op0, Mask: HiBits))) {
2044 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HalfVT, Operand: Op0);
2045 SDValue NewShiftAmt =
2046 TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: HalfVT, DL: dl);
2047 SDValue NewShift =
2048 TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HalfVT, N1: NewOp, N2: NewShiftAmt);
2049 return TLO.CombineTo(
2050 O: Op, N: TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: NewShift));
2051 }
2052 }
2053
2054 // Compute the new bits that are at the top now.
2055 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
2056 Depth: Depth + 1))
2057 return true;
2058 Known >>= ShAmt;
2059 // High bits known zero.
2060 Known.Zero.setHighBits(ShAmt);
2061
2062 // Attempt to avoid multi-use ops if we don't need anything from them.
2063 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2064 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2065 Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2066 if (DemandedOp0) {
2067 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: DemandedOp0, N2: Op1);
2068 return TLO.CombineTo(O: Op, N: NewOp);
2069 }
2070 }
2071 } else {
2072 // Use generic knownbits computation as it has support for non-uniform
2073 // shift amounts.
2074 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2075 }
2076
2077 // If we are only demanding sign bits then we can use the shift source
2078 // directly.
2079 if (std::optional<unsigned> MaxSA =
2080 TLO.DAG.getValidMaximumShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
2081 unsigned ShAmt = *MaxSA;
2082 // The demanded bits must already be sign bits of the source, and we
2083 // can't demand any of the shifted-in zeroes.
2084 if (DemandedBits.countl_zero() >= ShAmt) {
2085 unsigned NumSignBits =
2086 TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1);
2087 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
2088 return TLO.CombineTo(O: Op, N: Op0);
2089 }
2090 }
2091
2092 // Try to match AVG patterns (after shift simplification).
2093 if (SDValue AVG = combineShiftToAVG(Op, TLO, TLI: *this, DemandedBits,
2094 DemandedElts, Depth: Depth + 1))
2095 return TLO.CombineTo(O: Op, N: AVG);
2096
2097 break;
2098 }
2099 case ISD::SRA: {
2100 SDValue Op0 = Op.getOperand(i: 0);
2101 SDValue Op1 = Op.getOperand(i: 1);
2102 EVT ShiftVT = Op1.getValueType();
2103
2104 // If we only want bits that already match the signbit then we don't need
2105 // to shift.
2106 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2107 if (TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1) >=
2108 NumHiDemandedBits)
2109 return TLO.CombineTo(O: Op, N: Op0);
2110
2111 // If this is an arithmetic shift right and only the low-bit is set, we can
2112 // always convert this into a logical shr, even if the shift amount is
2113 // variable. The low bit of the shift cannot be an input sign bit unless
2114 // the shift amount is >= the size of the datatype, which is undefined.
2115 if (DemandedBits.isOne())
2116 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1));
2117
2118 if (std::optional<unsigned> KnownSA =
2119 TLO.DAG.getValidShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
2120 unsigned ShAmt = *KnownSA;
2121 if (ShAmt == 0)
2122 return TLO.CombineTo(O: Op, N: Op0);
2123
2124 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2125 // supports sext_inreg.
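// e.g. in i32, (sra (shl X, 24), 24) is sign_extend_inreg(X, i8): both
// replicate bit 7 of X into bits 8..31.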
2126 if (Op0.getOpcode() == ISD::SHL) {
2127 if (std::optional<unsigned> InnerSA =
2128 TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + 2)) {
2129 unsigned LowBits = BitWidth - ShAmt;
2130 EVT ExtVT = VT.changeElementType(
2131 Context&: *TLO.DAG.getContext(),
2132 EltVT: EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: LowBits));
2133
2134 if (*InnerSA == ShAmt) {
2135 if (!TLO.LegalOperations() ||
2136 getOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: ExtVT) == Legal)
2137 return TLO.CombineTo(
2138 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT,
2139 N1: Op0.getOperand(i: 0),
2140 N2: TLO.DAG.getValueType(ExtVT)));
2141
2142 // Even if we can't convert to sext_inreg, we might be able to
2143 // remove this shift pair if the input is already sign extended.
2144 unsigned NumSignBits =
2145 TLO.DAG.ComputeNumSignBits(Op: Op0.getOperand(i: 0), DemandedElts);
2146 if (NumSignBits > ShAmt)
2147 return TLO.CombineTo(O: Op, N: Op0.getOperand(i: 0));
2148 }
2149 }
2150 }
2151
2152 APInt InDemandedMask = (DemandedBits << ShAmt);
2153
2154 // If the shift is exact, then it does demand the low bits (and knows that
2155 // they are zero).
2156 if (Op->getFlags().hasExact())
2157 InDemandedMask.setLowBits(ShAmt);
2158
2159 // If any of the demanded bits are produced by the sign extension, we also
2160 // demand the input sign bit.
2161 if (DemandedBits.countl_zero() < ShAmt)
2162 InDemandedMask.setSignBit();
2163
2164 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
2165 Depth: Depth + 1))
2166 return true;
2167 Known >>= ShAmt;
2168
2169 // If the input sign bit is known to be zero, or if none of the top bits
2170 // are demanded, turn this into an unsigned shift right.
2171 if (Known.Zero[BitWidth - ShAmt - 1] ||
2172 DemandedBits.countl_zero() >= ShAmt) {
2173 SDNodeFlags Flags;
2174 Flags.setExact(Op->getFlags().hasExact());
2175 return TLO.CombineTo(
2176 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1, Flags));
2177 }
2178
2179 int Log2 = DemandedBits.exactLogBase2();
2180 if (Log2 >= 0) {
2181 // The bit must come from the sign.
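// (Had it come from X's shifted bits, the SRL conversion above would
// have fired.) Illustrative i8 example: if only bit 6 of (sra X, 5) is
// demanded, that bit is a copy of the sign, and (srl X, 1) places the
// sign bit at position 6.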
2182 SDValue NewSA = TLO.DAG.getConstant(Val: BitWidth - 1 - Log2, DL: dl, VT: ShiftVT);
2183 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: NewSA));
2184 }
2185
2186 if (Known.One[BitWidth - ShAmt - 1])
2187 // New bits are known one.
2188 Known.One.setHighBits(ShAmt);
2189
2190 // Attempt to avoid multi-use ops if we don't need anything from them.
2191 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2192 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2193 Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2194 if (DemandedOp0) {
2195 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: DemandedOp0, N2: Op1);
2196 return TLO.CombineTo(O: Op, N: NewOp);
2197 }
2198 }
2199 }
2200
2201 // Try to match AVG patterns (after shift simplification).
2202 if (SDValue AVG = combineShiftToAVG(Op, TLO, TLI: *this, DemandedBits,
2203 DemandedElts, Depth: Depth + 1))
2204 return TLO.CombineTo(O: Op, N: AVG);
2205
2206 break;
2207 }
2208 case ISD::FSHL:
2209 case ISD::FSHR: {
2210 SDValue Op0 = Op.getOperand(i: 0);
2211 SDValue Op1 = Op.getOperand(i: 1);
2212 SDValue Op2 = Op.getOperand(i: 2);
2213 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2214
2215 if (ConstantSDNode *SA = isConstOrConstSplat(N: Op2, DemandedElts)) {
2216 unsigned Amt = SA->getAPIntValue().urem(RHS: BitWidth);
2217
2218 // For fshl, 0-shift returns the 1st arg.
2219 // For fshr, 0-shift returns the 2nd arg.
2220 if (Amt == 0) {
2221 if (SimplifyDemandedBits(Op: IsFSHL ? Op0 : Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
2222 Known, TLO, Depth: Depth + 1))
2223 return true;
2224 break;
2225 }
2226
2227 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2228 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
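// e.g. fshl i8 with Amt = 3 and DemandedBits = 0xFF (illustrative): Op0
// supplies the top 5 result bits, so Demanded0 = 0xFF >> 3 = 0x1F, and
// Op1 supplies the bottom 3, so Demanded1 = 0xFF << 5 = 0xE0.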
2229 APInt Demanded0 = DemandedBits.lshr(shiftAmt: IsFSHL ? Amt : (BitWidth - Amt));
2230 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2231 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: Demanded0, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2232 Depth: Depth + 1))
2233 return true;
2234 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: Demanded1, OriginalDemandedElts: DemandedElts, Known, TLO,
2235 Depth: Depth + 1))
2236 return true;
2237
2238 Known2 <<= (IsFSHL ? Amt : (BitWidth - Amt));
2239 Known >>= (IsFSHL ? (BitWidth - Amt) : Amt);
2240 Known = Known.unionWith(RHS: Known2);
2241
2242 // Attempt to avoid multi-use ops if we don't need anything from them.
2243 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2244 !DemandedElts.isAllOnes()) {
2245 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2246 Op: Op0, DemandedBits: Demanded0, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2247 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2248 Op: Op1, DemandedBits: Demanded1, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2249 if (DemandedOp0 || DemandedOp1) {
2250 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2251 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2252 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedOp0,
2253 N2: DemandedOp1, N3: Op2);
2254 return TLO.CombineTo(O: Op, N: NewOp);
2255 }
2256 }
2257 }
2258
2259 // For pow-2 bitwidths, we only demand the low log2(BitWidth) bits of the amount.
2260 if (isPowerOf2_32(Value: BitWidth)) {
2261 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2262 if (SimplifyDemandedBits(Op: Op2, OriginalDemandedBits: DemandedAmtBits, OriginalDemandedElts: DemandedElts,
2263 Known&: Known2, TLO, Depth: Depth + 1))
2264 return true;
2265 }
2266 break;
2267 }
2268 case ISD::ROTL:
2269 case ISD::ROTR: {
2270 SDValue Op0 = Op.getOperand(i: 0);
2271 SDValue Op1 = Op.getOperand(i: 1);
2272 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2273
2274 // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
2275 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1))
2276 return TLO.CombineTo(O: Op, N: Op0);
2277
2278 if (ConstantSDNode *SA = isConstOrConstSplat(N: Op1, DemandedElts)) {
2279 unsigned Amt = SA->getAPIntValue().urem(RHS: BitWidth);
2280 unsigned RevAmt = BitWidth - Amt;
2281
2282 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2283 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
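// e.g. rotl i8 X, 3 with DemandedBits = 0x18 (bits 3..4, illustrative):
// those bits come from X bits 0..1, i.e. Demanded0 = rotr(0x18, 3) = 0x03.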
2284 APInt Demanded0 = DemandedBits.rotr(rotateAmt: IsROTL ? Amt : RevAmt);
2285 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: Demanded0, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2286 Depth: Depth + 1))
2287 return true;
2288
2289 // rot*(x, 0) --> x
2290 if (Amt == 0)
2291 return TLO.CombineTo(O: Op, N: Op0);
2292
2293 // See if we don't demand either half of the rotated bits.
2294 if ((!TLO.LegalOperations() || isOperationLegal(Op: ISD::SHL, VT)) &&
2295 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2296 Op1 = TLO.DAG.getConstant(Val: IsROTL ? Amt : RevAmt, DL: dl, VT: Op1.getValueType());
2297 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op0, N2: Op1));
2298 }
2299 if ((!TLO.LegalOperations() || isOperationLegal(Op: ISD::SRL, VT)) &&
2300 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2301 Op1 = TLO.DAG.getConstant(Val: IsROTL ? RevAmt : Amt, DL: dl, VT: Op1.getValueType());
2302 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1));
2303 }
2304 }
2305
2306 // For pow-2 bitwidths, we only demand the low log2(BitWidth) bits of the amount.
2307 if (isPowerOf2_32(Value: BitWidth)) {
2308 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2309 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedAmtBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2310 Depth: Depth + 1))
2311 return true;
2312 }
2313 break;
2314 }
2315 case ISD::SMIN:
2316 case ISD::SMAX:
2317 case ISD::UMIN:
2318 case ISD::UMAX: {
2319 unsigned Opc = Op.getOpcode();
2320 SDValue Op0 = Op.getOperand(i: 0);
2321 SDValue Op1 = Op.getOperand(i: 1);
2322
2323 // If we're only demanding signbits, then we can simplify to OR/AND node.
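// For intuition, on 0/-1 values this is exact: smin(0,-1) = -1 = (0 | -1)
// and umin(0,-1) = 0 = (0 & -1); the check below generalizes this to
// however many sign bits the operands share.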
2324 unsigned BitOp =
2325 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2326 unsigned NumSignBits =
2327 std::min(a: TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1),
2328 b: TLO.DAG.ComputeNumSignBits(Op: Op1, DemandedElts, Depth: Depth + 1));
2329 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2330 if (NumSignBits >= NumDemandedUpperBits)
2331 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: BitOp, DL: SDLoc(Op), VT, N1: Op0, N2: Op1));
2332
2333 // Check if one arg is always less/greater than (or equal to) the other arg.
2334 KnownBits Known0 = TLO.DAG.computeKnownBits(Op: Op0, DemandedElts, Depth: Depth + 1);
2335 KnownBits Known1 = TLO.DAG.computeKnownBits(Op: Op1, DemandedElts, Depth: Depth + 1);
2336 switch (Opc) {
2337 case ISD::SMIN:
2338 if (std::optional<bool> IsSLE = KnownBits::sle(LHS: Known0, RHS: Known1))
2339 return TLO.CombineTo(O: Op, N: *IsSLE ? Op0 : Op1);
2340 if (std::optional<bool> IsSLT = KnownBits::slt(LHS: Known0, RHS: Known1))
2341 return TLO.CombineTo(O: Op, N: *IsSLT ? Op0 : Op1);
2342 Known = KnownBits::smin(LHS: Known0, RHS: Known1);
2343 break;
2344 case ISD::SMAX:
2345 if (std::optional<bool> IsSGE = KnownBits::sge(LHS: Known0, RHS: Known1))
2346 return TLO.CombineTo(O: Op, N: *IsSGE ? Op0 : Op1);
2347 if (std::optional<bool> IsSGT = KnownBits::sgt(LHS: Known0, RHS: Known1))
2348 return TLO.CombineTo(O: Op, N: *IsSGT ? Op0 : Op1);
2349 Known = KnownBits::smax(LHS: Known0, RHS: Known1);
2350 break;
2351 case ISD::UMIN:
2352 if (std::optional<bool> IsULE = KnownBits::ule(LHS: Known0, RHS: Known1))
2353 return TLO.CombineTo(O: Op, N: *IsULE ? Op0 : Op1);
2354 if (std::optional<bool> IsULT = KnownBits::ult(LHS: Known0, RHS: Known1))
2355 return TLO.CombineTo(O: Op, N: *IsULT ? Op0 : Op1);
2356 Known = KnownBits::umin(LHS: Known0, RHS: Known1);
2357 break;
2358 case ISD::UMAX:
2359 if (std::optional<bool> IsUGE = KnownBits::uge(LHS: Known0, RHS: Known1))
2360 return TLO.CombineTo(O: Op, N: *IsUGE ? Op0 : Op1);
2361 if (std::optional<bool> IsUGT = KnownBits::ugt(LHS: Known0, RHS: Known1))
2362 return TLO.CombineTo(O: Op, N: *IsUGT ? Op0 : Op1);
2363 Known = KnownBits::umax(LHS: Known0, RHS: Known1);
2364 break;
2365 }
2366 break;
2367 }
2368 case ISD::BITREVERSE: {
2369 SDValue Src = Op.getOperand(i: 0);
2370 APInt DemandedSrcBits = DemandedBits.reverseBits();
2371 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2372 Depth: Depth + 1))
2373 return true;
2374 Known = Known2.reverseBits();
2375 break;
2376 }
2377 case ISD::BSWAP: {
2378 SDValue Src = Op.getOperand(i: 0);
2379
2380 // If the only bits demanded come from one byte of the bswap result,
2381 // just shift the input byte into position to eliminate the bswap.
2382 unsigned NLZ = DemandedBits.countl_zero();
2383 unsigned NTZ = DemandedBits.countr_zero();
2384
2385 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2386 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2387 // have 14 leading zeros, round to 8.
2388 NLZ = alignDown(Value: NLZ, Align: 8);
2389 NTZ = alignDown(Value: NTZ, Align: 8);
2390 // If we need exactly one byte, we can do this transformation.
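// Illustrative i32 example: DemandedBits = 0x0000FF00 gives NLZ = 16 and
// NTZ = 8, so exactly byte 1 is demanded; bswap puts X's byte 2 there,
// and since NLZ > NTZ, (srl X, NLZ - NTZ) = (srl X, 8) does the same.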
2391 if (BitWidth - NLZ - NTZ == 8) {
2392 // Replace this with either a left or right shift to get the byte into
2393 // the right place.
2394 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2395 if (!TLO.LegalOperations() || isOperationLegal(Op: ShiftOpcode, VT)) {
2396 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2397 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(Val: ShiftAmount, VT, DL: dl);
2398 SDValue NewOp = TLO.DAG.getNode(Opcode: ShiftOpcode, DL: dl, VT, N1: Src, N2: ShAmt);
2399 return TLO.CombineTo(O: Op, N: NewOp);
2400 }
2401 }
2402
2403 APInt DemandedSrcBits = DemandedBits.byteSwap();
2404 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2405 Depth: Depth + 1))
2406 return true;
2407 Known = Known2.byteSwap();
2408 break;
2409 }
2410 case ISD::CTPOP: {
2411 // If only 1 bit is demanded, replace with PARITY as long as we're before
2412 // op legalization.
2413 // FIXME: Limit to scalars for now.
2414 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2415 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::PARITY, DL: dl, VT,
2416 Operand: Op.getOperand(i: 0)));
2417
2418 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2419 break;
2420 }
2421 case ISD::SIGN_EXTEND_INREG: {
2422 SDValue Op0 = Op.getOperand(i: 0);
2423 EVT ExVT = cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT();
2424 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2425
2426 // If we only care about the highest bit, don't bother shifting right.
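// e.g. for i32 sign_extend_inreg(X, i8), bit 31 of the result equals bit 7
// of X, so when only bit 31 is demanded (shl X, 24) suffices.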
2427 if (DemandedBits.isSignMask()) {
2428 unsigned MinSignedBits =
2429 TLO.DAG.ComputeMaxSignificantBits(Op: Op0, DemandedElts, Depth: Depth + 1);
2430 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2431 // However, if the input is already sign extended, we expect the sign
2432 // extension to be dropped altogether later and do not simplify.
2433 if (!AlreadySignExtended) {
2434 // Compute the correct shift amount type, which must be getShiftAmountTy
2435 // for scalar types after legalization.
2436 SDValue ShiftAmt =
2437 TLO.DAG.getShiftAmountConstant(Val: BitWidth - ExVTBits, VT, DL: dl);
2438 return TLO.CombineTo(O: Op,
2439 N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op0, N2: ShiftAmt));
2440 }
2441 }
2442
2443 // If none of the extended bits are demanded, eliminate the sextinreg.
2444 if (DemandedBits.getActiveBits() <= ExVTBits)
2445 return TLO.CombineTo(O: Op, N: Op0);
2446
2447 APInt InputDemandedBits = DemandedBits.getLoBits(numBits: ExVTBits);
2448
2449 // Since the sign extended bits are demanded, we know that the sign
2450 // bit is demanded.
2451 InputDemandedBits.setBit(ExVTBits - 1);
2452
2453 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InputDemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
2454 Depth: Depth + 1))
2455 return true;
2456
2457 // If the sign bit of the input is known set or clear, then we know the
2458 // top bits of the result.
2459
2460 // If the input sign bit is known zero, convert this into a zero extension.
2461 if (Known.Zero[ExVTBits - 1])
2462 return TLO.CombineTo(O: Op, N: TLO.DAG.getZeroExtendInReg(Op: Op0, DL: dl, VT: ExVT));
2463
2464 APInt Mask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ExVTBits);
2465 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2466 Known.One.setBitsFrom(ExVTBits);
2467 Known.Zero &= Mask;
2468 } else { // Input sign bit unknown
2469 Known.Zero &= Mask;
2470 Known.One &= Mask;
2471 }
2472 break;
2473 }
2474 case ISD::BUILD_PAIR: {
2475 EVT HalfVT = Op.getOperand(i: 0).getValueType();
2476 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2477
2478 APInt MaskLo = DemandedBits.getLoBits(numBits: HalfBitWidth).trunc(width: HalfBitWidth);
2479 APInt MaskHi = DemandedBits.getHiBits(numBits: HalfBitWidth).trunc(width: HalfBitWidth);
2480
2481 KnownBits KnownLo, KnownHi;
2482
2483 if (SimplifyDemandedBits(Op: Op.getOperand(i: 0), DemandedBits: MaskLo, Known&: KnownLo, TLO, Depth: Depth + 1))
2484 return true;
2485
2486 if (SimplifyDemandedBits(Op: Op.getOperand(i: 1), DemandedBits: MaskHi, Known&: KnownHi, TLO, Depth: Depth + 1))
2487 return true;
2488
2489 Known = KnownHi.concat(Lo: KnownLo);
2490 break;
2491 }
2492 case ISD::ZERO_EXTEND_VECTOR_INREG:
2493 if (VT.isScalableVector())
2494 return false;
2495 [[fallthrough]];
2496 case ISD::ZERO_EXTEND: {
2497 SDValue Src = Op.getOperand(i: 0);
2498 EVT SrcVT = Src.getValueType();
2499 unsigned InBits = SrcVT.getScalarSizeInBits();
2500 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2501 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2502
2503 // If none of the top bits are demanded, convert this into an any_extend.
2504 if (DemandedBits.getActiveBits() <= InBits) {
2505 // If we only need the non-extended bits of the bottom element
2506 // then we can just bitcast to the result.
2507 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2508 VT.getSizeInBits() == SrcVT.getSizeInBits())
2509 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2510
2511 unsigned Opc =
2512 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2513 if (!TLO.LegalOperations() || isOperationLegal(Op: Opc, VT))
2514 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src));
2515 }
2516
2517 APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2518 APInt InDemandedElts = DemandedElts.zext(width: InElts);
2519 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2520 Depth: Depth + 1)) {
2521 Op->dropFlags(Mask: SDNodeFlags::NonNeg);
2522 return true;
2523 }
2524 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2525 Known = Known.zext(BitWidth);
2526
2527 // Attempt to avoid multi-use ops if we don't need anything from them.
2528 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2529 Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
2530 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2531 break;
2532 }
2533 case ISD::SIGN_EXTEND_VECTOR_INREG:
2534 if (VT.isScalableVector())
2535 return false;
2536 [[fallthrough]];
2537 case ISD::SIGN_EXTEND: {
2538 SDValue Src = Op.getOperand(i: 0);
2539 EVT SrcVT = Src.getValueType();
2540 unsigned InBits = SrcVT.getScalarSizeInBits();
2541 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2542 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2543
2544 APInt InDemandedElts = DemandedElts.zext(width: InElts);
2545 APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2546
2547 // Since some of the sign extended bits are demanded, we know that the sign
2548 // bit is demanded.
2549 InDemandedBits.setBit(InBits - 1);
2550
2551 // If none of the top bits are demanded, convert this into an any_extend.
2552 if (DemandedBits.getActiveBits() <= InBits) {
2553 // If we only need the non-extended bits of the bottom element
2554 // then we can just bitcast to the result.
2555 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2556 VT.getSizeInBits() == SrcVT.getSizeInBits())
2557 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2558
2559 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2560 if (getBooleanContents(Type: VT) != ZeroOrNegativeOneBooleanContent ||
2561 TLO.DAG.ComputeNumSignBits(Op: Src, DemandedElts: InDemandedElts, Depth: Depth + 1) !=
2562 InBits) {
2563 unsigned Opc =
2564 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2565 if (!TLO.LegalOperations() || isOperationLegal(Op: Opc, VT))
2566 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src));
2567 }
2568 }
2569
2570 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2571 Depth: Depth + 1))
2572 return true;
2573 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2574
2575 // If the sign bit is known one, the top bits match.
2576 Known = Known.sext(BitWidth);
2577
2578 // If the sign bit is known zero, convert this to a zero extend.
2579 if (Known.isNonNegative()) {
2580 unsigned Opc =
2581 IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2582 if (!TLO.LegalOperations() || isOperationLegal(Op: Opc, VT)) {
2583 SDNodeFlags Flags;
2584 if (!IsVecInReg)
2585 Flags |= SDNodeFlags::NonNeg;
2586 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src, Flags));
2587 }
2588 }
2589
2590 // Attempt to avoid multi-use ops if we don't need anything from them.
2591 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2592 Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
2593 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2594 break;
2595 }
2596 case ISD::ANY_EXTEND_VECTOR_INREG:
2597 if (VT.isScalableVector())
2598 return false;
2599 [[fallthrough]];
2600 case ISD::ANY_EXTEND: {
2601 SDValue Src = Op.getOperand(i: 0);
2602 EVT SrcVT = Src.getValueType();
2603 unsigned InBits = SrcVT.getScalarSizeInBits();
2604 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2605 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2606
2607 // If we only need the bottom element then we can just bitcast.
2608 // TODO: Handle ANY_EXTEND?
2609 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2610 VT.getSizeInBits() == SrcVT.getSizeInBits())
2611 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2612
2613 APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2614 APInt InDemandedElts = DemandedElts.zext(width: InElts);
2615 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2616 Depth: Depth + 1))
2617 return true;
2618 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2619 Known = Known.anyext(BitWidth);
2620
2621 // Attempt to avoid multi-use ops if we don't need anything from them.
2622 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2623 Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
2624 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2625 break;
2626 }
2627 case ISD::TRUNCATE: {
2628 SDValue Src = Op.getOperand(i: 0);
2629
2630 // Simplify the input, using demanded bit information, and compute the known
2631 // zero/one bits live out.
2632 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2633 APInt TruncMask = DemandedBits.zext(width: OperandBitWidth);
2634 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: TruncMask, OriginalDemandedElts: DemandedElts, Known, TLO,
2635 Depth: Depth + 1)) {
2636 // Disable the nsw and nuw flags. We can no longer guarantee that we
2637 // won't wrap after simplification.
2638 Op->dropFlags(Mask: SDNodeFlags::NoWrap);
2639 return true;
2640 }
2641 Known = Known.trunc(BitWidth);
2642
2643 // Attempt to avoid multi-use ops if we don't need anything from them.
2644 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2645 Op: Src, DemandedBits: TruncMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
2646 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: NewSrc));
2647
2648 // If the input is only used by this truncate, see if we can shrink it based
2649 // on the known demanded bits.
2650 switch (Src.getOpcode()) {
2651 default:
2652 break;
2653 case ISD::SRL:
2654 // Shrink SRL by a constant if none of the high bits shifted in are
2655 // demanded.
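// Illustrative example: (trunc i64 -> i32 (srl X, 8)) sees X bits 8..39,
// of which bits 32..39 are the shifted-in high bits (HighBits below); if
// those are not demanded this becomes (srl (trunc X), 8).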
2656 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2657 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2658 // undesirable.
2659 break;
2660
2661 if (Src.getNode()->hasOneUse()) {
2662 if (isTruncateFree(Val: Src, VT2: VT) &&
2663 !isTruncateFree(FromVT: Src.getValueType(), ToVT: VT)) {
2664 // If the truncate is only free in the trunc(srl) form, do not turn
2665 // it into srl(trunc). We check this by first verifying that the
2666 // truncate is free for Src's opcode (srl), then that the truncate is
2667 // not done by referencing a sub-register. In testing, if both
2668 // trunc(srl) and srl(trunc) have a free truncate, srl(trunc) performs
2669 // better; if only trunc(srl)'s truncate is free, trunc(srl) is better.
2670 break;
2671 }
2672
2673 std::optional<unsigned> ShAmtC =
2674 TLO.DAG.getValidShiftAmount(V: Src, DemandedElts, Depth: Depth + 2);
2675 if (!ShAmtC || *ShAmtC >= BitWidth)
2676 break;
2677 unsigned ShVal = *ShAmtC;
2678
2679 APInt HighBits =
2680 APInt::getHighBitsSet(numBits: OperandBitWidth, hiBitsSet: OperandBitWidth - BitWidth);
2681 HighBits.lshrInPlace(ShiftAmt: ShVal);
2682 HighBits = HighBits.trunc(width: BitWidth);
2683 if (!(HighBits & DemandedBits)) {
2684 // None of the shifted in bits are needed. Add a truncate of the
2685 // shift input, then shift it.
2686 SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(Val: ShVal, VT, DL: dl);
2687 SDValue NewTrunc =
2688 TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Src.getOperand(i: 0));
2689 return TLO.CombineTo(
2690 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: NewTrunc, N2: NewShAmt));
2691 }
2692 }
2693 break;
2694 }
2695
2696 break;
2697 }
2698 case ISD::AssertZext: {
2699 // AssertZext demands all of the high bits, plus any of the low bits
2700 // demanded by its users.
2701 EVT ZVT = cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT();
2702 APInt InMask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ZVT.getSizeInBits());
2703 if (SimplifyDemandedBits(Op: Op.getOperand(i: 0), DemandedBits: ~InMask | DemandedBits, Known,
2704 TLO, Depth: Depth + 1))
2705 return true;
2706
2707 Known.Zero |= ~InMask;
2708 Known.One &= (~Known.Zero);
2709 break;
2710 }
2711 case ISD::EXTRACT_VECTOR_ELT: {
2712 SDValue Src = Op.getOperand(i: 0);
2713 SDValue Idx = Op.getOperand(i: 1);
2714 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2715 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2716
2717 if (SrcEltCnt.isScalable())
2718 return false;
2719
2720 // If there is no constant index, demand the bits from every vector element.
2721 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2722 APInt DemandedSrcElts = APInt::getAllOnes(numBits: NumSrcElts);
2723 if (auto *CIdx = dyn_cast<ConstantSDNode>(Val&: Idx))
2724 if (CIdx->getAPIntValue().ult(RHS: NumSrcElts))
2725 DemandedSrcElts = APInt::getOneBitSet(numBits: NumSrcElts, BitNo: CIdx->getZExtValue());
2726
2727 // If BitWidth > EltBitWidth the value is any-extended, so we do not know
2728 // anything about the extended bits.
2729 APInt DemandedSrcBits = DemandedBits;
2730 if (BitWidth > EltBitWidth)
2731 DemandedSrcBits = DemandedSrcBits.trunc(width: EltBitWidth);
2732
2733 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts, Known&: Known2, TLO,
2734 Depth: Depth + 1))
2735 return true;
2736
2737 // Attempt to avoid multi-use ops if we don't need anything from them.
2738 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2739 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2740 Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1)) {
2741 SDValue NewOp =
2742 TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedSrc, N2: Idx);
2743 return TLO.CombineTo(O: Op, N: NewOp);
2744 }
2745 }
2746
2747 Known = Known2;
2748 if (BitWidth > EltBitWidth)
2749 Known = Known.anyext(BitWidth);
2750 break;
2751 }
2752 case ISD::BITCAST: {
2753 if (VT.isScalableVector())
2754 return false;
2755 SDValue Src = Op.getOperand(i: 0);
2756 EVT SrcVT = Src.getValueType();
2757 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2758
2759 // If this is an FP->Int bitcast and if the sign bit is the only
2760 // thing demanded, turn this into a FGETSIGN.
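    // Illustrative example: (i64 bitcast (f64 X)) with only the sign bit
    // demanded can become (shl (zext (fgetsign X)), 63) on a hypothetical
    // target where only the i32 FGETSIGN is legal.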
2761 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2762 DemandedBits == APInt::getSignMask(BitWidth: Op.getValueSizeInBits()) &&
2763 SrcVT.isFloatingPoint()) {
2764 bool OpVTLegal = isOperationLegalOrCustom(Op: ISD::FGETSIGN, VT);
2765 bool i32Legal = isOperationLegalOrCustom(Op: ISD::FGETSIGN, VT: MVT::i32);
2766 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2767 SrcVT != MVT::f128) {
2768 // Cannot eliminate/lower SHL for f128 yet.
2769 EVT Ty = OpVTLegal ? VT : MVT::i32;
2770 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2771 // place. We expect the SHL to be eliminated by other optimizations.
2772 SDValue Sign = TLO.DAG.getNode(Opcode: ISD::FGETSIGN, DL: dl, VT: Ty, Operand: Src);
2773 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2774 if (!OpVTLegal && OpVTSizeInBits > 32)
2775 Sign = TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Sign);
2776 unsigned ShVal = Op.getValueSizeInBits() - 1;
2777 SDValue ShAmt = TLO.DAG.getConstant(Val: ShVal, DL: dl, VT);
2778 return TLO.CombineTo(O: Op,
2779 N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Sign, N2: ShAmt));
2780 }
2781 }
2782
    // Bitcast from a vector using SimplifyDemandedBits /
    // SimplifyDemandedVectorElts.
2784 // Demand the elt/bit if any of the original elts/bits are demanded.
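    // E.g. (little-endian, scalar result): for (i32 bitcast (v4i8 X)) with
    // DemandedBits = 0x0000FF00, only the Sub mask for byte 1 is non-zero,
    // so we demand element 1 of X and all 8 bits within it.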
2785 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2786 unsigned Scale = BitWidth / NumSrcEltBits;
2787 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2788 APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
2789 APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
2790 for (unsigned i = 0; i != Scale; ++i) {
2791 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2792 unsigned BitOffset = EltOffset * NumSrcEltBits;
2793 APInt Sub = DemandedBits.extractBits(numBits: NumSrcEltBits, bitPosition: BitOffset);
2794 if (!Sub.isZero()) {
2795 DemandedSrcBits |= Sub;
2796 for (unsigned j = 0; j != NumElts; ++j)
2797 if (DemandedElts[j])
2798 DemandedSrcElts.setBit((j * Scale) + i);
2799 }
2800 }
2801
2802 APInt KnownSrcUndef, KnownSrcZero;
2803 if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedSrcElts, KnownUndef&: KnownSrcUndef,
2804 KnownZero&: KnownSrcZero, TLO, Depth: Depth + 1))
2805 return true;
2806
2807 KnownBits KnownSrcBits;
2808 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts,
2809 Known&: KnownSrcBits, TLO, Depth: Depth + 1))
2810 return true;
2811 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2812 // TODO - bigendian once we have test coverage.
2813 unsigned Scale = NumSrcEltBits / BitWidth;
2814 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2815 APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
2816 APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
2817 for (unsigned i = 0; i != NumElts; ++i)
2818 if (DemandedElts[i]) {
2819 unsigned Offset = (i % Scale) * BitWidth;
2820 DemandedSrcBits.insertBits(SubBits: DemandedBits, bitPosition: Offset);
2821 DemandedSrcElts.setBit(i / Scale);
2822 }
2823
2824 if (SrcVT.isVector()) {
2825 APInt KnownSrcUndef, KnownSrcZero;
2826 if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedSrcElts, KnownUndef&: KnownSrcUndef,
2827 KnownZero&: KnownSrcZero, TLO, Depth: Depth + 1))
2828 return true;
2829 }
2830
2831 KnownBits KnownSrcBits;
2832 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts,
2833 Known&: KnownSrcBits, TLO, Depth: Depth + 1))
2834 return true;
2835
2836 // Attempt to avoid multi-use ops if we don't need anything from them.
2837 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2838 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2839 Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1)) {
2840 SDValue NewOp = TLO.DAG.getBitcast(VT, V: DemandedSrc);
2841 return TLO.CombineTo(O: Op, N: NewOp);
2842 }
2843 }
2844 }
2845
2846 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2847 // recursive call where Known may be useful to the caller.
2848 if (Depth > 0) {
2849 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2850 return false;
2851 }
2852 break;
2853 }
2854 case ISD::MUL:
2855 if (DemandedBits.isPowerOf2()) {
2856 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2857 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2858 // odd (has LSB set), then the left-shifted low bit of X is the answer.
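      // E.g. with DemandedBits = 0b1000 and C = 24 (0b11000): both have 3
      // trailing zeros, and bit 3 of X * 24 = (X * 3) << 3 is bit 0 of
      // X * 3, which equals bit 0 of X, so X << 3 suffices here.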
2859 unsigned CTZ = DemandedBits.countr_zero();
2860 ConstantSDNode *C = isConstOrConstSplat(N: Op.getOperand(i: 1), DemandedElts);
2861 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2862 SDValue AmtC = TLO.DAG.getShiftAmountConstant(Val: CTZ, VT, DL: dl);
2863 SDValue Shl = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op.getOperand(i: 0), N2: AmtC);
2864 return TLO.CombineTo(O: Op, N: Shl);
2865 }
2866 }
    // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
    // X * X is odd iff X is odd, and bit 1 of X * X is always 0, since
    // squares are congruent to 0 or 1 (mod 4).
2870 if (Op.getOperand(i: 0) == Op.getOperand(i: 1) && DemandedBits.ult(RHS: 4)) {
2871 SDValue One = TLO.DAG.getConstant(Val: 1, DL: dl, VT);
2872 SDValue And1 = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op.getOperand(i: 0), N2: One);
2873 return TLO.CombineTo(O: Op, N: And1);
2874 }
2875 [[fallthrough]];
2876 case ISD::PTRADD:
2877 if (Op.getOperand(i: 0).getValueType() != Op.getOperand(i: 1).getValueType())
2878 break;
2879 // PTRADD behaves like ADD if pointers are represented as integers.
2880 [[fallthrough]];
2881 case ISD::ADD:
2882 case ISD::SUB: {
2883 // Add, Sub, and Mul don't demand any bits in positions beyond that
2884 // of the highest bit demanded of them.
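    // E.g. if only the low 16 bits of an i32 add are demanded, carries out of
    // bit 15 are never observed, so each operand need only be simplified
    // against LoMask = 0x0000FFFF.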
2885 SDValue Op0 = Op.getOperand(i: 0), Op1 = Op.getOperand(i: 1);
2886 SDNodeFlags Flags = Op.getNode()->getFlags();
2887 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2888 APInt LoMask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - DemandedBitsLZ);
2889 KnownBits KnownOp0, KnownOp1;
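    // For MUL, if the RHS is known to have K trailing zeros then the product
    // is a multiple of 2^K, so bit J of the LHS only influences result bits
    // >= J + K; the top K bits of the demanded mask can therefore be dropped
    // for the LHS.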
2890 auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2891 const KnownBits &KnownRHS) {
2892 if (Op.getOpcode() == ISD::MUL)
2893 Demanded.clearHighBits(hiBits: KnownRHS.countMinTrailingZeros());
2894 return Demanded;
2895 };
2896 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: LoMask, OriginalDemandedElts: DemandedElts, Known&: KnownOp1, TLO,
2897 Depth: Depth + 1) ||
2898 SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: GetDemandedBitsLHSMask(LoMask, KnownOp1),
2899 OriginalDemandedElts: DemandedElts, Known&: KnownOp0, TLO, Depth: Depth + 1) ||
2900 // See if the operation should be performed at a smaller bit width.
2901 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2902 // Disable the nsw and nuw flags. We can no longer guarantee that we
2903 // won't wrap after simplification.
2904 Op->dropFlags(Mask: SDNodeFlags::NoWrap);
2905 return true;
2906 }
2907
2908 // neg x with only low bit demanded is simply x.
2909 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2910 isNullConstant(V: Op0))
2911 return TLO.CombineTo(O: Op, N: Op1);
2912
2913 // Attempt to avoid multi-use ops if we don't need anything from them.
2914 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2915 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2916 Op: Op0, DemandedBits: LoMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2917 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2918 Op: Op1, DemandedBits: LoMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2919 if (DemandedOp0 || DemandedOp1) {
2920 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2921 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2922 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1,
2923 Flags: Flags & ~SDNodeFlags::NoWrap);
2924 return TLO.CombineTo(O: Op, N: NewOp);
2925 }
2926 }
2927
2928 // If we have a constant operand, we may be able to turn it into -1 if we
2929 // do not demand the high bits. This can make the constant smaller to
2930 // encode, allow more general folding, or match specialized instruction
    // patterns (e.g., 'blsr' on x86). Don't bother changing 1 to -1 because
    // that is probably not useful (and could be detrimental).
2933 ConstantSDNode *C = isConstOrConstSplat(N: Op1);
2934 APInt HighMask = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: DemandedBitsLZ);
2935 if (C && !C->isAllOnes() && !C->isOne() &&
2936 (C->getAPIntValue() | HighMask).isAllOnes()) {
2937 SDValue Neg1 = TLO.DAG.getAllOnesConstant(DL: dl, VT);
2938 // Disable the nsw and nuw flags. We can no longer guarantee that we
2939 // won't wrap after simplification.
2940 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Neg1,
2941 Flags: Flags & ~SDNodeFlags::NoWrap);
2942 return TLO.CombineTo(O: Op, N: NewOp);
2943 }
2944
    // Match a multiply with a disguised negated-power-of-2 and convert it to
    // an equivalent shift-left amount.
2947 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2948 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2949 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2950 return 0;
2951
2952 // Don't touch opaque constants. Also, ignore zero and power-of-2
2953 // multiplies. Those will get folded later.
2954 ConstantSDNode *MulC = isConstOrConstSplat(N: Mul.getOperand(i: 1));
2955 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2956 !MulC->getAPIntValue().isPowerOf2()) {
2957 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2958 if (UnmaskedC.isNegatedPowerOf2())
2959 return (-UnmaskedC).logBase2();
2960 }
2961 return 0;
2962 };
2963
2964 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2965 unsigned ShlAmt) {
2966 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(Val: ShlAmt, VT, DL: dl);
2967 SDValue Shl = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: X, N2: ShlAmtC);
2968 SDValue Res = TLO.DAG.getNode(Opcode: NT, DL: dl, VT, N1: Y, N2: Shl);
2969 return TLO.CombineTo(O: Op, N: Res);
2970 };
2971
2972 if (isOperationLegalOrCustom(Op: ISD::SHL, VT)) {
2973 if (Op.getOpcode() == ISD::ADD) {
2974 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2975 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2976 return foldMul(ISD::SUB, Op0.getOperand(i: 0), Op1, ShAmt);
2977 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2978 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2979 return foldMul(ISD::SUB, Op1.getOperand(i: 0), Op0, ShAmt);
2980 }
2981 if (Op.getOpcode() == ISD::SUB) {
2982 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2983 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2984 return foldMul(ISD::ADD, Op1.getOperand(i: 0), Op0, ShAmt);
2985 }
2986 }
2987
2988 if (Op.getOpcode() == ISD::MUL) {
2989 Known = KnownBits::mul(LHS: KnownOp0, RHS: KnownOp1);
2990 } else { // Op.getOpcode() is either ISD::ADD, ISD::PTRADD, or ISD::SUB.
2991 Known = KnownBits::computeForAddSub(
2992 Add: Op.getOpcode() != ISD::SUB, NSW: Flags.hasNoSignedWrap(),
2993 NUW: Flags.hasNoUnsignedWrap(), LHS: KnownOp0, RHS: KnownOp1);
2994 }
2995 break;
2996 }
2997 case ISD::FABS: {
2998 SDValue Op0 = Op.getOperand(i: 0);
2999 APInt SignMask = APInt::getSignMask(BitWidth);
3000
3001 if (!DemandedBits.intersects(RHS: SignMask))
3002 return TLO.CombineTo(O: Op, N: Op0);
3003
3004 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
3005 Depth: Depth + 1))
3006 return true;
3007
3008 if (Known.isNonNegative())
3009 return TLO.CombineTo(O: Op, N: Op0);
3010 if (Known.isNegative())
3011 return TLO.CombineTo(
3012 O: Op, N: TLO.DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT, Operand: Op0, Flags: Op->getFlags()));
3013
3014 Known.Zero |= SignMask;
3015 Known.One &= ~SignMask;
3016
3017 break;
3018 }
3019 case ISD::FCOPYSIGN: {
3020 SDValue Op0 = Op.getOperand(i: 0);
3021 SDValue Op1 = Op.getOperand(i: 1);
3022
3023 unsigned BitWidth0 = Op0.getScalarValueSizeInBits();
3024 unsigned BitWidth1 = Op1.getScalarValueSizeInBits();
3025 APInt SignMask0 = APInt::getSignMask(BitWidth: BitWidth0);
3026 APInt SignMask1 = APInt::getSignMask(BitWidth: BitWidth1);
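    // copysign(X, Y) takes its sign bit from Y and every other bit from X,
    // so only the sign bit of Op1 (SignMask1) is ever demanded from it.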
3027
3028 if (!DemandedBits.intersects(RHS: SignMask0))
3029 return TLO.CombineTo(O: Op, N: Op0);
3030
3031 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: ~SignMask0 & DemandedBits, OriginalDemandedElts: DemandedElts,
3032 Known, TLO, Depth: Depth + 1) ||
3033 SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: SignMask1, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
3034 Depth: Depth + 1))
3035 return true;
3036
3037 if (Known2.isNonNegative())
3038 return TLO.CombineTo(
3039 O: Op, N: TLO.DAG.getNode(Opcode: ISD::FABS, DL: dl, VT, Operand: Op0, Flags: Op->getFlags()));
3040
3041 if (Known2.isNegative())
3042 return TLO.CombineTo(
3043 O: Op, N: TLO.DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT,
3044 Operand: TLO.DAG.getNode(Opcode: ISD::FABS, DL: SDLoc(Op0), VT, Operand: Op0)));
3045
3046 Known.Zero &= ~SignMask0;
3047 Known.One &= ~SignMask0;
3048 break;
3049 }
3050 case ISD::FNEG: {
3051 SDValue Op0 = Op.getOperand(i: 0);
3052 APInt SignMask = APInt::getSignMask(BitWidth);
3053
3054 if (!DemandedBits.intersects(RHS: SignMask))
3055 return TLO.CombineTo(O: Op, N: Op0);
3056
3057 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
3058 Depth: Depth + 1))
3059 return true;
3060
3061 if (!Known.isSignUnknown()) {
3062 Known.Zero ^= SignMask;
3063 Known.One ^= SignMask;
3064 }
3065
3066 break;
3067 }
3068 default:
3069 // We also ask the target about intrinsics (which could be specific to it).
3070 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3071 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3072 // TODO: Probably okay to remove after audit; here to reduce change size
3073 // in initial enablement patch for scalable vectors
3074 if (Op.getValueType().isScalableVector())
3075 break;
3076 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
3077 Known, TLO, Depth))
3078 return true;
3079 break;
3080 }
3081
3082 // Just use computeKnownBits to compute output bits.
3083 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
3084 break;
3085 }
3086
3087 // If we know the value of all of the demanded bits, return this as a
3088 // constant.
3089 if (!isTargetCanonicalConstantNode(Op) &&
3090 DemandedBits.isSubsetOf(RHS: Known.Zero | Known.One)) {
3091 // Avoid folding to a constant if any OpaqueConstant is involved.
3092 if (llvm::any_of(Range: Op->ops(), P: [](SDValue V) {
3093 auto *C = dyn_cast<ConstantSDNode>(Val&: V);
3094 return C && C->isOpaque();
3095 }))
3096 return false;
3097 if (VT.isInteger())
3098 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: Known.One, DL: dl, VT));
3099 if (VT.isFloatingPoint())
3100 return TLO.CombineTo(
3101 O: Op, N: TLO.DAG.getConstantFP(Val: APFloat(VT.getFltSemantics(), Known.One),
3102 DL: dl, VT));
3103 }
3104
  // A multi-use 'all demanded elts' simplification failed to find any known
  // bits. Try again just for the original demanded elts.
  // Ensure we do this AFTER the constant folding above.
3108 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3109 Known = TLO.DAG.computeKnownBits(Op, DemandedElts: OriginalDemandedElts, Depth);
3110
3111 return false;
3112}
3113
3114bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
3115 const APInt &DemandedElts,
3116 DAGCombinerInfo &DCI) const {
3117 SelectionDAG &DAG = DCI.DAG;
3118 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3119 !DCI.isBeforeLegalizeOps());
3120
3121 APInt KnownUndef, KnownZero;
3122 bool Simplified =
3123 SimplifyDemandedVectorElts(Op, DemandedEltMask: DemandedElts, KnownUndef, KnownZero, TLO);
3124 if (Simplified) {
3125 DCI.AddToWorklist(N: Op.getNode());
3126 DCI.CommitTargetLoweringOpt(TLO);
3127 }
3128
3129 return Simplified;
3130}
3131
3132/// Given a vector binary operation and known undefined elements for each input
3133/// operand, compute whether each element of the output is undefined.
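/// E.g. for (add (build_vector C0, undef), (build_vector undef, C1)), each
/// lane constant-folds to undef at the scalar level, so both lanes of the
/// result are reported undef.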
3134static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
3135 const APInt &UndefOp0,
3136 const APInt &UndefOp1) {
3137 EVT VT = BO.getValueType();
3138 assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
3139 "Vector binop only");
3140
3141 EVT EltVT = VT.getVectorElementType();
3142 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3143 assert(UndefOp0.getBitWidth() == NumElts &&
3144 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3145
3146 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3147 const APInt &UndefVals) {
3148 if (UndefVals[Index])
3149 return DAG.getUNDEF(VT: EltVT);
3150
3151 if (auto *BV = dyn_cast<BuildVectorSDNode>(Val&: V)) {
3152 // Try hard to make sure that the getNode() call is not creating temporary
3153 // nodes. Ignore opaque integers because they do not constant fold.
3154 SDValue Elt = BV->getOperand(Num: Index);
3155 auto *C = dyn_cast<ConstantSDNode>(Val&: Elt);
3156 if (isa<ConstantFPSDNode>(Val: Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3157 return Elt;
3158 }
3159
3160 return SDValue();
3161 };
3162
3163 APInt KnownUndef = APInt::getZero(numBits: NumElts);
3164 for (unsigned i = 0; i != NumElts; ++i) {
3165 // If both inputs for this element are either constant or undef and match
3166 // the element type, compute the constant/undef result for this element of
3167 // the vector.
3168 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3169 // not handle FP constants. The code within getNode() should be refactored
3170 // to avoid the danger of creating a bogus temporary node here.
3171 SDValue C0 = getUndefOrConstantElt(BO.getOperand(i: 0), i, UndefOp0);
3172 SDValue C1 = getUndefOrConstantElt(BO.getOperand(i: 1), i, UndefOp1);
3173 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3174 if (DAG.getNode(Opcode: BO.getOpcode(), DL: SDLoc(BO), VT: EltVT, N1: C0, N2: C1).isUndef())
3175 KnownUndef.setBit(i);
3176 }
3177 return KnownUndef;
3178}
3179
3180bool TargetLowering::SimplifyDemandedVectorElts(
3181 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3182 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3183 bool AssumeSingleUse) const {
3184 EVT VT = Op.getValueType();
3185 unsigned Opcode = Op.getOpcode();
3186 APInt DemandedElts = OriginalDemandedElts;
3187 unsigned NumElts = DemandedElts.getBitWidth();
3188 assert(VT.isVector() && "Expected vector op");
3189
3190 KnownUndef = KnownZero = APInt::getZero(numBits: NumElts);
3191
3192 if (!shouldSimplifyDemandedVectorElts(Op, TLO))
3193 return false;
3194
3195 // TODO: For now we assume we know nothing about scalable vectors.
3196 if (VT.isScalableVector())
3197 return false;
3198
3199 assert(VT.getVectorNumElements() == NumElts &&
3200 "Mask size mismatches value type element count!");
3201
3202 // Undef operand.
3203 if (Op.isUndef()) {
3204 KnownUndef.setAllBits();
3205 return false;
3206 }
3207
3208 // If Op has other users, assume that all elements are needed.
3209 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3210 DemandedElts.setAllBits();
3211
3212 // Not demanding any elements from Op.
3213 if (DemandedElts == 0) {
3214 KnownUndef.setAllBits();
3215 return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3216 }
3217
3218 // Limit search depth.
3219 if (Depth >= SelectionDAG::MaxRecursionDepth)
3220 return false;
3221
3222 SDLoc DL(Op);
3223 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3224 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3225
3226 // Helper for demanding the specified elements and all the bits of both binary
3227 // operands.
3228 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3229 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op: Op0, DemandedElts,
3230 DAG&: TLO.DAG, Depth: Depth + 1);
3231 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op: Op1, DemandedElts,
3232 DAG&: TLO.DAG, Depth: Depth + 1);
3233 if (NewOp0 || NewOp1) {
3234 SDValue NewOp =
3235 TLO.DAG.getNode(Opcode, DL: SDLoc(Op), VT, N1: NewOp0 ? NewOp0 : Op0,
3236 N2: NewOp1 ? NewOp1 : Op1, Flags: Op->getFlags());
3237 return TLO.CombineTo(O: Op, N: NewOp);
3238 }
3239 return false;
3240 };
3241
3242 switch (Opcode) {
3243 case ISD::SCALAR_TO_VECTOR: {
3244 if (!DemandedElts[0]) {
3245 KnownUndef.setAllBits();
3246 return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3247 }
3248 KnownUndef.setHighBits(NumElts - 1);
3249 break;
3250 }
3251 case ISD::BITCAST: {
3252 SDValue Src = Op.getOperand(i: 0);
3253 EVT SrcVT = Src.getValueType();
3254
3255 if (!SrcVT.isVector()) {
3256 // TODO - bigendian once we have test coverage.
3257 if (IsLE) {
3258 APInt DemandedSrcBits = APInt::getZero(numBits: SrcVT.getSizeInBits());
3259 unsigned EltSize = VT.getScalarSizeInBits();
3260 for (unsigned I = 0; I != NumElts; ++I) {
3261 if (DemandedElts[I]) {
3262 unsigned Offset = I * EltSize;
3263 DemandedSrcBits.setBits(loBit: Offset, hiBit: Offset + EltSize);
3264 }
3265 }
3266 KnownBits Known;
3267 if (SimplifyDemandedBits(Op: Src, DemandedBits: DemandedSrcBits, Known, TLO, Depth: Depth + 1))
3268 return true;
3269 }
3270 break;
3271 }
3272
3273 // Fast handling of 'identity' bitcasts.
3274 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3275 if (NumSrcElts == NumElts)
3276 return SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedElts, KnownUndef,
3277 KnownZero, TLO, Depth: Depth + 1);
3278
3279 APInt SrcDemandedElts, SrcZero, SrcUndef;
3280
    // When bitcasting from a 'large element' src vector to a 'small element'
    // vector, we must demand a source element if any DemandedElt maps to it.
3283 if ((NumElts % NumSrcElts) == 0) {
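      // E.g. for (v4i32 bitcast (v2i64 X)), result elements 0-1 come from
      // src element 0 and result elements 2-3 from src element 1, so
      // demanding result element 2 demands src element 1.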
3284 unsigned Scale = NumElts / NumSrcElts;
3285 SrcDemandedElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
3286 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
3287 TLO, Depth: Depth + 1))
3288 return true;
3289
3290 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3291 // of the large element.
3292 // TODO - bigendian once we have test coverage.
3293 if (IsLE) {
3294 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3295 APInt SrcDemandedBits = APInt::getZero(numBits: SrcEltSizeInBits);
3296 for (unsigned i = 0; i != NumElts; ++i)
3297 if (DemandedElts[i]) {
3298 unsigned Ofs = (i % Scale) * EltSizeInBits;
3299 SrcDemandedBits.setBits(loBit: Ofs, hiBit: Ofs + EltSizeInBits);
3300 }
3301
3302 KnownBits Known;
3303 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: SrcDemandedBits, OriginalDemandedElts: SrcDemandedElts, Known,
3304 TLO, Depth: Depth + 1))
3305 return true;
3306
3307 // The bitcast has split each wide element into a number of
3308 // narrow subelements. We have just computed the Known bits
3309 // for wide elements. See if element splitting results in
3310 // some subelements being zero. Only for demanded elements!
3311 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3312 if (!Known.Zero.extractBits(numBits: EltSizeInBits, bitPosition: SubElt * EltSizeInBits)
3313 .isAllOnes())
3314 continue;
3315 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3316 unsigned Elt = Scale * SrcElt + SubElt;
3317 if (DemandedElts[Elt])
3318 KnownZero.setBit(Elt);
3319 }
3320 }
3321 }
3322
      // If a src element is zero/undef then all the output elements mapping
      // to it will be as well - only demanded elements are guaranteed to be
      // correct.
3325 for (unsigned i = 0; i != NumSrcElts; ++i) {
3326 if (SrcDemandedElts[i]) {
3327 if (SrcZero[i])
3328 KnownZero.setBits(loBit: i * Scale, hiBit: (i + 1) * Scale);
3329 if (SrcUndef[i])
3330 KnownUndef.setBits(loBit: i * Scale, hiBit: (i + 1) * Scale);
3331 }
3332 }
3333 }
3334
    // When bitcasting from a 'small element' src vector to a 'large element'
    // vector, we demand all the smaller source elements covered by the larger
    // demanded element of this vector.
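    // E.g. for (v2i64 bitcast (v4i32 X)), demanding result element 1 demands
    // src elements 2 and 3; if both are known zero/undef then so is result
    // element 1.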
3338 if ((NumSrcElts % NumElts) == 0) {
3339 unsigned Scale = NumSrcElts / NumElts;
3340 SrcDemandedElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
3341 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
3342 TLO, Depth: Depth + 1))
3343 return true;
3344
3345 // If all the src elements covering an output element are zero/undef, then
3346 // the output element will be as well, assuming it was demanded.
3347 for (unsigned i = 0; i != NumElts; ++i) {
3348 if (DemandedElts[i]) {
3349 if (SrcZero.extractBits(numBits: Scale, bitPosition: i * Scale).isAllOnes())
3350 KnownZero.setBit(i);
3351 if (SrcUndef.extractBits(numBits: Scale, bitPosition: i * Scale).isAllOnes())
3352 KnownUndef.setBit(i);
3353 }
3354 }
3355 }
3356 break;
3357 }
3358 case ISD::FREEZE: {
3359 SDValue N0 = Op.getOperand(i: 0);
3360 if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(Op: N0, DemandedElts,
3361 /*PoisonOnly=*/false,
3362 Depth: Depth + 1))
3363 return TLO.CombineTo(O: Op, N: N0);
3364
3365 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3366 // freeze(op(x, ...)) -> op(freeze(x), ...).
3367 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3368 return TLO.CombineTo(
3369 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT,
3370 Operand: TLO.DAG.getFreeze(V: N0.getOperand(i: 0))));
3371 break;
3372 }
3373 case ISD::BUILD_VECTOR: {
3374 // Check all elements and simplify any unused elements with UNDEF.
3375 if (!DemandedElts.isAllOnes()) {
3376 // Don't simplify BROADCASTS.
3377 if (llvm::any_of(Range: Op->op_values(),
3378 P: [&](SDValue Elt) { return Op.getOperand(i: 0) != Elt; })) {
3379 SmallVector<SDValue, 32> Ops(Op->ops());
3380 bool Updated = false;
3381 for (unsigned i = 0; i != NumElts; ++i) {
3382 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3383 Ops[i] = TLO.DAG.getUNDEF(VT: Ops[0].getValueType());
3384 KnownUndef.setBit(i);
3385 Updated = true;
3386 }
3387 }
3388 if (Updated)
3389 return TLO.CombineTo(O: Op, N: TLO.DAG.getBuildVector(VT, DL, Ops));
3390 }
3391 }
3392 for (unsigned i = 0; i != NumElts; ++i) {
3393 SDValue SrcOp = Op.getOperand(i);
3394 if (SrcOp.isUndef()) {
3395 KnownUndef.setBit(i);
3396 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3397 (isNullConstant(V: SrcOp) || isNullFPConstant(V: SrcOp))) {
3398 KnownZero.setBit(i);
3399 }
3400 }
3401 break;
3402 }
3403 case ISD::CONCAT_VECTORS: {
3404 EVT SubVT = Op.getOperand(i: 0).getValueType();
3405 unsigned NumSubVecs = Op.getNumOperands();
3406 unsigned NumSubElts = SubVT.getVectorNumElements();
3407 for (unsigned i = 0; i != NumSubVecs; ++i) {
3408 SDValue SubOp = Op.getOperand(i);
3409 APInt SubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
3410 APInt SubUndef, SubZero;
3411 if (SimplifyDemandedVectorElts(Op: SubOp, OriginalDemandedElts: SubElts, KnownUndef&: SubUndef, KnownZero&: SubZero, TLO,
3412 Depth: Depth + 1))
3413 return true;
3414 KnownUndef.insertBits(SubBits: SubUndef, bitPosition: i * NumSubElts);
3415 KnownZero.insertBits(SubBits: SubZero, bitPosition: i * NumSubElts);
3416 }
3417
3418 // Attempt to avoid multi-use ops if we don't need anything from them.
3419 if (!DemandedElts.isAllOnes()) {
3420 bool FoundNewSub = false;
3421 SmallVector<SDValue, 2> DemandedSubOps;
3422 for (unsigned i = 0; i != NumSubVecs; ++i) {
3423 SDValue SubOp = Op.getOperand(i);
3424 APInt SubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
3425 SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3426 Op: SubOp, DemandedElts: SubElts, DAG&: TLO.DAG, Depth: Depth + 1);
3427 DemandedSubOps.push_back(Elt: NewSubOp ? NewSubOp : SubOp);
3428 FoundNewSub = NewSubOp ? true : FoundNewSub;
3429 }
3430 if (FoundNewSub) {
3431 SDValue NewOp =
3432 TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc(Op), VT, Ops: DemandedSubOps);
3433 return TLO.CombineTo(O: Op, N: NewOp);
3434 }
3435 }
3436 break;
3437 }
3438 case ISD::INSERT_SUBVECTOR: {
3439 // Demand any elements from the subvector and the remainder from the src it
3440 // is inserted into.
3441 SDValue Src = Op.getOperand(i: 0);
3442 SDValue Sub = Op.getOperand(i: 1);
3443 uint64_t Idx = Op.getConstantOperandVal(i: 2);
3444 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3445 APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx);
3446 APInt DemandedSrcElts = DemandedElts;
3447 DemandedSrcElts.clearBits(LoBit: Idx, HiBit: Idx + NumSubElts);
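    // E.g. inserting a v2i32 subvector at index 2 of a v8i32 vector: demanded
    // elements 2-3 map to the subvector and the remaining six to Src.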
3448
3449 // If none of the sub operand elements are demanded, bypass the insert.
3450 if (!DemandedSubElts)
3451 return TLO.CombineTo(O: Op, N: Src);
3452
3453 APInt SubUndef, SubZero;
3454 if (SimplifyDemandedVectorElts(Op: Sub, OriginalDemandedElts: DemandedSubElts, KnownUndef&: SubUndef, KnownZero&: SubZero, TLO,
3455 Depth: Depth + 1))
3456 return true;
3457
3458 // If none of the src operand elements are demanded, replace it with undef.
3459 if (!DemandedSrcElts && !Src.isUndef())
3460 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT,
3461 N1: TLO.DAG.getUNDEF(VT), N2: Sub,
3462 N3: Op.getOperand(i: 2)));
3463
3464 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef, KnownZero,
3465 TLO, Depth: Depth + 1))
3466 return true;
3467 KnownUndef.insertBits(SubBits: SubUndef, bitPosition: Idx);
3468 KnownZero.insertBits(SubBits: SubZero, bitPosition: Idx);
3469
3470 // Attempt to avoid multi-use ops if we don't need anything from them.
3471 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3472 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3473 Op: Src, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1);
3474 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3475 Op: Sub, DemandedElts: DemandedSubElts, DAG&: TLO.DAG, Depth: Depth + 1);
3476 if (NewSrc || NewSub) {
3477 NewSrc = NewSrc ? NewSrc : Src;
3478 NewSub = NewSub ? NewSub : Sub;
3479 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc(Op), VT, N1: NewSrc,
3480 N2: NewSub, N3: Op.getOperand(i: 2));
3481 return TLO.CombineTo(O: Op, N: NewOp);
3482 }
3483 }
3484 break;
3485 }
3486 case ISD::EXTRACT_SUBVECTOR: {
3487 // Offset the demanded elts by the subvector index.
3488 SDValue Src = Op.getOperand(i: 0);
3489 if (Src.getValueType().isScalableVector())
3490 break;
3491 uint64_t Idx = Op.getConstantOperandVal(i: 1);
3492 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3493 APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts).shl(shiftAmt: Idx);
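    // E.g. extracting a v4i32 subvector at index 4 of a v8i32 source shifts
    // the demanded mask up by 4, so demanding result element 1 demands source
    // element 5.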
3494
3495 APInt SrcUndef, SrcZero;
3496 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3497 Depth: Depth + 1))
3498 return true;
3499 KnownUndef = SrcUndef.extractBits(numBits: NumElts, bitPosition: Idx);
3500 KnownZero = SrcZero.extractBits(numBits: NumElts, bitPosition: Idx);
3501
3502 // Attempt to avoid multi-use ops if we don't need anything from them.
3503 if (!DemandedElts.isAllOnes()) {
3504 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3505 Op: Src, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1);
3506 if (NewSrc) {
3507 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc(Op), VT, N1: NewSrc,
3508 N2: Op.getOperand(i: 1));
3509 return TLO.CombineTo(O: Op, N: NewOp);
3510 }
3511 }
3512 break;
3513 }
3514 case ISD::INSERT_VECTOR_ELT: {
3515 SDValue Vec = Op.getOperand(i: 0);
3516 SDValue Scl = Op.getOperand(i: 1);
3517 auto *CIdx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));
3518
3519 // For a legal, constant insertion index, if we don't need this insertion
3520 // then strip it, else remove it from the demanded elts.
3521 if (CIdx && CIdx->getAPIntValue().ult(RHS: NumElts)) {
3522 unsigned Idx = CIdx->getZExtValue();
3523 if (!DemandedElts[Idx])
3524 return TLO.CombineTo(O: Op, N: Vec);
3525
3526 APInt DemandedVecElts(DemandedElts);
3527 DemandedVecElts.clearBit(BitPosition: Idx);
3528 if (SimplifyDemandedVectorElts(Op: Vec, OriginalDemandedElts: DemandedVecElts, KnownUndef,
3529 KnownZero, TLO, Depth: Depth + 1))
3530 return true;
3531
3532 KnownUndef.setBitVal(BitPosition: Idx, BitValue: Scl.isUndef());
3533
3534 KnownZero.setBitVal(BitPosition: Idx, BitValue: isNullConstant(V: Scl) || isNullFPConstant(V: Scl));
3535 break;
3536 }
3537
3538 APInt VecUndef, VecZero;
3539 if (SimplifyDemandedVectorElts(Op: Vec, OriginalDemandedElts: DemandedElts, KnownUndef&: VecUndef, KnownZero&: VecZero, TLO,
3540 Depth: Depth + 1))
3541 return true;
3542 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3543 break;
3544 }
3545 case ISD::VSELECT: {
3546 SDValue Sel = Op.getOperand(i: 0);
3547 SDValue LHS = Op.getOperand(i: 1);
3548 SDValue RHS = Op.getOperand(i: 2);
3549
3550 // Try to transform the select condition based on the current demanded
3551 // elements.
3552 APInt UndefSel, ZeroSel;
3553 if (SimplifyDemandedVectorElts(Op: Sel, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefSel, KnownZero&: ZeroSel, TLO,
3554 Depth: Depth + 1))
3555 return true;
3556
3557 // See if we can simplify either vselect operand.
3558 APInt DemandedLHS(DemandedElts);
3559 APInt DemandedRHS(DemandedElts);
3560 APInt UndefLHS, ZeroLHS;
3561 APInt UndefRHS, ZeroRHS;
3562 if (SimplifyDemandedVectorElts(Op: LHS, OriginalDemandedElts: DemandedLHS, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3563 Depth: Depth + 1))
3564 return true;
3565 if (SimplifyDemandedVectorElts(Op: RHS, OriginalDemandedElts: DemandedRHS, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3566 Depth: Depth + 1))
3567 return true;
3568
3569 KnownUndef = UndefLHS & UndefRHS;
3570 KnownZero = ZeroLHS & ZeroRHS;
3571
3572 // If we know that the selected element is always zero, we don't need the
3573 // select value element.
3574 APInt DemandedSel = DemandedElts & ~KnownZero;
3575 if (DemandedSel != DemandedElts)
3576 if (SimplifyDemandedVectorElts(Op: Sel, OriginalDemandedElts: DemandedSel, KnownUndef&: UndefSel, KnownZero&: ZeroSel, TLO,
3577 Depth: Depth + 1))
3578 return true;
3579
3580 break;
3581 }
3582 case ISD::VECTOR_SHUFFLE: {
3583 SDValue LHS = Op.getOperand(i: 0);
3584 SDValue RHS = Op.getOperand(i: 1);
3585 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Val&: Op)->getMask();
3586
    // Collect demanded elements from the shuffle operands.
3588 APInt DemandedLHS(NumElts, 0);
3589 APInt DemandedRHS(NumElts, 0);
3590 for (unsigned i = 0; i != NumElts; ++i) {
3591 int M = ShuffleMask[i];
3592 if (M < 0 || !DemandedElts[i])
3593 continue;
3594 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3595 if (M < (int)NumElts)
3596 DemandedLHS.setBit(M);
3597 else
3598 DemandedRHS.setBit(M - NumElts);
3599 }
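    // E.g. for mask <0, 5, -1, 3> with only elements 0 and 1 demanded, we
    // demand LHS element 0 (M = 0) and RHS element 1 (M = 5, i.e. 5 - NumElts).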
3600
    // If either side isn't demanded, replace it by UNDEF. We handle this
    // explicitly here so that we also simplify in the case of multiple uses
    // (in contrast to the SimplifyDemandedVectorElts calls below).
3604 bool FoldLHS = !DemandedLHS && !LHS.isUndef();
3605 bool FoldRHS = !DemandedRHS && !RHS.isUndef();
3606 if (FoldLHS || FoldRHS) {
3607 LHS = FoldLHS ? TLO.DAG.getUNDEF(VT: LHS.getValueType()) : LHS;
3608 RHS = FoldRHS ? TLO.DAG.getUNDEF(VT: RHS.getValueType()) : RHS;
3609 SDValue NewOp =
3610 TLO.DAG.getVectorShuffle(VT, dl: SDLoc(Op), N1: LHS, N2: RHS, Mask: ShuffleMask);
3611 return TLO.CombineTo(O: Op, N: NewOp);
3612 }
3613
3614 // See if we can simplify either shuffle operand.
3615 APInt UndefLHS, ZeroLHS;
3616 APInt UndefRHS, ZeroRHS;
3617 if (SimplifyDemandedVectorElts(Op: LHS, OriginalDemandedElts: DemandedLHS, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3618 Depth: Depth + 1))
3619 return true;
3620 if (SimplifyDemandedVectorElts(Op: RHS, OriginalDemandedElts: DemandedRHS, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3621 Depth: Depth + 1))
3622 return true;
3623
3624 // Simplify mask using undef elements from LHS/RHS.
3625 bool Updated = false;
3626 bool IdentityLHS = true, IdentityRHS = true;
3627 SmallVector<int, 32> NewMask(ShuffleMask);
3628 for (unsigned i = 0; i != NumElts; ++i) {
3629 int &M = NewMask[i];
3630 if (M < 0)
3631 continue;
3632 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3633 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3634 Updated = true;
3635 M = -1;
3636 }
3637 IdentityLHS &= (M < 0) || (M == (int)i);
3638 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3639 }
3640
    // Update legal shuffle masks based on demanded elements, as long as that
    // won't reduce the mask to an identity, which can cause premature removal
    // of the shuffle.
3643 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3644 SDValue LegalShuffle =
3645 buildLegalVectorShuffle(VT, DL, N0: LHS, N1: RHS, Mask: NewMask, DAG&: TLO.DAG);
3646 if (LegalShuffle)
3647 return TLO.CombineTo(O: Op, N: LegalShuffle);
3648 }
3649
3650 // Propagate undef/zero elements from LHS/RHS.
3651 for (unsigned i = 0; i != NumElts; ++i) {
3652 int M = ShuffleMask[i];
3653 if (M < 0) {
3654 KnownUndef.setBit(i);
3655 } else if (M < (int)NumElts) {
3656 if (UndefLHS[M])
3657 KnownUndef.setBit(i);
3658 if (ZeroLHS[M])
3659 KnownZero.setBit(i);
3660 } else {
3661 if (UndefRHS[M - NumElts])
3662 KnownUndef.setBit(i);
3663 if (ZeroRHS[M - NumElts])
3664 KnownZero.setBit(i);
3665 }
3666 }
3667 break;
3668 }
3669 case ISD::ANY_EXTEND_VECTOR_INREG:
3670 case ISD::SIGN_EXTEND_VECTOR_INREG:
3671 case ISD::ZERO_EXTEND_VECTOR_INREG: {
3672 APInt SrcUndef, SrcZero;
3673 SDValue Src = Op.getOperand(i: 0);
3674 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3675 APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts);
3676 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3677 Depth: Depth + 1))
3678 return true;
3679 KnownZero = SrcZero.zextOrTrunc(width: NumElts);
3680 KnownUndef = SrcUndef.zextOrTrunc(width: NumElts);
3681
3682 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3683 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3684 DemandedSrcElts == 1) {
3685 // aext - if we just need the bottom element then we can bitcast.
3686 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
3687 }
3688
3689 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3690 // zext(undef) upper bits are guaranteed to be zero.
3691 if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3692 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: SDLoc(Op), VT));
3693 KnownUndef.clearAllBits();
3694
3695 // zext - if we just need the bottom element then we can mask:
3696 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3697 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3698 Op->isOnlyUserOf(N: Src.getNode()) &&
3699 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3700 SDLoc DL(Op);
3701 EVT SrcVT = Src.getValueType();
3702 EVT SrcSVT = SrcVT.getScalarType();
3703
3704 // If we're after type legalization and SrcSVT is not legal, use the
3705 // promoted type for creating constants to avoid creating nodes with
3706 // illegal types.
3707 if (AfterLegalizeTypes)
3708 SrcSVT = getLegalTypeToTransformTo(Context&: *TLO.DAG.getContext(), VT: SrcSVT);
3709
3710 SmallVector<SDValue> MaskElts;
3711 MaskElts.push_back(Elt: TLO.DAG.getAllOnesConstant(DL, VT: SrcSVT));
3712 MaskElts.append(NumInputs: NumSrcElts - 1, Elt: TLO.DAG.getConstant(Val: 0, DL, VT: SrcSVT));
3713 SDValue Mask = TLO.DAG.getBuildVector(VT: SrcVT, DL, Ops: MaskElts);
3714 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3715 Opcode: ISD::AND, DL, VT: SrcVT, Ops: {Src.getOperand(i: 1), Mask})) {
3716 Fold = TLO.DAG.getNode(Opcode: ISD::AND, DL, VT: SrcVT, N1: Src.getOperand(i: 0), N2: Fold);
3717 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Fold));
3718 }
3719 }
3720 }
3721 break;
3722 }
3723
3724 // TODO: There are more binop opcodes that could be handled here - MIN,
3725 // MAX, saturated math, etc.
3726 case ISD::ADD: {
3727 SDValue Op0 = Op.getOperand(i: 0);
3728 SDValue Op1 = Op.getOperand(i: 1);
3729 if (Op0 == Op1 && Op->isOnlyUserOf(N: Op0.getNode())) {
3730 APInt UndefLHS, ZeroLHS;
3731 if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3732 Depth: Depth + 1, /*AssumeSingleUse*/ true))
3733 return true;
3734 }
3735 [[fallthrough]];
3736 }
3737 case ISD::AVGCEILS:
3738 case ISD::AVGCEILU:
3739 case ISD::AVGFLOORS:
3740 case ISD::AVGFLOORU:
3741 case ISD::OR:
3742 case ISD::XOR:
3743 case ISD::SUB:
3744 case ISD::FADD:
3745 case ISD::FSUB:
3746 case ISD::FMUL:
3747 case ISD::FDIV:
3748 case ISD::FREM: {
3749 SDValue Op0 = Op.getOperand(i: 0);
3750 SDValue Op1 = Op.getOperand(i: 1);
3751
3752 APInt UndefRHS, ZeroRHS;
3753 if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3754 Depth: Depth + 1))
3755 return true;
3756 APInt UndefLHS, ZeroLHS;
3757 if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3758 Depth: Depth + 1))
3759 return true;
3760
3761 KnownZero = ZeroLHS & ZeroRHS;
3762 KnownUndef = getKnownUndefForVectorBinop(BO: Op, DAG&: TLO.DAG, UndefOp0: UndefLHS, UndefOp1: UndefRHS);
3763
3764 // Attempt to avoid multi-use ops if we don't need anything from them.
3765 // TODO - use KnownUndef to relax the demandedelts?
3766 if (!DemandedElts.isAllOnes())
3767 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3768 return true;
3769 break;
3770 }
3771 case ISD::SHL:
3772 case ISD::SRL:
3773 case ISD::SRA:
3774 case ISD::ROTL:
3775 case ISD::ROTR: {
3776 SDValue Op0 = Op.getOperand(i: 0);
3777 SDValue Op1 = Op.getOperand(i: 1);
3778
3779 APInt UndefRHS, ZeroRHS;
3780 if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3781 Depth: Depth + 1))
3782 return true;
3783 APInt UndefLHS, ZeroLHS;
3784 if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3785 Depth: Depth + 1))
3786 return true;
3787
3788 KnownZero = ZeroLHS;
3789 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3790
3791 // Attempt to avoid multi-use ops if we don't need anything from them.
3792 // TODO - use KnownUndef to relax the demandedelts?
3793 if (!DemandedElts.isAllOnes())
3794 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3795 return true;
3796 break;
3797 }
3798 case ISD::MUL:
3799 case ISD::MULHU:
3800 case ISD::MULHS:
3801 case ISD::AND: {
3802 SDValue Op0 = Op.getOperand(i: 0);
3803 SDValue Op1 = Op.getOperand(i: 1);
3804
3805 APInt SrcUndef, SrcZero;
3806 if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3807 Depth: Depth + 1))
3808 return true;
    // If we know that a demanded element was zero in Op1, we don't need to
    // demand it in Op0 - it's guaranteed to be zero.
3811 APInt DemandedElts0 = DemandedElts & ~SrcZero;
3812 if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts0, KnownUndef, KnownZero,
3813 TLO, Depth: Depth + 1))
3814 return true;
3815
3816 KnownUndef &= DemandedElts0;
3817 KnownZero &= DemandedElts0;
3818
3819 // If every element pair has a zero/undef then just fold to zero.
3820 // fold (and x, undef) -> 0 / (and x, 0) -> 0
3821 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3822 if (DemandedElts.isSubsetOf(RHS: SrcZero | KnownZero | SrcUndef | KnownUndef))
3823 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: SDLoc(Op), VT));
3824
3825 // If either side has a zero element, then the result element is zero, even
3826 // if the other is an UNDEF.
3827 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3828 // and then handle 'and' nodes with the rest of the binop opcodes.
3829 KnownZero |= SrcZero;
3830 KnownUndef &= SrcUndef;
3831 KnownUndef &= ~KnownZero;
3832
3833 // Attempt to avoid multi-use ops if we don't need anything from them.
3834 if (!DemandedElts.isAllOnes())
3835 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3836 return true;
3837 break;
3838 }
3839 case ISD::TRUNCATE:
3840 case ISD::SIGN_EXTEND:
3841 case ISD::ZERO_EXTEND:
3842 if (SimplifyDemandedVectorElts(Op: Op.getOperand(i: 0), OriginalDemandedElts: DemandedElts, KnownUndef,
3843 KnownZero, TLO, Depth: Depth + 1))
3844 return true;
3845
3846 if (!DemandedElts.isAllOnes())
3847 if (SDValue NewOp = SimplifyMultipleUseDemandedVectorElts(
3848 Op: Op.getOperand(i: 0), DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
3849 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode, DL: SDLoc(Op), VT, Operand: NewOp));
3850
3851 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3852 // zext(undef) upper bits are guaranteed to be zero.
3853 if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3854 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: SDLoc(Op), VT));
3855 KnownUndef.clearAllBits();
3856 }
3857 break;
3858 case ISD::SINT_TO_FP:
3859 case ISD::UINT_TO_FP:
3860 case ISD::FP_TO_SINT:
3861 case ISD::FP_TO_UINT:
3862 if (SimplifyDemandedVectorElts(Op: Op.getOperand(i: 0), OriginalDemandedElts: DemandedElts, KnownUndef,
3863 KnownZero, TLO, Depth: Depth + 1))
3864 return true;
3865 // Don't fall through to generic undef -> undef handling.
3866 return false;
3867 default: {
3868 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3869 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3870 KnownZero, TLO, Depth))
3871 return true;
3872 } else {
3873 KnownBits Known;
3874 APInt DemandedBits = APInt::getAllOnes(numBits: EltSizeInBits);
3875 if (SimplifyDemandedBits(Op, OriginalDemandedBits: DemandedBits, OriginalDemandedElts, Known,
3876 TLO, Depth, AssumeSingleUse))
3877 return true;
3878 }
3879 break;
3880 }
3881 }
3882 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3883
3884 // Constant fold all undef cases.
3885 // TODO: Handle zero cases as well.
3886 if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3887 return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3888
3889 return false;
3890}
3891
/// Determine which of the bits specified in Mask are known to be either zero
/// or one and return them in Known.
3894void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3895 KnownBits &Known,
3896 const APInt &DemandedElts,
3897 const SelectionDAG &DAG,
3898 unsigned Depth) const {
3899 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3900 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3901 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3902 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3903 "Should use MaskedValueIsZero if you don't know whether Op"
3904 " is a target node!");
3905 Known.resetAll();
3906}
3907
3908void TargetLowering::computeKnownBitsForTargetInstr(
3909 GISelValueTracking &Analysis, Register R, KnownBits &Known,
3910 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3911 unsigned Depth) const {
3912 Known.resetAll();
3913}
3914
3915void TargetLowering::computeKnownFPClassForTargetInstr(
3916 GISelValueTracking &Analysis, Register R, KnownFPClass &Known,
3917 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3918 unsigned Depth) const {
3919 Known.resetAll();
3920}
3921
3922void TargetLowering::computeKnownBitsForFrameIndex(
3923 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3924 // The low bits are known zero if the pointer is aligned.
3925 Known.Zero.setLowBits(Log2(A: MF.getFrameInfo().getObjectAlign(ObjectIdx: FrameIdx)));
3926}
3927
3928Align TargetLowering::computeKnownAlignForTargetInstr(
3929 GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI,
3930 unsigned Depth) const {
3931 return Align(1);
3932}
3933
3934/// This method can be implemented by targets that want to expose additional
3935/// information about sign bits to the DAG Combiner.
3936unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3937 const APInt &,
3938 const SelectionDAG &,
3939 unsigned Depth) const {
3940 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3941 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3942 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3943 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3944 "Should use ComputeNumSignBits if you don't know whether Op"
3945 " is a target node!");
3946 return 1;
3947}
3948
3949unsigned TargetLowering::computeNumSignBitsForTargetInstr(
3950 GISelValueTracking &Analysis, Register R, const APInt &DemandedElts,
3951 const MachineRegisterInfo &MRI, unsigned Depth) const {
3952 return 1;
3953}
3954
3955bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3956 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3957 TargetLoweringOpt &TLO, unsigned Depth) const {
3958 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3959 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3960 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3961 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3962 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3963 " is a target node!");
3964 return false;
3965}
3966
3967bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3968 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3969 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3970 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3971 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3972 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3973 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3974 "Should use SimplifyDemandedBits if you don't know whether Op"
3975 " is a target node!");
3976 computeKnownBitsForTargetNode(Op, Known, DemandedElts, DAG: TLO.DAG, Depth);
3977 return false;
3978}
3979
3980SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3981 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3982 SelectionDAG &DAG, unsigned Depth) const {
3983 assert(
3984 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3985 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3986 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3987 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3988 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3989 " is a target node!");
3990 return SDValue();
3991}
3992
3993SDValue
3994TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3995 SDValue N1, MutableArrayRef<int> Mask,
3996 SelectionDAG &DAG) const {
3997 bool LegalMask = isShuffleMaskLegal(Mask, VT);
3998 if (!LegalMask) {
3999 std::swap(a&: N0, b&: N1);
4000 ShuffleVectorSDNode::commuteMask(Mask);
4001 LegalMask = isShuffleMaskLegal(Mask, VT);
4002 }
4003
4004 if (!LegalMask)
4005 return SDValue();
4006
4007 return DAG.getVectorShuffle(VT, dl: DL, N1: N0, N2: N1, Mask);
4008}
4009
4010const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
4011 return nullptr;
4012}
4013
4014bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
4015 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4016 bool PoisonOnly, unsigned Depth) const {
4017 assert(
4018 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4019 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4020 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4021 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4022 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
4023 " is a target node!");
4024
4025 // If Op can't create undef/poison and none of its operands are undef/poison
4026 // then Op is never undef/poison.
4027 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
4028 /*ConsiderFlags*/ true, Depth) &&
4029 all_of(Range: Op->ops(), P: [&](SDValue V) {
4030 return DAG.isGuaranteedNotToBeUndefOrPoison(Op: V, PoisonOnly,
4031 Depth: Depth + 1);
4032 });
4033}
4034
4035bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
4036 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4037 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
4038 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4039 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4040 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4041 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4042 "Should use canCreateUndefOrPoison if you don't know whether Op"
4043 " is a target node!");
4044 // Be conservative and return true.
4045 return true;
4046}
4047
4048bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
4049 const APInt &DemandedElts,
4050 const SelectionDAG &DAG,
4051 bool SNaN,
4052 unsigned Depth) const {
4053 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4054 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4055 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4056 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4057 "Should use isKnownNeverNaN if you don't know whether Op"
4058 " is a target node!");
4059 return false;
4060}
4061
4062bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
4063 const APInt &DemandedElts,
4064 APInt &UndefElts,
4065 const SelectionDAG &DAG,
4066 unsigned Depth) const {
4067 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4068 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4069 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4070 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4071 "Should use isSplatValue if you don't know whether Op"
4072 " is a target node!");
4073 return false;
4074}
4075
4076// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
4077// work with truncating build vectors and vectors with elements of less than
4078// 8 bits.
4079bool TargetLowering::isConstTrueVal(SDValue N) const {
4080 if (!N)
4081 return false;
4082
4083 unsigned EltWidth;
4084 APInt CVal;
4085 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
4086 /*AllowTruncation=*/true)) {
4087 CVal = CN->getAPIntValue();
4088 EltWidth = N.getValueType().getScalarSizeInBits();
4089 } else
4090 return false;
4091
4092 // If this is a truncating splat, truncate the splat value.
4093 // Otherwise, we may fail to match the expected values below.
4094 if (EltWidth < CVal.getBitWidth())
4095 CVal = CVal.trunc(width: EltWidth);
4096
4097 switch (getBooleanContents(Type: N.getValueType())) {
4098 case UndefinedBooleanContent:
4099 return CVal[0];
4100 case ZeroOrOneBooleanContent:
4101 return CVal.isOne();
4102 case ZeroOrNegativeOneBooleanContent:
4103 return CVal.isAllOnes();
4104 }
4105
4106 llvm_unreachable("Invalid boolean contents");
4107}
4108
4109bool TargetLowering::isConstFalseVal(SDValue N) const {
4110 if (!N)
4111 return false;
4112
4113 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: N);
4114 if (!CN) {
4115 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Val&: N);
4116 if (!BV)
4117 return false;
4118
    // We are only interested in constant splats; we don't care about undef
    // elements when identifying boolean constants, and getConstantSplatNode
    // returns null if all ops are undef.
4122 CN = BV->getConstantSplatNode();
4123 if (!CN)
4124 return false;
4125 }
4126
4127 if (getBooleanContents(Type: N->getValueType(ResNo: 0)) == UndefinedBooleanContent)
4128 return !CN->getAPIntValue()[0];
4129
4130 return CN->isZero();
4131}
4132
4133bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
4134 bool SExt) const {
4135 if (VT == MVT::i1)
4136 return N->isOne();
4137
4138 TargetLowering::BooleanContent Cnt = getBooleanContents(Type: VT);
4139 switch (Cnt) {
4140 case TargetLowering::ZeroOrOneBooleanContent:
4141 // An extended value of 1 is always true, unless its original type is i1,
4142 // in which case it will be sign extended to -1.
4143 return (N->isOne() && !SExt) || (SExt && (N->getValueType(ResNo: 0) != MVT::i1));
4144 case TargetLowering::UndefinedBooleanContent:
4145 case TargetLowering::ZeroOrNegativeOneBooleanContent:
4146 return N->isAllOnes() && SExt;
4147 }
4148 llvm_unreachable("Unexpected enumeration.");
4149}
4150
4151/// This helper function of SimplifySetCC tries to optimize the comparison when
4152/// either operand of the SetCC node is a bitwise-and instruction.
4153SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
4154 ISD::CondCode Cond, const SDLoc &DL,
4155 DAGCombinerInfo &DCI) const {
4156 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
4157 std::swap(a&: N0, b&: N1);
4158
4159 SelectionDAG &DAG = DCI.DAG;
4160 EVT OpVT = N0.getValueType();
4161 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
4162 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4163 return SDValue();
4164
4165 // (X & Y) != 0 --> zextOrTrunc(X & Y)
4166 // iff everything but LSB is known zero:
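  // For illustration: when only the LSB of (X & Y) can be nonzero, the value
  // of (X & Y) != 0 is exactly that LSB, so the AND itself, bool-extended or
  // truncated to the result type, already is the setcc result.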
4167 if (Cond == ISD::SETNE && isNullConstant(V: N1) &&
4168 (getBooleanContents(Type: OpVT) == TargetLowering::UndefinedBooleanContent ||
4169 getBooleanContents(Type: OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
4170 unsigned NumEltBits = OpVT.getScalarSizeInBits();
4171 APInt UpperBits = APInt::getHighBitsSet(numBits: NumEltBits, hiBitsSet: NumEltBits - 1);
4172 if (DAG.MaskedValueIsZero(Op: N0, Mask: UpperBits))
4173 return DAG.getBoolExtOrTrunc(Op: N0, SL: DL, VT, OpVT);
4174 }
4175
4176 // Try to eliminate a power-of-2 mask constant by converting to a signbit
4177 // test in a narrow type that we can truncate to with no cost. Examples:
4178 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
4179 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
4180 // TODO: This conservatively checks for type legality on the source and
4181 // destination types. That may inhibit optimizations, but it also
4182 // allows setcc->shift transforms that may be more beneficial.
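  // For example, a mask of 0x8000 has 16 active bits, so NarrowVT is i16 and
  // the tested bit becomes the sign bit of the i16 truncation, which a signed
  // compare against zero can check directly.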
4183 auto *AndC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
4184 if (AndC && isNullConstant(V: N1) && AndC->getAPIntValue().isPowerOf2() &&
4185 isTypeLegal(VT: OpVT) && N0.hasOneUse()) {
4186 EVT NarrowVT = EVT::getIntegerVT(Context&: *DAG.getContext(),
4187 BitWidth: AndC->getAPIntValue().getActiveBits());
4188 if (isTruncateFree(FromVT: OpVT, ToVT: NarrowVT) && isTypeLegal(VT: NarrowVT)) {
4189 SDValue Trunc = DAG.getZExtOrTrunc(Op: N0.getOperand(i: 0), DL, VT: NarrowVT);
4190 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: NarrowVT);
4191 return DAG.getSetCC(DL, VT, LHS: Trunc, RHS: Zero,
4192 Cond: Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
4193 }
4194 }
4195
4196 // Match these patterns in any of their permutations:
4197 // (X & Y) == Y
4198 // (X & Y) != Y
4199 SDValue X, Y;
4200 if (N0.getOperand(i: 0) == N1) {
4201 X = N0.getOperand(i: 1);
4202 Y = N0.getOperand(i: 0);
4203 } else if (N0.getOperand(i: 1) == N1) {
4204 X = N0.getOperand(i: 0);
4205 Y = N0.getOperand(i: 1);
4206 } else {
4207 return SDValue();
4208 }
4209
4210 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4211 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
4212  // it's liable to create an infinite loop.
4213 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: OpVT);
4214 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
4215 DAG.isKnownToBeAPowerOfTwo(Val: Y)) {
4216 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4217 // Note that where Y is variable and is known to have at most one bit set
4218 // (for example, if it is Z & 1) we cannot do this; the expressions are not
4219 // equivalent when Y == 0.
4220 assert(OpVT.isInteger());
4221 Cond = ISD::getSetCCInverse(Operation: Cond, Type: OpVT);
4222 if (DCI.isBeforeLegalizeOps() ||
4223 isCondCodeLegal(CC: Cond, VT: N0.getSimpleValueType()))
4224 return DAG.getSetCC(DL, VT, LHS: N0, RHS: Zero, Cond);
4225 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
4226 // If the target supports an 'and-not' or 'and-complement' logic operation,
4227 // try to use that to make a comparison operation more efficient.
4228 // But don't do this transform if the mask is a single bit because there are
4229 // more efficient ways to deal with that case (for example, 'bt' on x86 or
4230 // 'rlwinm' on PPC).
4231
4232 // Bail out if the compare operand that we want to turn into a zero is
4233 // already a zero (otherwise, infinite loop).
4234 if (isNullConstant(V: Y))
4235 return SDValue();
4236
4237 // Transform this into: ~X & Y == 0.
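    // This is correct because (X & Y) == Y holds iff every set bit of Y is
    // also set in X, i.e. iff no set bit of Y survives in ~X.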
4238 SDValue NotX = DAG.getNOT(DL: SDLoc(X), Val: X, VT: OpVT);
4239 SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(N0), VT: OpVT, N1: NotX, N2: Y);
4240 return DAG.getSetCC(DL, VT, LHS: NewAnd, RHS: Zero, Cond);
4241 }
4242
4243 return SDValue();
4244}
4245
4246/// This helper function of SimplifySetCC tries to optimize the comparison when
4247/// either operand of the SetCC node is a bitwise-or instruction.
4248/// For now, this just transforms (X | Y) ==/!= Y into X & ~Y ==/!= 0.
4249SDValue TargetLowering::foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1,
4250 ISD::CondCode Cond, const SDLoc &DL,
4251 DAGCombinerInfo &DCI) const {
4252 if (N1.getOpcode() == ISD::OR && N0.getOpcode() != ISD::OR)
4253 std::swap(a&: N0, b&: N1);
4254
4255 SelectionDAG &DAG = DCI.DAG;
4256 EVT OpVT = N0.getValueType();
4257 if (!N0.hasOneUse() || !OpVT.isInteger() ||
4258 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4259 return SDValue();
4260
4261 // (X | Y) == Y
4262 // (X | Y) != Y
4263 SDValue X;
4264 if (sd_match(N: N0, P: m_Or(L: m_Value(N&: X), R: m_Specific(N: N1))) && hasAndNotCompare(Y: X)) {
4265 // If the target supports an 'and-not' or 'and-complement' logic operation,
4266 // try to use that to make a comparison operation more efficient.
4267
4268 // Bail out if the compare operand that we want to turn into a zero is
4269 // already a zero (otherwise, infinite loop).
4270 if (isNullConstant(V: N1))
4271 return SDValue();
4272
4273 // Transform this into: X & ~Y ==/!= 0.
4274 SDValue NotY = DAG.getNOT(DL: SDLoc(N1), Val: N1, VT: OpVT);
4275 SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(N0), VT: OpVT, N1: X, N2: NotY);
4276 return DAG.getSetCC(DL, VT, LHS: NewAnd, RHS: DAG.getConstant(Val: 0, DL, VT: OpVT), Cond);
4277 }
4278
4279 return SDValue();
4280}
4281
4282/// There are multiple IR patterns that could be checking whether certain
4283/// truncation of a signed number would be lossy or not. The pattern that is
4284/// best at the IR level may not lower optimally. Thus, we want to unfold it.
4285/// We are looking for the following pattern: (KeptBits is a constant)
4286/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4287/// KeptBits won't be bitwidth(x); that case is constant-folded to true/false.
4288/// KeptBits also can't be 1; that would have been folded to %x dstcond 0.
4289/// We will unfold it into the natural trunc+sext pattern:
4290/// ((%x << C) a>> C) dstcond %x
4291/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
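/// For example, with i16 %x and KeptBits == 8, the input pattern is
/// (add %x, 128) u< 256, which is true iff %x is in [-128, 127], i.e. iff
/// sign-extending the low 8 bits of %x reproduces %x.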
4292SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4293 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4294 const SDLoc &DL) const {
4295 // We must be comparing with a constant.
4296 ConstantSDNode *C1;
4297 if (!(C1 = dyn_cast<ConstantSDNode>(Val&: N1)))
4298 return SDValue();
4299
4300 // N0 should be: add %x, (1 << (KeptBits-1))
4301 if (N0->getOpcode() != ISD::ADD)
4302 return SDValue();
4303
4304 // And we must be 'add'ing a constant.
4305 ConstantSDNode *C01;
4306 if (!(C01 = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1))))
4307 return SDValue();
4308
4309 SDValue X = N0->getOperand(Num: 0);
4310 EVT XVT = X.getValueType();
4311
4312 // Validate constants ...
4313
4314 APInt I1 = C1->getAPIntValue();
4315
4316 ISD::CondCode NewCond;
4317 if (Cond == ISD::CondCode::SETULT) {
4318 NewCond = ISD::CondCode::SETEQ;
4319 } else if (Cond == ISD::CondCode::SETULE) {
4320 NewCond = ISD::CondCode::SETEQ;
4321 // But need to 'canonicalize' the constant.
4322 I1 += 1;
4323 } else if (Cond == ISD::CondCode::SETUGT) {
4324 NewCond = ISD::CondCode::SETNE;
4325 // But need to 'canonicalize' the constant.
4326 I1 += 1;
4327 } else if (Cond == ISD::CondCode::SETUGE) {
4328 NewCond = ISD::CondCode::SETNE;
4329 } else
4330 return SDValue();
4331
4332 APInt I01 = C01->getAPIntValue();
4333
4334 auto checkConstants = [&I1, &I01]() -> bool {
4335    // Both of them must be powers of two, and the setcc constant must be bigger.
4336 return I1.ugt(RHS: I01) && I1.isPowerOf2() && I01.isPowerOf2();
4337 };
4338
4339 if (checkConstants()) {
4340 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4341 } else {
4342 // What if we invert constants? (and the target predicate)
4343 I1.negate();
4344 I01.negate();
4345 assert(XVT.isInteger());
4346 NewCond = getSetCCInverse(Operation: NewCond, Type: XVT);
4347 if (!checkConstants())
4348 return SDValue();
4349 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4350 }
4351
4352 // They are power-of-two, so which bit is set?
4353 const unsigned KeptBits = I1.logBase2();
4354 const unsigned KeptBitsMinusOne = I01.logBase2();
4355
4356 // Magic!
4357 if (KeptBits != (KeptBitsMinusOne + 1))
4358 return SDValue();
4359 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4360
4361 // We don't want to do this in every single case.
4362 SelectionDAG &DAG = DCI.DAG;
4363 if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
4364 return SDValue();
4365
4366 // Unfold into: sext_inreg(%x) cond %x
4367 // Where 'cond' will be either 'eq' or 'ne'.
4368 SDValue SExtInReg = DAG.getNode(
4369 Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: XVT, N1: X,
4370 N2: DAG.getValueType(EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: KeptBits)));
4371 return DAG.getSetCC(DL, VT: SCCVT, LHS: SExtInReg, RHS: X, Cond: NewCond);
4372}
4373
4374// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
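// The two sides test the same bits: X shares a set bit with (C l>> Y) exactly
// when (X << Y) shares a set bit with C (and likewise for the opposite shift
// direction), so the zero/nonzero verdict is unchanged.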
4375SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4376 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4377 DAGCombinerInfo &DCI, const SDLoc &DL) const {
4378 assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() &&
4379 "Should be a comparison with 0.");
4380 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4381 "Valid only for [in]equality comparisons.");
4382
4383 unsigned NewShiftOpcode;
4384 SDValue X, C, Y;
4385
4386 SelectionDAG &DAG = DCI.DAG;
4387
4388 // Look for '(C l>>/<< Y)'.
4389 auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
4390 // The shift should be one-use.
4391 if (!V.hasOneUse())
4392 return false;
4393 unsigned OldShiftOpcode = V.getOpcode();
4394 switch (OldShiftOpcode) {
4395 case ISD::SHL:
4396 NewShiftOpcode = ISD::SRL;
4397 break;
4398 case ISD::SRL:
4399 NewShiftOpcode = ISD::SHL;
4400 break;
4401 default:
4402 return false; // must be a logical shift.
4403 }
4404 // We should be shifting a constant.
4405 // FIXME: best to use isConstantOrConstantVector().
4406 C = V.getOperand(i: 0);
4407 ConstantSDNode *CC =
4408 isConstOrConstSplat(N: C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4409 if (!CC)
4410 return false;
4411 Y = V.getOperand(i: 1);
4412
4413 ConstantSDNode *XC =
4414 isConstOrConstSplat(N: X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4415 return shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
4416 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4417 };
4418
4419  // The LHS of the comparison should be a one-use 'and'.
4420 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4421 return SDValue();
4422
4423 X = N0.getOperand(i: 0);
4424 SDValue Mask = N0.getOperand(i: 1);
4425
4426 // 'and' is commutative!
4427 if (!Match(Mask)) {
4428 std::swap(a&: X, b&: Mask);
4429 if (!Match(Mask))
4430 return SDValue();
4431 }
4432
4433 EVT VT = X.getValueType();
4434
4435 // Produce:
4436 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4437 SDValue T0 = DAG.getNode(Opcode: NewShiftOpcode, DL, VT, N1: X, N2: Y);
4438 SDValue T1 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: T0, N2: C);
4439 SDValue T2 = DAG.getSetCC(DL, VT: SCCVT, LHS: T1, RHS: N1C, Cond);
4440 return T2;
4441}
4442
4443/// Try to fold an equality comparison with an {add/sub/xor} binary operation as
4444/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4445/// handle the commuted versions of these patterns.
4446SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4447 ISD::CondCode Cond, const SDLoc &DL,
4448 DAGCombinerInfo &DCI) const {
4449 unsigned BOpcode = N0.getOpcode();
4450 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4451 "Unexpected binop");
4452 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4453
4454 // (X + Y) == X --> Y == 0
4455 // (X - Y) == X --> Y == 0
4456 // (X ^ Y) == X --> Y == 0
4457 SelectionDAG &DAG = DCI.DAG;
4458 EVT OpVT = N0.getValueType();
4459 SDValue X = N0.getOperand(i: 0);
4460 SDValue Y = N0.getOperand(i: 1);
4461 if (X == N1)
4462 return DAG.getSetCC(DL, VT, LHS: Y, RHS: DAG.getConstant(Val: 0, DL, VT: OpVT), Cond);
4463
4464 if (Y != N1)
4465 return SDValue();
4466
4467 // (X + Y) == Y --> X == 0
4468 // (X ^ Y) == Y --> X == 0
4469 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4470 return DAG.getSetCC(DL, VT, LHS: X, RHS: DAG.getConstant(Val: 0, DL, VT: OpVT), Cond);
4471
4472 // The shift would not be valid if the operands are boolean (i1).
4473 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4474 return SDValue();
4475
4476 // (X - Y) == Y --> X == Y << 1
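  // This holds because (X - Y) == Y iff X == 2 * Y modulo 2^BW, and the
  // 'shl' computes exactly 2 * Y modulo 2^BW.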
4477 SDValue One = DAG.getShiftAmountConstant(Val: 1, VT: OpVT, DL);
4478 SDValue YShl1 = DAG.getNode(Opcode: ISD::SHL, DL, VT: N1.getValueType(), N1: Y, N2: One);
4479 if (!DCI.isCalledByLegalizer())
4480 DCI.AddToWorklist(N: YShl1.getNode());
4481 return DAG.getSetCC(DL, VT, LHS: X, RHS: YShl1, Cond);
4482}
4483
4484static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
4485 SDValue N0, const APInt &C1,
4486 ISD::CondCode Cond, const SDLoc &dl,
4487 SelectionDAG &DAG) {
4488 // Look through truncs that don't change the value of a ctpop.
4489 // FIXME: Add vector support? Need to be careful with setcc result type below.
4490 SDValue CTPOP = N0;
4491 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4492 N0.getScalarValueSizeInBits() > Log2_32(Value: N0.getOperand(i: 0).getScalarValueSizeInBits()))
4493 CTPOP = N0.getOperand(i: 0);
4494
4495 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4496 return SDValue();
4497
4498 EVT CTVT = CTPOP.getValueType();
4499 SDValue CTOp = CTPOP.getOperand(i: 0);
4500
4501 // Expand a power-of-2-or-zero comparison based on ctpop:
4502 // (ctpop x) u< 2 -> (x & x-1) == 0
4503 // (ctpop x) u> 1 -> (x & x-1) != 0
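  // Each (v & (v - 1)) step clears the lowest set bit of v, so applying it
  // N times yields zero iff the input had at most N bits set.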
4504 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4505 // Keep the CTPOP if it is a cheap vector op.
4506 if (CTVT.isVector() && TLI.isCtpopFast(VT: CTVT))
4507 return SDValue();
4508
4509 unsigned CostLimit = TLI.getCustomCtpopCost(VT: CTVT, Cond);
4510 if (C1.ugt(RHS: CostLimit + (Cond == ISD::SETULT)))
4511 return SDValue();
4512 if (C1 == 0 && (Cond == ISD::SETULT))
4513 return SDValue(); // This is handled elsewhere.
4514
4515 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4516
4517 SDValue NegOne = DAG.getAllOnesConstant(DL: dl, VT: CTVT);
4518 SDValue Result = CTOp;
4519 for (unsigned i = 0; i < Passes; i++) {
4520 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: CTVT, N1: Result, N2: NegOne);
4521 Result = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: CTVT, N1: Result, N2: Add);
4522 }
4523 ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
4524 return DAG.getSetCC(DL: dl, VT, LHS: Result, RHS: DAG.getConstant(Val: 0, DL: dl, VT: CTVT), Cond: CC);
4525 }
4526
4527 // Expand a power-of-2 comparison based on ctpop
4528 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4529 // Keep the CTPOP if it is cheap.
4530 if (TLI.isCtpopFast(VT: CTVT))
4531 return SDValue();
4532
4533 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: CTVT);
4534 SDValue NegOne = DAG.getAllOnesConstant(DL: dl, VT: CTVT);
4535 assert(CTVT.isInteger());
4536 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: CTVT, N1: CTOp, N2: NegOne);
4537
4538    // It's not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1,
4539    // so check before emitting a potentially unnecessary op.
4540 if (DAG.isKnownNeverZero(Op: CTOp)) {
4541 // (ctpop x) == 1 --> (x & x-1) == 0
4542 // (ctpop x) != 1 --> (x & x-1) != 0
4543 SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: CTVT, N1: CTOp, N2: Add);
4544 SDValue RHS = DAG.getSetCC(DL: dl, VT, LHS: And, RHS: Zero, Cond);
4545 return RHS;
4546 }
4547
4548 // (ctpop x) == 1 --> (x ^ x-1) > x-1
4549 // (ctpop x) != 1 --> (x ^ x-1) <= x-1
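    // e.g. x = 0b0100: x-1 = 0b0011 and x^(x-1) = 0b0111 u> 0b0011. For x = 0
    // both sides are all-ones, and when x has more than one bit set the xor
    // only covers the bits up to the lowest set bit, so the compare fails.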
4550 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: CTVT, N1: CTOp, N2: Add);
4551 ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
4552 return DAG.getSetCC(DL: dl, VT, LHS: Xor, RHS: Add, Cond: CmpCond);
4553 }
4554
4555 return SDValue();
4556}
4557
4558static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4559 ISD::CondCode Cond, const SDLoc &dl,
4560 SelectionDAG &DAG) {
4561 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4562 return SDValue();
4563
4564 auto *C1 = isConstOrConstSplat(N: N1, /* AllowUndefs */ true);
4565 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4566 return SDValue();
4567
4568 auto getRotateSource = [](SDValue X) {
4569 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4570 return X.getOperand(i: 0);
4571 return SDValue();
4572 };
4573
4574 // Peek through a rotated value compared against 0 or -1:
4575 // (rot X, Y) == 0/-1 --> X == 0/-1
4576 // (rot X, Y) != 0/-1 --> X != 0/-1
4577 if (SDValue R = getRotateSource(N0))
4578 return DAG.getSetCC(DL: dl, VT, LHS: R, RHS: N1, Cond);
4579
4580 // Peek through an 'or' of a rotated value compared against 0:
4581 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4582 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4583 //
4584 // TODO: Add the 'and' with -1 sibling.
4585 // TODO: Recurse through a series of 'or' ops to find the rotate.
4586 EVT OpVT = N0.getValueType();
4587 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4588 if (SDValue R = getRotateSource(N0.getOperand(i: 0))) {
4589 SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: R, N2: N0.getOperand(i: 1));
4590 return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4591 }
4592 if (SDValue R = getRotateSource(N0.getOperand(i: 1))) {
4593 SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: R, N2: N0.getOperand(i: 0));
4594 return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4595 }
4596 }
4597
4598 return SDValue();
4599}
4600
4601static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
4602 ISD::CondCode Cond, const SDLoc &dl,
4603 SelectionDAG &DAG) {
4604 // If we are testing for all-bits-clear, we might be able to do that with
4605 // less shifting since bit-order does not matter.
4606 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4607 return SDValue();
4608
4609 auto *C1 = isConstOrConstSplat(N: N1, /* AllowUndefs */ true);
4610 if (!C1 || !C1->isZero())
4611 return SDValue();
4612
4613 if (!N0.hasOneUse() ||
4614 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4615 return SDValue();
4616
4617 unsigned BitWidth = N0.getScalarValueSizeInBits();
4618 auto *ShAmtC = isConstOrConstSplat(N: N0.getOperand(i: 2));
4619 if (!ShAmtC)
4620 return SDValue();
4621
4622 uint64_t ShAmt = ShAmtC->getAPIntValue().urem(RHS: BitWidth);
4623 if (ShAmt == 0)
4624 return SDValue();
4625
4626 // Canonicalize fshr as fshl to reduce pattern-matching.
4627 if (N0.getOpcode() == ISD::FSHR)
4628 ShAmt = BitWidth - ShAmt;
4629
4630 // Match an 'or' with a specific operand 'Other' in either commuted variant.
4631 SDValue X, Y;
4632 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4633 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4634 return false;
4635 if (Or.getOperand(i: 0) == Other) {
4636 X = Or.getOperand(i: 0);
4637 Y = Or.getOperand(i: 1);
4638 return true;
4639 }
4640 if (Or.getOperand(i: 1) == Other) {
4641 X = Or.getOperand(i: 1);
4642 Y = Or.getOperand(i: 0);
4643 return true;
4644 }
4645 return false;
4646 };
4647
4648 EVT OpVT = N0.getValueType();
4649 EVT ShAmtVT = N0.getOperand(i: 2).getValueType();
4650 SDValue F0 = N0.getOperand(i: 0);
4651 SDValue F1 = N0.getOperand(i: 1);
4652 if (matchOr(F0, F1)) {
4653 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
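    // Both sides are zero iff all of X and the low BW-C bits of Y are zero:
    // the fshl keeps the low BW-C bits of (X | Y) plus the high C bits of X,
    // which together cover exactly those bits.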
4654 SDValue NewShAmt = DAG.getConstant(Val: ShAmt, DL: dl, VT: ShAmtVT);
4655 SDValue Shift = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: OpVT, N1: Y, N2: NewShAmt);
4656 SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: Shift, N2: X);
4657 return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4658 }
4659 if (matchOr(F1, F0)) {
4660 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4661 SDValue NewShAmt = DAG.getConstant(Val: BitWidth - ShAmt, DL: dl, VT: ShAmtVT);
4662 SDValue Shift = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: OpVT, N1: Y, N2: NewShAmt);
4663 SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: Shift, N2: X);
4664 return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4665 }
4666
4667 return SDValue();
4668}
4669
4670/// Try to simplify a setcc built with the specified operands and cc. If it is
4671/// unable to simplify it, return a null SDValue.
4672SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4673 ISD::CondCode Cond, bool foldBooleans,
4674 DAGCombinerInfo &DCI,
4675 const SDLoc &dl) const {
4676 SelectionDAG &DAG = DCI.DAG;
4677 const DataLayout &Layout = DAG.getDataLayout();
4678 EVT OpVT = N0.getValueType();
4679 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4680
4681 // Constant fold or commute setcc.
4682 if (SDValue Fold = DAG.FoldSetCC(VT, N1: N0, N2: N1, Cond, dl))
4683 return Fold;
4684
4685 bool N0ConstOrSplat =
4686 isConstOrConstSplat(N: N0, /*AllowUndefs*/ false, /*AllowTruncate*/ AllowTruncation: true);
4687 bool N1ConstOrSplat =
4688 isConstOrConstSplat(N: N1, /*AllowUndefs*/ false, /*AllowTruncate*/ AllowTruncation: true);
4689
4690 // Canonicalize toward having the constant on the RHS.
4691 // TODO: Handle non-splat vector constants. All undef causes trouble.
4692 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4693 // infinite loop here when we encounter one.
4694 ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Operation: Cond);
4695 if (N0ConstOrSplat && !N1ConstOrSplat &&
4696 (DCI.isBeforeLegalizeOps() ||
4697 isCondCodeLegal(CC: SwappedCC, VT: N0.getSimpleValueType())))
4698 return DAG.getSetCC(DL: dl, VT, LHS: N1, RHS: N0, Cond: SwappedCC);
4699
4700 // If we have a subtract with the same 2 non-constant operands as this setcc
4701 // -- but in reverse order -- then try to commute the operands of this setcc
4702 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4703 // instruction on some targets.
4704 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4705 (DCI.isBeforeLegalizeOps() ||
4706 isCondCodeLegal(CC: SwappedCC, VT: N0.getSimpleValueType())) &&
4707 DAG.doesNodeExist(Opcode: ISD::SUB, VTList: DAG.getVTList(VT: OpVT), Ops: {N1, N0}) &&
4708 !DAG.doesNodeExist(Opcode: ISD::SUB, VTList: DAG.getVTList(VT: OpVT), Ops: {N0, N1}))
4709 return DAG.getSetCC(DL: dl, VT, LHS: N1, RHS: N0, Cond: SwappedCC);
4710
4711 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4712 return V;
4713
4714 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4715 return V;
4716
4717 if (auto *N1C = isConstOrConstSplat(N: N1)) {
4718 const APInt &C1 = N1C->getAPIntValue();
4719
4720 // Optimize some CTPOP cases.
4721 if (SDValue V = simplifySetCCWithCTPOP(TLI: *this, VT, N0, C1, Cond, dl, DAG))
4722 return V;
4723
4724 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4725 // X * Y == 0 --> (X == 0) || (Y == 0)
4726 // X * Y != 0 --> (X != 0) && (Y != 0)
4727 // TODO: This bails out if minsize is set, but if the target doesn't have a
4728 // single instruction multiply for this type, it would likely be
4729 // smaller to decompose.
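    // The no-wrap requirement is what makes this sound: a nuw/nsw multiply
    // of two nonzero operands cannot wrap around to produce zero.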
4730 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4731 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4732 (N0->getFlags().hasNoUnsignedWrap() ||
4733 N0->getFlags().hasNoSignedWrap()) &&
4734 !Attr.hasFnAttr(Kind: Attribute::MinSize)) {
4735 SDValue IsXZero = DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1, Cond);
4736 SDValue IsYZero = DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 1), RHS: N1, Cond);
4737 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4738 return DAG.getNode(Opcode: LogicOp, DL: dl, VT, N1: IsXZero, N2: IsYZero);
4739 }
4740
4741 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4742 // equality comparison, then we're just comparing whether X itself is
4743 // zero.
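    // For a power-of-two bit width BW, (ctlz X) >> log2(BW) can only be
    // nonzero when ctlz(X) == BW, which happens exactly when X == 0.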
4744 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4745 N0.getOperand(i: 0).getOpcode() == ISD::CTLZ &&
4746 llvm::has_single_bit<uint32_t>(Value: N0.getScalarValueSizeInBits())) {
4747 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N: N0.getOperand(i: 1))) {
4748 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4749 ShAmt->getAPIntValue() == Log2_32(Value: N0.getScalarValueSizeInBits())) {
4750 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4751 // (srl (ctlz x), 5) == 0 -> X != 0
4752 // (srl (ctlz x), 5) != 1 -> X != 0
4753 Cond = ISD::SETNE;
4754 } else {
4755 // (srl (ctlz x), 5) != 0 -> X == 0
4756 // (srl (ctlz x), 5) == 1 -> X == 0
4757 Cond = ISD::SETEQ;
4758 }
4759 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: N0.getValueType());
4760 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0).getOperand(i: 0), RHS: Zero,
4761 Cond);
4762 }
4763 }
4764 }
4765 }
4766
4767 // setcc X, 0, setlt --> X (when X is all sign bits)
4768 // setcc X, 0, setne --> X (when X is all sign bits)
4769 //
4770 // When we know that X has 0 or -1 in each element (or scalar), this
4771 // comparison will produce X. This is only true when boolean contents are
4772 // represented via 0s and -1s.
4773 if (VT == OpVT &&
4774 // Check that the result of setcc is 0 and -1.
4775 getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent &&
4776 // Match only for checks X < 0 and X != 0
4777 (Cond == ISD::SETLT || Cond == ISD::SETNE) && isNullOrNullSplat(V: N1) &&
4778 // The identity holds iff we know all sign bits for all lanes.
4779 DAG.ComputeNumSignBits(Op: N0) == N0.getScalarValueSizeInBits())
4780 return N0;
4781
4782 // FIXME: Support vectors.
4783 if (auto *N1C = dyn_cast<ConstantSDNode>(Val: N1.getNode())) {
4784 const APInt &C1 = N1C->getAPIntValue();
4785
4786 // (zext x) == C --> x == (trunc C)
4787 // (sext x) == C --> x == (trunc C)
4788 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4789 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4790 unsigned MinBits = N0.getValueSizeInBits();
4791 SDValue PreExt;
4792 bool Signed = false;
4793 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4794 // ZExt
4795 MinBits = N0->getOperand(Num: 0).getValueSizeInBits();
4796 PreExt = N0->getOperand(Num: 0);
4797 } else if (N0->getOpcode() == ISD::AND) {
4798 // DAGCombine turns costly ZExts into ANDs
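      // e.g. (and X, 255) acts like a zext from i8: 255 + 1 is a power of
      // two, and countr_one(255) == 8 recovers the effective source width.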
4799 if (auto *C = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1)))
4800 if ((C->getAPIntValue()+1).isPowerOf2()) {
4801 MinBits = C->getAPIntValue().countr_one();
4802 PreExt = N0->getOperand(Num: 0);
4803 }
4804 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4805 // SExt
4806 MinBits = N0->getOperand(Num: 0).getValueSizeInBits();
4807 PreExt = N0->getOperand(Num: 0);
4808 Signed = true;
4809 } else if (auto *LN0 = dyn_cast<LoadSDNode>(Val&: N0)) {
4810 // ZEXTLOAD / SEXTLOAD
4811 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4812 MinBits = LN0->getMemoryVT().getSizeInBits();
4813 PreExt = N0;
4814 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4815 Signed = true;
4816 MinBits = LN0->getMemoryVT().getSizeInBits();
4817 PreExt = N0;
4818 }
4819 }
4820
4821 // Figure out how many bits we need to preserve this constant.
4822 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4823
4824 // Make sure we're not losing bits from the constant.
4825 if (MinBits > 0 &&
4826 MinBits < C1.getBitWidth() &&
4827 MinBits >= ReqdBits) {
4828 EVT MinVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: MinBits);
4829 if (isTypeDesirableForOp(ISD::SETCC, VT: MinVT)) {
4830 // Will get folded away.
4831 SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MinVT, Operand: PreExt);
4832 if (MinBits == 1 && C1 == 1)
4833 // Invert the condition.
4834 return DAG.getSetCC(DL: dl, VT, LHS: Trunc, RHS: DAG.getConstant(Val: 0, DL: dl, VT: MVT::i1),
4835 Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4836 SDValue C = DAG.getConstant(Val: C1.trunc(width: MinBits), DL: dl, VT: MinVT);
4837 return DAG.getSetCC(DL: dl, VT, LHS: Trunc, RHS: C, Cond);
4838 }
4839
4840 // If truncating the setcc operands is not desirable, we can still
4841 // simplify the expression in some cases:
4842      // setcc ([sz]ext (setcc x, y, cc)), 0, setne  -> setcc x, y, cc
4843      // setcc ([sz]ext (setcc x, y, cc)), 0, seteq  -> setcc x, y, inv(cc)
4844      // setcc (zext (setcc x, y, cc)), 1, setne     -> setcc x, y, inv(cc)
4845      // setcc (zext (setcc x, y, cc)), 1, seteq     -> setcc x, y, cc
4846      // setcc (sext (setcc x, y, cc)), -1, setne    -> setcc x, y, inv(cc)
4847      // setcc (sext (setcc x, y, cc)), -1, seteq    -> setcc x, y, cc
4848 SDValue TopSetCC = N0->getOperand(Num: 0);
4849 unsigned N0Opc = N0->getOpcode();
4850 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4851 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4852 TopSetCC.getOpcode() == ISD::SETCC &&
4853 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4854 (isConstFalseVal(N: N1) ||
4855 isExtendedTrueVal(N: N1C, VT: N0->getValueType(ResNo: 0), SExt))) {
4856
4857 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4858 (!N1C->isZero() && Cond == ISD::SETNE);
4859
4860 if (!Inverse)
4861 return TopSetCC;
4862
4863 ISD::CondCode InvCond = ISD::getSetCCInverse(
4864 Operation: cast<CondCodeSDNode>(Val: TopSetCC.getOperand(i: 2))->get(),
4865 Type: TopSetCC.getOperand(i: 0).getValueType());
4866 return DAG.getSetCC(DL: dl, VT, LHS: TopSetCC.getOperand(i: 0),
4867 RHS: TopSetCC.getOperand(i: 1),
4868 Cond: InvCond);
4869 }
4870 }
4871 }
4872
4873 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4874 // equality or unsigned, and all 1 bits of the const are in the same
4875 // partial word, see if we can shorten the load.
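    // e.g. on a little-endian target, (and (load i32 p), 0xFF00) == 0 can
    // become (and (load i8 p+1), 0xFF) == 0, a narrower one-byte load.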
4876 if (DCI.isBeforeLegalize() &&
4877 !ISD::isSignedIntSetCC(Code: Cond) &&
4878 N0.getOpcode() == ISD::AND && C1 == 0 &&
4879 N0.getNode()->hasOneUse() &&
4880 isa<LoadSDNode>(Val: N0.getOperand(i: 0)) &&
4881 N0.getOperand(i: 0).getNode()->hasOneUse() &&
4882 isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
4883 auto *Lod = cast<LoadSDNode>(Val: N0.getOperand(i: 0));
4884 APInt bestMask;
4885 unsigned bestWidth = 0, bestOffset = 0;
4886 if (Lod->isSimple() && Lod->isUnindexed() &&
4887 (Lod->getMemoryVT().isByteSized() ||
4888 isPaddedAtMostSignificantBitsWhenStored(VT: Lod->getMemoryVT()))) {
4889 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4890 unsigned origWidth = N0.getValueSizeInBits();
4891 unsigned maskWidth = origWidth;
4892 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4893 // 8 bits, but have to be careful...
4894 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4895 origWidth = Lod->getMemoryVT().getSizeInBits();
4896 const APInt &Mask = N0.getConstantOperandAPInt(i: 1);
4897      // Only consider power-of-2 widths (and at least one byte) as candidates
4898 // for the narrowed load.
4899 for (unsigned width = 8; width < origWidth; width *= 2) {
4900 EVT newVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: width);
4901 APInt newMask = APInt::getLowBitsSet(numBits: maskWidth, loBitsSet: width);
4902 // Avoid accessing any padding here for now (we could use memWidth
4903 // instead of origWidth here otherwise).
4904 unsigned maxOffset = origWidth - width;
4905 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4906 if (Mask.isSubsetOf(RHS: newMask)) {
4907 unsigned ptrOffset =
4908 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4909 unsigned IsFast = 0;
4910 assert((ptrOffset % 8) == 0 && "Non-Bytealigned pointer offset");
4911 Align NewAlign = commonAlignment(A: Lod->getAlign(), Offset: ptrOffset / 8);
4912 if (shouldReduceLoadWidth(Load: Lod, ExtTy: ISD::NON_EXTLOAD, NewVT: newVT,
4913 ByteOffset: ptrOffset / 8) &&
4914 allowsMemoryAccess(
4915 Context&: *DAG.getContext(), DL: Layout, VT: newVT, AddrSpace: Lod->getAddressSpace(),
4916 Alignment: NewAlign, Flags: Lod->getMemOperand()->getFlags(), Fast: &IsFast) &&
4917 IsFast) {
4918 bestOffset = ptrOffset / 8;
4919 bestMask = Mask.lshr(shiftAmt: offset);
4920 bestWidth = width;
4921 break;
4922 }
4923 }
4924 newMask <<= 8;
4925 }
4926 if (bestWidth)
4927 break;
4928 }
4929 }
4930 if (bestWidth) {
4931 EVT newVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: bestWidth);
4932 SDValue Ptr = Lod->getBasePtr();
4933 if (bestOffset != 0)
4934 Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: bestOffset));
4935 SDValue NewLoad =
4936 DAG.getLoad(VT: newVT, dl, Chain: Lod->getChain(), Ptr,
4937 PtrInfo: Lod->getPointerInfo().getWithOffset(O: bestOffset),
4938 Alignment: Lod->getBaseAlign());
4939 SDValue And =
4940 DAG.getNode(Opcode: ISD::AND, DL: dl, VT: newVT, N1: NewLoad,
4941 N2: DAG.getConstant(Val: bestMask.trunc(width: bestWidth), DL: dl, VT: newVT));
4942 return DAG.getSetCC(DL: dl, VT, LHS: And, RHS: DAG.getConstant(Val: 0LL, DL: dl, VT: newVT), Cond);
4943 }
4944 }
4945
4946 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4947 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4948 unsigned InSize = N0.getOperand(i: 0).getValueSizeInBits();
4949
4950 // If the comparison constant has bits in the upper part, the
4951 // zero-extended value could never match.
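      // e.g. (zext i8 X to i32) == 300 is always false, since the
      // zero-extended value can be at most 255.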
4952 if (C1.intersects(RHS: APInt::getHighBitsSet(numBits: C1.getBitWidth(),
4953 hiBitsSet: C1.getBitWidth() - InSize))) {
4954 switch (Cond) {
4955 case ISD::SETUGT:
4956 case ISD::SETUGE:
4957 case ISD::SETEQ:
4958 return DAG.getConstant(Val: 0, DL: dl, VT);
4959 case ISD::SETULT:
4960 case ISD::SETULE:
4961 case ISD::SETNE:
4962 return DAG.getConstant(Val: 1, DL: dl, VT);
4963 case ISD::SETGT:
4964 case ISD::SETGE:
4965 // True if the sign bit of C1 is set.
4966 return DAG.getConstant(Val: C1.isNegative(), DL: dl, VT);
4967 case ISD::SETLT:
4968 case ISD::SETLE:
4969 // True if the sign bit of C1 isn't set.
4970 return DAG.getConstant(Val: C1.isNonNegative(), DL: dl, VT);
4971 default:
4972 break;
4973 }
4974 }
4975
4976 // Otherwise, we can perform the comparison with the low bits.
4977 switch (Cond) {
4978 case ISD::SETEQ:
4979 case ISD::SETNE:
4980 case ISD::SETUGT:
4981 case ISD::SETUGE:
4982 case ISD::SETULT:
4983 case ISD::SETULE: {
4984 EVT newVT = N0.getOperand(i: 0).getValueType();
4985 // FIXME: Should use isNarrowingProfitable.
4986 if (DCI.isBeforeLegalizeOps() ||
4987 (isOperationLegal(Op: ISD::SETCC, VT: newVT) &&
4988 isCondCodeLegal(CC: Cond, VT: newVT.getSimpleVT()) &&
4989 isTypeDesirableForOp(ISD::SETCC, VT: newVT))) {
4990 EVT NewSetCCVT = getSetCCResultType(DL: Layout, Context&: *DAG.getContext(), VT: newVT);
4991 SDValue NewConst = DAG.getConstant(Val: C1.trunc(width: InSize), DL: dl, VT: newVT);
4992
4993 SDValue NewSetCC = DAG.getSetCC(DL: dl, VT: NewSetCCVT, LHS: N0.getOperand(i: 0),
4994 RHS: NewConst, Cond);
4995 return DAG.getBoolExtOrTrunc(Op: NewSetCC, SL: dl, VT, OpVT: N0.getValueType());
4996 }
4997 break;
4998 }
4999 default:
5000      break; // TODO: Be more careful with signed comparisons.
5001 }
5002 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
5003 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5004 !isSExtCheaperThanZExt(FromTy: cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT(),
5005 ToTy: OpVT)) {
5006 EVT ExtSrcTy = cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT();
5007 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
5008 EVT ExtDstTy = N0.getValueType();
5009 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
5010
5011 // If the constant doesn't fit into the number of bits for the source of
5012 // the sign extension, it is impossible for both sides to be equal.
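      // e.g. a sext_inreg from i8 yields values in [-128, 127], so an
      // equality comparison against 200 can never be true.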
5013 if (C1.getSignificantBits() > ExtSrcTyBits)
5014 return DAG.getBoolConstant(V: Cond == ISD::SETNE, DL: dl, VT, OpVT);
5015
5016 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
5017 ExtDstTy != ExtSrcTy && "Unexpected types!");
5018 APInt Imm = APInt::getLowBitsSet(numBits: ExtDstTyBits, loBitsSet: ExtSrcTyBits);
5019 SDValue ZextOp = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ExtDstTy, N1: N0.getOperand(i: 0),
5020 N2: DAG.getConstant(Val: Imm, DL: dl, VT: ExtDstTy));
5021 if (!DCI.isCalledByLegalizer())
5022 DCI.AddToWorklist(N: ZextOp.getNode());
5023 // Otherwise, make this a use of a zext.
5024 return DAG.getSetCC(DL: dl, VT, LHS: ZextOp,
5025 RHS: DAG.getConstant(Val: C1 & Imm, DL: dl, VT: ExtDstTy), Cond);
5026 } else if ((N1C->isZero() || N1C->isOne()) &&
5027 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5028 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
5029 // excluded as they are handled below whilst checking for foldBooleans.
5030 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
5031 isTypeLegal(VT) && VT.bitsLE(VT: N0.getValueType()) &&
5032 (N0.getValueType() == MVT::i1 ||
5033 getBooleanContents(Type: N0.getValueType()) == ZeroOrOneBooleanContent) &&
5034 DAG.MaskedValueIsZero(
5035 Op: N0, Mask: APInt::getBitsSetFrom(numBits: N0.getValueSizeInBits(), loBit: 1))) {
5036 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
5037 if (TrueWhenTrue)
5038 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: N0);
5039 // Invert the condition.
5040 if (N0.getOpcode() == ISD::SETCC) {
5041 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N0.getOperand(i: 2))->get();
5042 CC = ISD::getSetCCInverse(Operation: CC, Type: N0.getOperand(i: 0).getValueType());
5043 if (DCI.isBeforeLegalizeOps() ||
5044 isCondCodeLegal(CC, VT: N0.getOperand(i: 0).getSimpleValueType()))
5045 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N0.getOperand(i: 1), Cond: CC);
5046 }
5047 }
5048
5049 if ((N0.getOpcode() == ISD::XOR ||
5050 (N0.getOpcode() == ISD::AND &&
5051 N0.getOperand(i: 0).getOpcode() == ISD::XOR &&
5052 N0.getOperand(i: 1) == N0.getOperand(i: 0).getOperand(i: 1))) &&
5053 isOneConstant(V: N0.getOperand(i: 1))) {
5054 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
5055 // can only do this if the top bits are known zero.
5056 unsigned BitWidth = N0.getValueSizeInBits();
5057 if (DAG.MaskedValueIsZero(Op: N0,
5058 Mask: APInt::getHighBitsSet(numBits: BitWidth,
5059 hiBitsSet: BitWidth-1))) {
5060 // Okay, get the un-inverted input value.
5061 SDValue Val;
5062 if (N0.getOpcode() == ISD::XOR) {
5063 Val = N0.getOperand(i: 0);
5064 } else {
5065 assert(N0.getOpcode() == ISD::AND &&
5066 N0.getOperand(0).getOpcode() == ISD::XOR);
5067 // ((X^1)&1)^1 -> X & 1
5068 Val = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: N0.getValueType(),
5069 N1: N0.getOperand(i: 0).getOperand(i: 0),
5070 N2: N0.getOperand(i: 1));
5071 }
5072
5073 return DAG.getSetCC(DL: dl, VT, LHS: Val, RHS: N1,
5074 Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5075 }
5076 } else if (N1C->isOne()) {
5077 SDValue Op0 = N0;
5078 if (Op0.getOpcode() == ISD::TRUNCATE)
5079 Op0 = Op0.getOperand(i: 0);
5080
5081 if ((Op0.getOpcode() == ISD::XOR) &&
5082 Op0.getOperand(i: 0).getOpcode() == ISD::SETCC &&
5083 Op0.getOperand(i: 1).getOpcode() == ISD::SETCC) {
5084 SDValue XorLHS = Op0.getOperand(i: 0);
5085 SDValue XorRHS = Op0.getOperand(i: 1);
5086 // Ensure that the input setccs return an i1 type or 0/1 value.
5087 if (Op0.getValueType() == MVT::i1 ||
5088 (getBooleanContents(Type: XorLHS.getOperand(i: 0).getValueType()) ==
5089 ZeroOrOneBooleanContent &&
5090 getBooleanContents(Type: XorRHS.getOperand(i: 0).getValueType()) ==
5091 ZeroOrOneBooleanContent)) {
5092 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
5093 Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
5094 return DAG.getSetCC(DL: dl, VT, LHS: XorLHS, RHS: XorRHS, Cond);
5095 }
5096 }
5097 if (Op0.getOpcode() == ISD::AND && isOneConstant(V: Op0.getOperand(i: 1))) {
5098 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
5099 if (Op0.getValueType().bitsGT(VT))
5100 Op0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
5101 N1: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Op0.getOperand(i: 0)),
5102 N2: DAG.getConstant(Val: 1, DL: dl, VT));
5103 else if (Op0.getValueType().bitsLT(VT))
5104 Op0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
5105 N1: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: Op0.getOperand(i: 0)),
5106 N2: DAG.getConstant(Val: 1, DL: dl, VT));
5107
5108 return DAG.getSetCC(DL: dl, VT, LHS: Op0,
5109 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Op0.getValueType()),
5110 Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5111 }
5112 if (Op0.getOpcode() == ISD::AssertZext &&
5113 cast<VTSDNode>(Val: Op0.getOperand(i: 1))->getVT() == MVT::i1)
5114 return DAG.getSetCC(DL: dl, VT, LHS: Op0,
5115 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Op0.getValueType()),
5116 Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5117 }
5118 }
5119
5120 // Given:
5121 // icmp eq/ne (urem %x, %y), 0
5122 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
5123 // icmp eq/ne %x, 0
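    // A nonzero %x with a single bit set is a power of two, while a %y with
    // at least two bits set is not, so %y can never divide %x; the urem is
    // therefore zero iff %x itself is zero.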
5124 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
5125 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5126 KnownBits XKnown = DAG.computeKnownBits(Op: N0.getOperand(i: 0));
5127 KnownBits YKnown = DAG.computeKnownBits(Op: N0.getOperand(i: 1));
5128 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
5129 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1, Cond);
5130 }
5131
5132 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
5133 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
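    // (ashr X, BW-1) broadcasts the sign bit across the value: it is -1 iff
    // X is negative and 0 otherwise, so comparing it to -1 tests the sign.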
5134 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5135 N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) &&
5136 N0.getConstantOperandAPInt(i: 1) == OpVT.getScalarSizeInBits() - 1 &&
5137 N1C->isAllOnes()) {
5138 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0),
5139 RHS: DAG.getConstant(Val: 0, DL: dl, VT: OpVT),
5140 Cond: Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
5141 }
5142
5143 // fold (setcc (trunc x) c) -> (setcc x c)
5144 if (N0.getOpcode() == ISD::TRUNCATE &&
5145 ((N0->getFlags().hasNoUnsignedWrap() && !ISD::isSignedIntSetCC(Code: Cond)) ||
5146 (N0->getFlags().hasNoSignedWrap() &&
5147 !ISD::isUnsignedIntSetCC(Code: Cond))) &&
5148 isTypeDesirableForOp(ISD::SETCC, VT: N0.getOperand(i: 0).getValueType())) {
5149 EVT NewVT = N0.getOperand(i: 0).getValueType();
5150 SDValue NewConst = DAG.getConstant(
5151 Val: (N0->getFlags().hasNoSignedWrap() && !ISD::isUnsignedIntSetCC(Code: Cond))
5152 ? C1.sext(width: NewVT.getSizeInBits())
5153 : C1.zext(width: NewVT.getSizeInBits()),
5154 DL: dl, VT: NewVT);
5155 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: NewConst, Cond);
5156 }
5157
5158 if (SDValue V =
5159 optimizeSetCCOfSignedTruncationCheck(SCCVT: VT, N0, N1, Cond, DCI, DL: dl))
5160 return V;
5161 }
5162
5163 // These simplifications apply to splat vectors as well.
5164 // TODO: Handle more splat vector cases.
5165 if (auto *N1C = isConstOrConstSplat(N: N1)) {
5166 const APInt &C1 = N1C->getAPIntValue();
5167
5168 APInt MinVal, MaxVal;
5169 unsigned OperandBitSize = N1C->getValueType(ResNo: 0).getScalarSizeInBits();
5170 if (ISD::isSignedIntSetCC(Code: Cond)) {
5171 MinVal = APInt::getSignedMinValue(numBits: OperandBitSize);
5172 MaxVal = APInt::getSignedMaxValue(numBits: OperandBitSize);
5173 } else {
5174 MinVal = APInt::getMinValue(numBits: OperandBitSize);
5175 MaxVal = APInt::getMaxValue(numBits: OperandBitSize);
5176 }
5177
5178 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
5179 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
5180 // X >= MIN --> true
5181 if (C1 == MinVal)
5182 return DAG.getBoolConstant(V: true, DL: dl, VT, OpVT);
5183
5184 if (!VT.isVector()) { // TODO: Support this for vectors.
5185 // X >= C0 --> X > (C0 - 1)
5186 APInt C = C1 - 1;
5187 ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
5188 if ((DCI.isBeforeLegalizeOps() ||
5189 isCondCodeLegal(CC: NewCC, VT: OpVT.getSimpleVT())) &&
5190 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5191 isLegalICmpImmediate(C.getSExtValue())))) {
5192 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5193 RHS: DAG.getConstant(Val: C, DL: dl, VT: N1.getValueType()),
5194 Cond: NewCC);
5195 }
5196 }
5197 }
5198
5199 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
5200 // X <= MAX --> true
5201 if (C1 == MaxVal)
5202 return DAG.getBoolConstant(V: true, DL: dl, VT, OpVT);
5203
5204 // X <= C0 --> X < (C0 + 1)
5205 if (!VT.isVector()) { // TODO: Support this for vectors.
5206 APInt C = C1 + 1;
5207 ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
5208 if ((DCI.isBeforeLegalizeOps() ||
5209 isCondCodeLegal(CC: NewCC, VT: OpVT.getSimpleVT())) &&
5210 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5211 isLegalICmpImmediate(C.getSExtValue())))) {
5212 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5213 RHS: DAG.getConstant(Val: C, DL: dl, VT: N1.getValueType()),
5214 Cond: NewCC);
5215 }
5216 }
5217 }
5218
5219 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
5220 if (C1 == MinVal)
5221 return DAG.getBoolConstant(V: false, DL: dl, VT, OpVT); // X < MIN --> false
5222
5223 // TODO: Support this for vectors after legalize ops.
5224 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5225 // Canonicalize setlt X, Max --> setne X, Max
5226 if (C1 == MaxVal)
5227 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: ISD::SETNE);
5228
5229 // If we have setult X, 1, turn it into seteq X, 0
5230 if (C1 == MinVal+1)
5231 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5232 RHS: DAG.getConstant(Val: MinVal, DL: dl, VT: N0.getValueType()),
5233 Cond: ISD::SETEQ);
5234 }
5235 }
5236
5237 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
5238 if (C1 == MaxVal)
5239 return DAG.getBoolConstant(V: false, DL: dl, VT, OpVT); // X > MAX --> false
5240
5241 // TODO: Support this for vectors after legalize ops.
5242 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5243 // Canonicalize setgt X, Min --> setne X, Min
5244 if (C1 == MinVal)
5245 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: ISD::SETNE);
5246
5247 // If we have setugt X, Max-1, turn it into seteq X, Max
5248 if (C1 == MaxVal-1)
5249 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5250 RHS: DAG.getConstant(Val: MaxVal, DL: dl, VT: N0.getValueType()),
5251 Cond: ISD::SETEQ);
5252 }
5253 }
5254
5255 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5256 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5257 if (C1.isZero())
5258 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5259 SCCVT: VT, N0, N1C: N1, Cond, DCI, DL: dl))
5260 return CC;
5261
5262 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5263 // For example, when high 32-bits of i64 X are known clear:
5264 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
5265 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
5266 bool CmpZero = N1C->isZero();
5267 bool CmpNegOne = N1C->isAllOnes();
5268 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5269 // Match or(lo,shl(hi,bw/2)) pattern.
5270 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5271 unsigned EltBits = V.getScalarValueSizeInBits();
5272 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5273 return false;
5274 SDValue LHS = V.getOperand(i: 0);
5275 SDValue RHS = V.getOperand(i: 1);
5276 APInt HiBits = APInt::getHighBitsSet(numBits: EltBits, hiBitsSet: EltBits / 2);
5277          // The unshifted element must have zero upper bits.
5278 if (RHS.getOpcode() == ISD::SHL &&
5279 isa<ConstantSDNode>(Val: RHS.getOperand(i: 1)) &&
5280 RHS.getConstantOperandAPInt(i: 1) == (EltBits / 2) &&
5281 DAG.MaskedValueIsZero(Op: LHS, Mask: HiBits)) {
5282 Lo = LHS;
5283 Hi = RHS.getOperand(i: 0);
5284 return true;
5285 }
5286 if (LHS.getOpcode() == ISD::SHL &&
5287 isa<ConstantSDNode>(Val: LHS.getOperand(i: 1)) &&
5288 LHS.getConstantOperandAPInt(i: 1) == (EltBits / 2) &&
5289 DAG.MaskedValueIsZero(Op: RHS, Mask: HiBits)) {
5290 Lo = RHS;
5291 Hi = LHS.getOperand(i: 0);
5292 return true;
5293 }
5294 return false;
5295 };
5296
5297 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5298 unsigned EltBits = N0.getScalarValueSizeInBits();
5299 unsigned HalfBits = EltBits / 2;
5300 APInt HiBits = APInt::getHighBitsSet(numBits: EltBits, hiBitsSet: HalfBits);
5301 SDValue LoBits = DAG.getConstant(Val: ~HiBits, DL: dl, VT: OpVT);
5302 SDValue HiMask = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1: Hi, N2: LoBits);
5303 SDValue NewN0 =
5304 DAG.getNode(Opcode: CmpZero ? ISD::OR : ISD::AND, DL: dl, VT: OpVT, N1: Lo, N2: HiMask);
5305 SDValue NewN1 = CmpZero ? DAG.getConstant(Val: 0, DL: dl, VT: OpVT) : LoBits;
5306 return DAG.getSetCC(DL: dl, VT, LHS: NewN0, RHS: NewN1, Cond);
5307 };
5308
5309 SDValue Lo, Hi;
5310 if (IsConcat(N0, Lo, Hi))
5311 return MergeConcat(Lo, Hi);
5312
5313 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5314 SDValue Lo0, Lo1, Hi0, Hi1;
5315 if (IsConcat(N0.getOperand(i: 0), Lo0, Hi0) &&
5316 IsConcat(N0.getOperand(i: 1), Lo1, Hi1)) {
5317 return MergeConcat(DAG.getNode(Opcode: N0.getOpcode(), DL: dl, VT: OpVT, N1: Lo0, N2: Lo1),
5318 DAG.getNode(Opcode: N0.getOpcode(), DL: dl, VT: OpVT, N1: Hi0, N2: Hi1));
5319 }
5320 }
5321 }
5322 }
5323
5324 // If we have "setcc X, C0", check to see if we can shrink the immediate
5325 // by changing cc.
5326 // TODO: Support this for vectors after legalize ops.
5327 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5328 // SETUGT X, SINTMAX -> SETLT X, 0
5329 // SETUGE X, SINTMIN -> SETLT X, 0
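      // X u> SINTMAX (equivalently X u>= SINTMIN) holds exactly when the
      // sign bit of X is set, i.e. when X is negative as a signed value.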
5330 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5331 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5332 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5333 RHS: DAG.getConstant(Val: 0, DL: dl, VT: N1.getValueType()),
5334 Cond: ISD::SETLT);
5335
5336 // SETULT X, SINTMIN -> SETGT X, -1
5337 // SETULE X, SINTMAX -> SETGT X, -1
5338 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5339 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5340 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5341 RHS: DAG.getAllOnesConstant(DL: dl, VT: N1.getValueType()),
5342 Cond: ISD::SETGT);
5343 }
5344 }
5345
5346 // Back to non-vector simplifications.
5347 // TODO: Can we do these for vector splats?
5348 if (auto *N1C = dyn_cast<ConstantSDNode>(Val: N1.getNode())) {
5349 const APInt &C1 = N1C->getAPIntValue();
5350 EVT ShValTy = N0.getValueType();
5351
5352    // Fold bit comparisons when we can. This transform would produce an
5353    // incorrect value when boolean false is negative one, unless the bitsize
5354    // is 1, in which case the false value is the same in practice regardless
5355    // of its representation.
5356 if ((VT.getSizeInBits() == 1 ||
5357 getBooleanContents(Type: N0.getValueType()) == ZeroOrOneBooleanContent) &&
5358 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5359 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(VT: ShValTy))) &&
5360 N0.getOpcode() == ISD::AND) {
5361 if (auto *AndRHS = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
5362        if (Cond == ISD::SETNE && C1 == 0) { // (X & 8) != 0 --> (X & 8) >> 3
5363 // Perform the xform if the AND RHS is a single bit.
5364 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5365 if (AndRHS->getAPIntValue().isPowerOf2() &&
5366 !shouldAvoidTransformToShift(VT: ShValTy, Amount: ShCt)) {
5367 return DAG.getNode(
5368 Opcode: ISD::TRUNCATE, DL: dl, VT,
5369 Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5370 N2: DAG.getShiftAmountConstant(Val: ShCt, VT: ShValTy, DL: dl)));
5371 }
5372 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5373 // (X & 8) == 8 --> (X & 8) >> 3
5374 // Perform the xform if C1 is a single bit.
5375 unsigned ShCt = C1.logBase2();
5376 if (C1.isPowerOf2() && !shouldAvoidTransformToShift(VT: ShValTy, Amount: ShCt)) {
5377 return DAG.getNode(
5378 Opcode: ISD::TRUNCATE, DL: dl, VT,
5379 Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5380 N2: DAG.getShiftAmountConstant(Val: ShCt, VT: ShValTy, DL: dl)));
5381 }
5382 }
5383 }
5384 }
5385
5386 if (C1.getSignificantBits() <= 64 &&
5387 !isLegalICmpImmediate(C1.getSExtValue())) {
5388 // (X & -256) == 256 -> (X >> 8) == 1
5389 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5390 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5391 if (auto *AndRHS = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
5392 const APInt &AndRHSC = AndRHS->getAPIntValue();
5393 if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(RHS: AndRHSC)) {
5394 unsigned ShiftBits = AndRHSC.countr_zero();
5395 if (!shouldAvoidTransformToShift(VT: ShValTy, Amount: ShiftBits)) {
5396 // If using an unsigned shift doesn't yield a legal compare
5397 // immediate, try using sra instead.
5398 APInt NewC = C1.lshr(shiftAmt: ShiftBits);
5399 if (NewC.getSignificantBits() <= 64 &&
5400 !isLegalICmpImmediate(NewC.getSExtValue())) {
5401 APInt SignedC = C1.ashr(ShiftAmt: ShiftBits);
5402 if (SignedC.getSignificantBits() <= 64 &&
5403 isLegalICmpImmediate(SignedC.getSExtValue())) {
5404 SDValue Shift = DAG.getNode(
5405 Opcode: ISD::SRA, DL: dl, VT: ShValTy, N1: N0.getOperand(i: 0),
5406 N2: DAG.getShiftAmountConstant(Val: ShiftBits, VT: ShValTy, DL: dl));
5407 SDValue CmpRHS = DAG.getConstant(Val: SignedC, DL: dl, VT: ShValTy);
5408 return DAG.getSetCC(DL: dl, VT, LHS: Shift, RHS: CmpRHS, Cond);
5409 }
5410 }
5411 SDValue Shift = DAG.getNode(
5412 Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0.getOperand(i: 0),
5413 N2: DAG.getShiftAmountConstant(Val: ShiftBits, VT: ShValTy, DL: dl));
5414 SDValue CmpRHS = DAG.getConstant(Val: NewC, DL: dl, VT: ShValTy);
5415 return DAG.getSetCC(DL: dl, VT, LHS: Shift, RHS: CmpRHS, Cond);
5416 }
5417 }
5418 }
5419 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5420 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5421 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5422 // X < 0x100000000 -> (X >> 32) < 1
5423 // X >= 0x100000000 -> (X >> 32) >= 1
5424 // X <= 0x0ffffffff -> (X >> 32) < 1
5425 // X > 0x0ffffffff -> (X >> 32) >= 1
5426 unsigned ShiftBits;
5427 APInt NewC = C1;
5428 ISD::CondCode NewCond = Cond;
5429 if (AdjOne) {
5430 ShiftBits = C1.countr_one();
5431 NewC = NewC + 1;
5432 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5433 } else {
5434 ShiftBits = C1.countr_zero();
5435 }
5436 NewC.lshrInPlace(ShiftAmt: ShiftBits);
5437 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5438 isLegalICmpImmediate(NewC.getSExtValue()) &&
5439 !shouldAvoidTransformToShift(VT: ShValTy, Amount: ShiftBits)) {
5440 SDValue Shift =
5441 DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5442 N2: DAG.getShiftAmountConstant(Val: ShiftBits, VT: ShValTy, DL: dl));
5443 SDValue CmpRHS = DAG.getConstant(Val: NewC, DL: dl, VT: ShValTy);
5444 return DAG.getSetCC(DL: dl, VT, LHS: Shift, RHS: CmpRHS, Cond: NewCond);
5445 }
5446 }
5447 }
5448 }
5449
5450 if (!isa<ConstantFPSDNode>(Val: N0) && isa<ConstantFPSDNode>(Val: N1)) {
5451 auto *CFP = cast<ConstantFPSDNode>(Val&: N1);
5452 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5453
5454 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5455    // constant if knowing that the operand is non-NaN is enough. We prefer to
5456 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5457 // materialize 0.0.
5458 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5459 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N0, Cond);
5460
5461 // setcc (fneg x), C -> setcc swap(pred) x, -C
5462 if (N0.getOpcode() == ISD::FNEG) {
5463 ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Operation: Cond);
5464 if (DCI.isBeforeLegalizeOps() ||
5465 isCondCodeLegal(CC: SwapCond, VT: N0.getSimpleValueType())) {
5466 SDValue NegN1 = DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT: N0.getValueType(), Operand: N1);
5467 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: NegN1, Cond: SwapCond);
5468 }
5469 }
5470
5471 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5472 if (isOperationLegalOrCustom(Op: ISD::IS_FPCLASS, VT: N0.getValueType()) &&
5473 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(ResNo: 0))) {
5474 bool IsFabs = N0.getOpcode() == ISD::FABS;
5475 SDValue Op = IsFabs ? N0.getOperand(i: 0) : N0;
5476 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5477 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5478 : (IsFabs ? fcInf : fcPosInf);
5479 if (Cond == ISD::SETUEQ)
5480 Flag |= fcNan;
5481 return DAG.getNode(Opcode: ISD::IS_FPCLASS, DL: dl, VT, N1: Op,
5482 N2: DAG.getTargetConstant(Val: Flag, DL: dl, VT: MVT::i32));
5483 }
5484 }
5485
5486 // If the condition is not legal, see if we can find an equivalent one
5487 // which is legal.
5488 if (!isCondCodeLegal(CC: Cond, VT: N0.getSimpleValueType())) {
5489 // If the comparison was an awkward floating-point == or != and one of
5490 // the comparison operands is infinity or negative infinity, convert the
5491 // condition to a less-awkward <= or >=.
5492 if (CFP->getValueAPF().isInfinity()) {
5493 bool IsNegInf = CFP->getValueAPF().isNegative();
5494 ISD::CondCode NewCond = ISD::SETCC_INVALID;
5495 switch (Cond) {
5496 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5497 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5498 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5499 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5500 default: break;
5501 }
5502 if (NewCond != ISD::SETCC_INVALID &&
5503 isCondCodeLegal(CC: NewCond, VT: N0.getSimpleValueType()))
5504 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: NewCond);
5505 }
5506 }
5507 }
5508
5509 if (N0 == N1) {
5510 // The sext(setcc()) => setcc() optimization relies on the appropriate
5511 // constant being emitted.
5512 assert(!N0.getValueType().isInteger() &&
5513 "Integer types should be handled by FoldSetCC");
5514
5515 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5516 unsigned UOF = ISD::getUnorderedFlavor(Cond);
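    // getUnorderedFlavor encodes the predicate class: 0 = ordered (false on
    // NaN), 1 = unordered (true on NaN), 2 = the integer-style predicates,
    // which are undefined on NaNs.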
5517 if (UOF == 2) // FP operators that are undefined on NaNs.
5518 return DAG.getBoolConstant(V: EqTrue, DL: dl, VT, OpVT);
5519 if (UOF == unsigned(EqTrue))
5520 return DAG.getBoolConstant(V: EqTrue, DL: dl, VT, OpVT);
5521 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5522 // if it is not already.
5523 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5524 if (NewCond != Cond &&
5525 (DCI.isBeforeLegalizeOps() ||
5526 isCondCodeLegal(CC: NewCond, VT: N0.getSimpleValueType())))
5527 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: NewCond);
5528 }
5529
5530 // ~X > ~Y --> Y > X
5531 // ~X < ~Y --> Y < X
5532 // ~X < C --> X > ~C
5533 // ~X > C --> X < ~C
5534 if ((isSignedIntSetCC(Code: Cond) || isUnsignedIntSetCC(Code: Cond)) &&
5535 N0.getValueType().isInteger()) {
5536 if (isBitwiseNot(V: N0)) {
5537 if (isBitwiseNot(V: N1))
5538 return DAG.getSetCC(DL: dl, VT, LHS: N1.getOperand(i: 0), RHS: N0.getOperand(i: 0), Cond);
5539
5540 if (DAG.isConstantIntBuildVectorOrConstantInt(N: N1) &&
5541 !DAG.isConstantIntBuildVectorOrConstantInt(N: N0.getOperand(i: 0))) {
5542 SDValue Not = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5543 return DAG.getSetCC(DL: dl, VT, LHS: Not, RHS: N0.getOperand(i: 0), Cond);
5544 }
5545 }
5546 }
5547
5548 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5549 N0.getValueType().isInteger()) {
5550 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5551 N0.getOpcode() == ISD::XOR) {
5552 // Simplify (X+Y) == (X+Z) --> Y == Z
5553 if (N0.getOpcode() == N1.getOpcode()) {
5554 if (N0.getOperand(i: 0) == N1.getOperand(i: 0))
5555 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 1), RHS: N1.getOperand(i: 1), Cond);
5556 if (N0.getOperand(i: 1) == N1.getOperand(i: 1))
5557 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1.getOperand(i: 0), Cond);
5558 if (isCommutativeBinOp(Opcode: N0.getOpcode())) {
5559 // If X op Y == Y op X, try other combinations.
5560 if (N0.getOperand(i: 0) == N1.getOperand(i: 1))
5561 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 1), RHS: N1.getOperand(i: 0),
5562 Cond);
5563 if (N0.getOperand(i: 1) == N1.getOperand(i: 0))
5564 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1.getOperand(i: 1),
5565 Cond);
5566 }
5567 }
5568
5569 // If RHS is a legal immediate value for a compare instruction, we need
5570 // to be careful about increasing register pressure needlessly.
5571 bool LegalRHSImm = false;
5572
5573 if (auto *RHSC = dyn_cast<ConstantSDNode>(Val&: N1)) {
5574 if (auto *LHSR = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
5575 // Turn (X+C1) == C2 --> X == C2-C1
5576 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5577 return DAG.getSetCC(
5578 DL: dl, VT, LHS: N0.getOperand(i: 0),
5579 RHS: DAG.getConstant(Val: RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5580 DL: dl, VT: N0.getValueType()),
5581 Cond);
5582
5583 // Turn (X^C1) == C2 --> X == C1^C2
5584 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5585 return DAG.getSetCC(
5586 DL: dl, VT, LHS: N0.getOperand(i: 0),
5587 RHS: DAG.getConstant(Val: LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5588 DL: dl, VT: N0.getValueType()),
5589 Cond);
5590 }
5591
5592 // Turn (C1-X) == C2 --> X == C1-C2
5593 if (auto *SUBC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 0)))
5594 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5595 return DAG.getSetCC(
5596 DL: dl, VT, LHS: N0.getOperand(i: 1),
5597 RHS: DAG.getConstant(Val: SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5598 DL: dl, VT: N0.getValueType()),
5599 Cond);
5600
5601 // Could RHSC fold directly into a compare?
5602 if (RHSC->getValueType(ResNo: 0).getSizeInBits() <= 64)
5603 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5604 }
5605
5606 // (X+Y) == X --> Y == 0 and similar folds.
5607 // Don't do this if X is an immediate that can fold into a cmp
5608 // instruction and X+Y has other uses. It could be an induction variable
5609 // chain, and the transform would increase register pressure.
5610 if (!LegalRHSImm || N0.hasOneUse())
5611 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, DL: dl, DCI))
5612 return V;
5613 }
5614
5615 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5616 N1.getOpcode() == ISD::XOR)
5617 if (SDValue V = foldSetCCWithBinOp(VT, N0: N1, N1: N0, Cond, DL: dl, DCI))
5618 return V;
5619
5620 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, DL: dl, DCI))
5621 return V;
5622
5623 if (SDValue V = foldSetCCWithOr(VT, N0, N1, Cond, DL: dl, DCI))
5624 return V;
5625 }
5626
5627 // Fold remainder of division by a constant.
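  // For instance, assuming the usual multiplicative-inverse rewrite in
  // buildUREMEqFold, (X urem 25) == 0 for i32 becomes
  // (X * 0xC28F5C29) u<= 0x0A3D70A3, with no division at all.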
5628 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5629 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5630 // When division is cheap or optimizing for minimum size,
5631 // fall through to DIVREM creation by skipping this fold.
5632 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Kind: Attribute::MinSize)) {
5633 if (N0.getOpcode() == ISD::UREM) {
5634 if (SDValue Folded = buildUREMEqFold(SETCCVT: VT, REMNode: N0, CompTargetNode: N1, Cond, DCI, DL: dl))
5635 return Folded;
5636 } else if (N0.getOpcode() == ISD::SREM) {
5637 if (SDValue Folded = buildSREMEqFold(SETCCVT: VT, REMNode: N0, CompTargetNode: N1, Cond, DCI, DL: dl))
5638 return Folded;
5639 }
5640 }
5641 }
5642
5643 // Fold away ALL boolean setcc's.
5644 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5645 SDValue Temp;
5646 switch (Cond) {
5647 default: llvm_unreachable("Unknown integer setcc!");
5648 case ISD::SETEQ: // X == Y -> ~(X^Y)
5649 Temp = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OpVT, N1: N0, N2: N1);
5650 N0 = DAG.getNOT(DL: dl, Val: Temp, VT: OpVT);
5651 if (!DCI.isCalledByLegalizer())
5652 DCI.AddToWorklist(N: Temp.getNode());
5653 break;
5654 case ISD::SETNE: // X != Y --> (X^Y)
5655 N0 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OpVT, N1: N0, N2: N1);
5656 break;
5657 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5658 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5659 Temp = DAG.getNOT(DL: dl, Val: N0, VT: OpVT);
5660 N0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1, N2: Temp);
5661 if (!DCI.isCalledByLegalizer())
5662 DCI.AddToWorklist(N: Temp.getNode());
5663 break;
5664 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5665 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5666 Temp = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5667 N0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1: N0, N2: Temp);
5668 if (!DCI.isCalledByLegalizer())
5669 DCI.AddToWorklist(N: Temp.getNode());
5670 break;
5671 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5672 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5673 Temp = DAG.getNOT(DL: dl, Val: N0, VT: OpVT);
5674 N0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1, N2: Temp);
5675 if (!DCI.isCalledByLegalizer())
5676 DCI.AddToWorklist(N: Temp.getNode());
5677 break;
5678 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5679 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5680 Temp = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5681 N0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: N0, N2: Temp);
5682 break;
5683 }
5684 if (VT.getScalarType() != MVT::i1) {
5685 if (!DCI.isCalledByLegalizer())
5686 DCI.AddToWorklist(N: N0.getNode());
5687 // FIXME: If running after legalize, we probably can't do this.
5688 ISD::NodeType ExtendCode = getExtendForContent(Content: getBooleanContents(Type: OpVT));
5689 N0 = DAG.getNode(Opcode: ExtendCode, DL: dl, VT, Operand: N0);
5690 }
5691 return N0;
5692 }
5693
5694 // Fold (setcc (trunc x) (trunc y)) -> (setcc x y)
5695 if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE &&
5696 N0.getOperand(i: 0).getValueType() == N1.getOperand(i: 0).getValueType() &&
5697 ((!ISD::isSignedIntSetCC(Code: Cond) && N0->getFlags().hasNoUnsignedWrap() &&
5698 N1->getFlags().hasNoUnsignedWrap()) ||
5699 (!ISD::isUnsignedIntSetCC(Code: Cond) && N0->getFlags().hasNoSignedWrap() &&
5700 N1->getFlags().hasNoSignedWrap())) &&
5701 isTypeDesirableForOp(ISD::SETCC, VT: N0.getOperand(i: 0).getValueType())) {
5702 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1.getOperand(i: 0), Cond);
5703 }
5704
5705 // Fold (setcc (sub nsw a, b), zero, s??) -> (setcc a, b, s??)
5706 // TODO: Remove that .isVector() check
5707 if (VT.isVector() && isZeroOrZeroSplat(N: N1) && N0.getOpcode() == ISD::SUB &&
5708 N0->getFlags().hasNoSignedWrap() && ISD::isSignedIntSetCC(Code: Cond)) {
5709 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N0.getOperand(i: 1), Cond);
5710 }
5711
5712 // Could not fold it.
5713 return SDValue();
5714}
5715
5716/// Returns true (and the GlobalValue and the offset) if the node is a
5717/// GlobalAddress + offset.
5718bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
5719 int64_t &Offset) const {
5720
5721 SDNode *N = unwrapAddress(N: SDValue(WN, 0)).getNode();
5722
5723 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(Val: N)) {
5724 GA = GASD->getGlobal();
5725 Offset += GASD->getOffset();
5726 return true;
5727 }
5728
5729 if (N->isAnyAdd()) {
5730 SDValue N1 = N->getOperand(Num: 0);
5731 SDValue N2 = N->getOperand(Num: 1);
5732 if (isGAPlusOffset(WN: N1.getNode(), GA, Offset)) {
5733 if (auto *V = dyn_cast<ConstantSDNode>(Val&: N2)) {
5734 Offset += V->getSExtValue();
5735 return true;
5736 }
5737 } else if (isGAPlusOffset(WN: N2.getNode(), GA, Offset)) {
5738 if (auto *V = dyn_cast<ConstantSDNode>(Val&: N1)) {
5739 Offset += V->getSExtValue();
5740 return true;
5741 }
5742 }
5743 }
5744
5745 return false;
5746}
5747
5748SDValue TargetLowering::PerformDAGCombine(SDNode *N,
5749 DAGCombinerInfo &DCI) const {
5750 // Default implementation: no optimization.
5751 return SDValue();
5752}
5753
5754//===----------------------------------------------------------------------===//
5755// Inline Assembler Implementation Methods
5756//===----------------------------------------------------------------------===//
5757
5758TargetLowering::ConstraintType
5759TargetLowering::getConstraintType(StringRef Constraint) const {
5760 unsigned S = Constraint.size();
5761
5762 if (S == 1) {
5763 switch (Constraint[0]) {
5764 default: break;
5765 case 'r':
5766 return C_RegisterClass;
5767 case 'm': // memory
    case 'o': // offsettable
    case 'V': // not offsettable
5770 return C_Memory;
5771 case 'p': // Address.
5772 return C_Address;
5773 case 'n': // Simple Integer
5774 case 'E': // Floating Point Constant
5775 case 'F': // Floating Point Constant
5776 return C_Immediate;
5777 case 'i': // Simple Integer or Relocatable Constant
5778 case 's': // Relocatable Constant
5779 case 'X': // Allow ANY value.
5780 case 'I': // Target registers.
5781 case 'J':
5782 case 'K':
5783 case 'L':
5784 case 'M':
5785 case 'N':
5786 case 'O':
5787 case 'P':
5788 case '<':
5789 case '>':
5790 return C_Other;
5791 }
5792 }
5793
5794 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5795 if (S == 8 && Constraint.substr(Start: 1, N: 6) == "memory") // "{memory}"
5796 return C_Memory;
5797 return C_Register;
5798 }
5799 return C_Unknown;
5800}
5801
5802/// Try to replace an X constraint, which matches anything, with another that
5803/// has more specific requirements based on the type of the corresponding
5804/// operand.
5805const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5806 if (ConstraintVT.isInteger())
5807 return "r";
5808 if (ConstraintVT.isFloatingPoint())
5809 return "f"; // works for many targets
5810 return nullptr;
5811}
5812
5813SDValue TargetLowering::LowerAsmOutputForConstraint(
5814 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5815 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5816 return SDValue();
5817}
5818
5819/// Lower the specified operand into the Ops vector.
5820/// If it is invalid, don't add anything to Ops.
5821void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
5822 StringRef Constraint,
5823 std::vector<SDValue> &Ops,
5824 SelectionDAG &DAG) const {
5825
5826 if (Constraint.size() > 1)
5827 return;
5828
5829 char ConstraintLetter = Constraint[0];
5830 switch (ConstraintLetter) {
5831 default: break;
5832 case 'X': // Allows any operand
5833 case 'i': // Simple Integer or Relocatable Constant
5834 case 'n': // Simple Integer
5835 case 's': { // Relocatable Constant
5836
5837 ConstantSDNode *C;
5838 uint64_t Offset = 0;
5839
5840 // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
    // etc., since getelementptr is variadic. We can't use
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be directly
    // accessible, whereas here the GA may be the node furthest from the root
    // (likely underneath an ISD::ADD).
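    // For example, (GA + 5) + 7 walks down through both ISD::ADD nodes,
    // accumulating Offset = 12, and emits the target global address with
    // that combined offset.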
5845 while (true) {
5846 if ((C = dyn_cast<ConstantSDNode>(Val&: Op)) && ConstraintLetter != 's') {
5847 // gcc prints these as sign extended. Sign extend value to 64 bits
5848 // now; without this it would get ZExt'd later in
5849 // ScheduleDAGSDNodes::EmitNode, which is very generic.
5850 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5851 BooleanContent BCont = getBooleanContents(Type: MVT::i64);
5852 ISD::NodeType ExtOpc =
5853 IsBool ? getExtendForContent(Content: BCont) : ISD::SIGN_EXTEND;
5854 int64_t ExtVal =
5855 ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5856 Ops.push_back(
5857 x: DAG.getTargetConstant(Val: Offset + ExtVal, DL: SDLoc(C), VT: MVT::i64));
5858 return;
5859 }
5860 if (ConstraintLetter != 'n') {
5861 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Val&: Op)) {
5862 Ops.push_back(x: DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL: SDLoc(Op),
5863 VT: GA->getValueType(ResNo: 0),
5864 offset: Offset + GA->getOffset()));
5865 return;
5866 }
5867 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Val&: Op)) {
5868 Ops.push_back(x: DAG.getTargetBlockAddress(
5869 BA: BA->getBlockAddress(), VT: BA->getValueType(ResNo: 0),
5870 Offset: Offset + BA->getOffset(), TargetFlags: BA->getTargetFlags()));
5871 return;
5872 }
5873 if (isa<BasicBlockSDNode>(Val: Op)) {
5874 Ops.push_back(x: Op);
5875 return;
5876 }
5877 }
5878 const unsigned OpCode = Op.getOpcode();
5879 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5880 if ((C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 0))))
5881 Op = Op.getOperand(i: 1);
5882 // Subtraction is not commutative.
5883 else if (OpCode == ISD::ADD &&
5884 (C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1))))
5885 Op = Op.getOperand(i: 0);
5886 else
5887 return;
5888 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5889 continue;
5890 }
5891 return;
5892 }
5893 break;
5894 }
5895 }
5896}
5897
5898void TargetLowering::CollectTargetIntrinsicOperands(
5899 const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
5900}
5901
5902std::pair<unsigned, const TargetRegisterClass *>
5903TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
5904 StringRef Constraint,
5905 MVT VT) const {
5906 if (!Constraint.starts_with(Prefix: "{"))
5907 return std::make_pair(x: 0u, y: static_cast<TargetRegisterClass *>(nullptr));
5908 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5909
5910 // Remove the braces from around the name.
5911 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5912
5913 std::pair<unsigned, const TargetRegisterClass *> R =
5914 std::make_pair(x: 0u, y: static_cast<const TargetRegisterClass *>(nullptr));
5915
5916 // Figure out which register class contains this reg.
5917 for (const TargetRegisterClass *RC : RI->regclasses()) {
5918 // If none of the value types for this register class are valid, we
5919 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5920 if (!isLegalRC(TRI: *RI, RC: *RC))
5921 continue;
5922
5923 for (const MCPhysReg &PR : *RC) {
5924 if (RegName.equals_insensitive(RHS: RI->getRegAsmName(Reg: PR))) {
5925 std::pair<unsigned, const TargetRegisterClass *> S =
5926 std::make_pair(x: PR, y&: RC);
5927
5928 // If this register class has the requested value type, return it,
5929 // otherwise keep searching and return the first class found
5930 // if no other is found which explicitly has the requested type.
5931 if (RI->isTypeLegalForClass(RC: *RC, T: VT))
5932 return S;
5933 if (!R.second)
5934 R = S;
5935 }
5936 }
5937 }
5938
5939 return R;
5940}
5941
5942//===----------------------------------------------------------------------===//
5943// Constraint Selection.
5944
/// Return true if this is an input operand that is a matching constraint
/// like "4".
5947bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
5948 assert(!ConstraintCode.empty() && "No known constraint!");
5949 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5950}
5951
5952/// If this is an input matching constraint, this method returns the output
5953/// operand it matches.
5954unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
5955 assert(!ConstraintCode.empty() && "No known constraint!");
5956 return atoi(nptr: ConstraintCode.c_str());
5957}
5958
5959/// Split up the constraint string from the inline assembly value into the
5960/// specific constraints and their prefixes, and also tie in the associated
5961/// operand values.
5962/// If this returns an empty vector, and if the constraint string itself
5963/// isn't empty, there was an error parsing.
5964TargetLowering::AsmOperandInfoVector
5965TargetLowering::ParseConstraints(const DataLayout &DL,
5966 const TargetRegisterInfo *TRI,
5967 const CallBase &Call) const {
5968 /// Information about all of the constraints.
5969 AsmOperandInfoVector ConstraintOperands;
5970 const InlineAsm *IA = cast<InlineAsm>(Val: Call.getCalledOperand());
5971 unsigned maCount = 0; // Largest number of multiple alternative constraints.
5972
5973 // Do a prepass over the constraints, canonicalizing them, and building up the
5974 // ConstraintOperands list.
5975 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
5976 unsigned ResNo = 0; // ResNo - The result number of the next output.
5977 unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
5978
5979 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
5980 ConstraintOperands.emplace_back(args: std::move(CI));
5981 AsmOperandInfo &OpInfo = ConstraintOperands.back();
5982
5983 // Update multiple alternative constraint count.
5984 if (OpInfo.multipleAlternatives.size() > maCount)
5985 maCount = OpInfo.multipleAlternatives.size();
5986
5987 OpInfo.ConstraintVT = MVT::Other;
5988
5989 // Compute the value type for each operand.
5990 switch (OpInfo.Type) {
5991 case InlineAsm::isOutput: {
5992 // Indirect outputs just consume an argument.
5993 if (OpInfo.isIndirect) {
5994 OpInfo.CallOperandVal = Call.getArgOperand(i: ArgNo);
5995 break;
5996 }
5997
5998 // The return value of the call is this value. As such, there is no
5999 // corresponding argument.
6000 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
6001 EVT VT;
6002 if (auto *STy = dyn_cast<StructType>(Val: Call.getType())) {
6003 VT = getAsmOperandValueType(DL, Ty: STy->getElementType(N: ResNo));
6004 } else {
6005 assert(ResNo == 0 && "Asm only has one result!");
6006 VT = getAsmOperandValueType(DL, Ty: Call.getType());
6007 }
6008 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
6009 ++ResNo;
6010 break;
6011 }
6012 case InlineAsm::isInput:
6013 OpInfo.CallOperandVal = Call.getArgOperand(i: ArgNo);
6014 break;
6015 case InlineAsm::isLabel:
6016 OpInfo.CallOperandVal = cast<CallBrInst>(Val: &Call)->getIndirectDest(i: LabelNo);
6017 ++LabelNo;
6018 continue;
6019 case InlineAsm::isClobber:
6020 // Nothing to do.
6021 break;
6022 }
6023
6024 if (OpInfo.CallOperandVal) {
6025 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
6026 if (OpInfo.isIndirect) {
6027 OpTy = Call.getParamElementType(ArgNo);
6028 assert(OpTy && "Indirect operand must have elementtype attribute");
6029 }
6030
      // Look for a vector wrapped in a struct, e.g. { <16 x i8> }.
6032 if (StructType *STy = dyn_cast<StructType>(Val: OpTy))
6033 if (STy->getNumElements() == 1)
6034 OpTy = STy->getElementType(N: 0);
6035
6036 // If OpTy is not a single value, it may be a struct/union that we
6037 // can tile with integers.
6038 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
6039 unsigned BitSize = DL.getTypeSizeInBits(Ty: OpTy);
6040 switch (BitSize) {
6041 default: break;
6042 case 1:
6043 case 8:
6044 case 16:
6045 case 32:
6046 case 64:
6047 case 128:
6048 OpTy = IntegerType::get(C&: OpTy->getContext(), NumBits: BitSize);
6049 break;
6050 }
6051 }
6052
6053 EVT VT = getAsmOperandValueType(DL, Ty: OpTy, AllowUnknown: true);
6054 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
6055 ArgNo++;
6056 }
6057 }
6058
6059 // If we have multiple alternative constraints, select the best alternative.
6060 if (!ConstraintOperands.empty()) {
6061 if (maCount) {
6062 unsigned bestMAIndex = 0;
6063 int bestWeight = -1;
6064 // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
6065 int weight = -1;
6066 unsigned maIndex;
6067 // Compute the sums of the weights for each alternative, keeping track
6068 // of the best (highest weight) one so far.
6069 for (maIndex = 0; maIndex < maCount; ++maIndex) {
6070 int weightSum = 0;
6071 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6072 cIndex != eIndex; ++cIndex) {
6073 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6074 if (OpInfo.Type == InlineAsm::isClobber)
6075 continue;
6076
          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer and the other is floating point, or their sizes
          // differ, flag this alternative as impossible to match.
6081 if (OpInfo.hasMatchingInput()) {
6082 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6083 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6084 if ((OpInfo.ConstraintVT.isInteger() !=
6085 Input.ConstraintVT.isInteger()) ||
6086 (OpInfo.ConstraintVT.getSizeInBits() !=
6087 Input.ConstraintVT.getSizeInBits())) {
6088 weightSum = -1; // Can't match.
6089 break;
6090 }
6091 }
6092 }
6093 weight = getMultipleConstraintMatchWeight(info&: OpInfo, maIndex);
6094 if (weight == -1) {
6095 weightSum = -1;
6096 break;
6097 }
6098 weightSum += weight;
6099 }
6100 // Update best.
6101 if (weightSum > bestWeight) {
6102 bestWeight = weightSum;
6103 bestMAIndex = maIndex;
6104 }
6105 }
6106
6107 // Now select chosen alternative in each constraint.
6108 for (AsmOperandInfo &cInfo : ConstraintOperands)
6109 if (cInfo.Type != InlineAsm::isClobber)
6110 cInfo.selectAlternative(index: bestMAIndex);
6111 }
6112 }
6113
6114 // Check and hook up tied operands, choose constraint code to use.
6115 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6116 cIndex != eIndex; ++cIndex) {
6117 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6118
6119 // If this is an output operand with a matching input operand, look up the
6120 // matching input. If their types mismatch, e.g. one is an integer, the
6121 // other is floating point, or their sizes are different, flag it as an
6122 // error.
6123 if (OpInfo.hasMatchingInput()) {
6124 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6125
6126 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6127 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
6128 getRegForInlineAsmConstraint(RI: TRI, Constraint: OpInfo.ConstraintCode,
6129 VT: OpInfo.ConstraintVT);
6130 std::pair<unsigned, const TargetRegisterClass *> InputRC =
6131 getRegForInlineAsmConstraint(RI: TRI, Constraint: Input.ConstraintCode,
6132 VT: Input.ConstraintVT);
6133 const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
6134 OpInfo.ConstraintVT.isFloatingPoint();
6135 const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
6136 Input.ConstraintVT.isFloatingPoint();
6137 if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
6138 (MatchRC.second != InputRC.second)) {
6139 report_fatal_error(reason: "Unsupported asm: input constraint"
6140 " with a matching output constraint of"
6141 " incompatible type!");
6142 }
6143 }
6144 }
6145 }
6146
6147 return ConstraintOperands;
6148}
6149
/// Return a number indicating our preference for choosing a type of constraint
/// over another, for the purpose of sorting them. Immediates are almost always
/// preferable (when they can be emitted). A higher return value means a
6153/// stronger preference for one constraint type relative to another.
6154/// FIXME: We should prefer registers over memory but doing so may lead to
6155/// unrecoverable register exhaustion later.
6156/// https://github.com/llvm/llvm-project/issues/20571
static unsigned getConstraintPriority(TargetLowering::ConstraintType CT) {
6158 switch (CT) {
6159 case TargetLowering::C_Immediate:
6160 case TargetLowering::C_Other:
6161 return 4;
6162 case TargetLowering::C_Memory:
6163 case TargetLowering::C_Address:
6164 return 3;
6165 case TargetLowering::C_RegisterClass:
6166 return 2;
6167 case TargetLowering::C_Register:
6168 return 1;
6169 case TargetLowering::C_Unknown:
6170 return 0;
6171 }
6172 llvm_unreachable("Invalid constraint type");
6173}
6174
6175/// Examine constraint type and operand type and determine a weight value.
6176/// This object must already have been set up with the operand type
6177/// and the current alternative constraint selected.
6178TargetLowering::ConstraintWeight
6179 TargetLowering::getMultipleConstraintMatchWeight(
6180 AsmOperandInfo &info, int maIndex) const {
6181 InlineAsm::ConstraintCodeVector *rCodes;
6182 if (maIndex >= (int)info.multipleAlternatives.size())
6183 rCodes = &info.Codes;
6184 else
6185 rCodes = &info.multipleAlternatives[maIndex].Codes;
6186 ConstraintWeight BestWeight = CW_Invalid;
6187
6188 // Loop over the options, keeping track of the most general one.
6189 for (const std::string &rCode : *rCodes) {
6190 ConstraintWeight weight =
6191 getSingleConstraintMatchWeight(info, constraint: rCode.c_str());
6192 if (weight > BestWeight)
6193 BestWeight = weight;
6194 }
6195
6196 return BestWeight;
6197}
6198
6199/// Examine constraint type and operand type and determine a weight value.
6200/// This object must already have been set up with the operand type
6201/// and the current alternative constraint selected.
6202TargetLowering::ConstraintWeight
6203 TargetLowering::getSingleConstraintMatchWeight(
6204 AsmOperandInfo &info, const char *constraint) const {
6205 ConstraintWeight weight = CW_Invalid;
6206 Value *CallOperandVal = info.CallOperandVal;
6207 // If we don't have a value, we can't do a match,
6208 // but allow it at the lowest weight.
6209 if (!CallOperandVal)
6210 return CW_Default;
6211 // Look at the constraint type.
6212 switch (*constraint) {
6213 case 'i': // immediate integer.
6214 case 'n': // immediate integer with a known value.
6215 if (isa<ConstantInt>(Val: CallOperandVal))
6216 weight = CW_Constant;
6217 break;
  case 's': // non-explicit integral immediate.
6219 if (isa<GlobalValue>(Val: CallOperandVal))
6220 weight = CW_Constant;
6221 break;
6222 case 'E': // immediate float if host format.
6223 case 'F': // immediate float.
6224 if (isa<ConstantFP>(Val: CallOperandVal))
6225 weight = CW_Constant;
6226 break;
6227 case '<': // memory operand with autodecrement.
6228 case '>': // memory operand with autoincrement.
6229 case 'm': // memory operand.
6230 case 'o': // offsettable memory operand
6231 case 'V': // non-offsettable memory operand
6232 weight = CW_Memory;
6233 break;
6234 case 'r': // general register.
6235 case 'g': // general register, memory operand or immediate integer.
6236 // note: Clang converts "g" to "imr".
6237 if (CallOperandVal->getType()->isIntegerTy())
6238 weight = CW_Register;
6239 break;
6240 case 'X': // any operand.
6241 default:
6242 weight = CW_Default;
6243 break;
6244 }
6245 return weight;
6246}
6247
6248/// If there are multiple different constraints that we could pick for this
6249/// operand (e.g. "imr") try to pick the 'best' one.
6250/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
6251/// into seven classes:
6252/// Register -> one specific register
6253/// RegisterClass -> a group of regs
6254/// Memory -> memory
6255/// Address -> a symbolic memory reference
6256/// Immediate -> immediate values
6257/// Other -> magic values (such as "Flag Output Operands")
6258/// Unknown -> something we don't recognize yet and can't handle
6259/// Ideally, we would pick the most specific constraint possible: if we have
6260/// something that fits into a register, we would pick it. The problem here
/// is that if we have something that could either be in a register or in
/// memory, then using the register could cause selection of *other*
/// operands to fail: they might only succeed if we pick memory. Because of
/// this, the heuristic we use is:
6265///
6266/// 1) If there is an 'other' constraint, and if the operand is valid for
6267/// that constraint, use it. This makes us take advantage of 'i'
6268/// constraints when available.
6269/// 2) Otherwise, pick the most general constraint present. This prefers
6270/// 'm' over 'r', for example.
6271///
6272TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
6273 TargetLowering::AsmOperandInfo &OpInfo) const {
6274 ConstraintGroup Ret;
6275
6276 Ret.reserve(N: OpInfo.Codes.size());
6277 for (StringRef Code : OpInfo.Codes) {
6278 TargetLowering::ConstraintType CType = getConstraintType(Constraint: Code);
6279
6280 // Indirect 'other' or 'immediate' constraints are not allowed.
6281 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
6282 CType == TargetLowering::C_Register ||
6283 CType == TargetLowering::C_RegisterClass))
6284 continue;
6285
6286 // Things with matching constraints can only be registers, per gcc
6287 // documentation. This mainly affects "g" constraints.
6288 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6289 continue;
6290
6291 Ret.emplace_back(Args&: Code, Args&: CType);
6292 }
6293
  llvm::stable_sort(Range&: Ret, C: [](ConstraintPair a, ConstraintPair b) {
    return getConstraintPriority(CT: a.second) >
           getConstraintPriority(CT: b.second);
6296 });
6297
6298 return Ret;
6299}
6300
6301/// If we have an immediate, see if we can lower it. Return true if we can,
6302/// false otherwise.
6303static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
6304 SDValue Op, SelectionDAG *DAG,
6305 const TargetLowering &TLI) {
6306
6307 assert((P.second == TargetLowering::C_Other ||
6308 P.second == TargetLowering::C_Immediate) &&
6309 "need immediate or other");
6310
6311 if (!Op.getNode())
6312 return false;
6313
6314 std::vector<SDValue> ResultOps;
6315 TLI.LowerAsmOperandForConstraint(Op, Constraint: P.first, Ops&: ResultOps, DAG&: *DAG);
6316 return !ResultOps.empty();
6317}
6318
6319/// Determines the constraint code and constraint type to use for the specific
6320/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
6321void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
6322 SDValue Op,
6323 SelectionDAG *DAG) const {
6324 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
6325
6326 // Single-letter constraints ('r') are very common.
6327 if (OpInfo.Codes.size() == 1) {
6328 OpInfo.ConstraintCode = OpInfo.Codes[0];
6329 OpInfo.ConstraintType = getConstraintType(Constraint: OpInfo.ConstraintCode);
6330 } else {
6331 ConstraintGroup G = getConstraintPreferences(OpInfo);
6332 if (G.empty())
6333 return;
6334
6335 unsigned BestIdx = 0;
6336 for (const unsigned E = G.size();
6337 BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
6338 G[BestIdx].second == TargetLowering::C_Immediate);
6339 ++BestIdx) {
6340 if (lowerImmediateIfPossible(P&: G[BestIdx], Op, DAG, TLI: *this))
6341 break;
6342 // If we're out of constraints, just pick the first one.
6343 if (BestIdx + 1 == E) {
6344 BestIdx = 0;
6345 break;
6346 }
6347 }
6348
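    // At this point BestIdx is either an immediate/other constraint that we
    // proved lowerable, the first non-immediate alternative, or 0 if every
    // alternative was an immediate we could not lower.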
6349 OpInfo.ConstraintCode = G[BestIdx].first;
6350 OpInfo.ConstraintType = G[BestIdx].second;
6351 }
6352
6353 // 'X' matches anything.
6354 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6355 // Constants are handled elsewhere. For Functions, the type here is the
6356 // type of the result, which is not what we want to look at; leave them
6357 // alone.
6358 Value *v = OpInfo.CallOperandVal;
6359 if (isa<ConstantInt>(Val: v) || isa<Function>(Val: v)) {
6360 return;
6361 }
6362
6363 if (isa<BasicBlock>(Val: v) || isa<BlockAddress>(Val: v)) {
6364 OpInfo.ConstraintCode = "i";
6365 return;
6366 }
6367
6368 // Otherwise, try to resolve it to something we know about by looking at
6369 // the actual operand type.
6370 if (const char *Repl = LowerXConstraint(ConstraintVT: OpInfo.ConstraintVT)) {
6371 OpInfo.ConstraintCode = Repl;
6372 OpInfo.ConstraintType = getConstraintType(Constraint: OpInfo.ConstraintCode);
6373 }
6374 }
6375}
6376
6377/// Given an exact SDIV by a constant, create a multiplication
6378/// with the multiplicative inverse of the constant.
6379/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
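/// For example, an exact sdiv by 6 (for i32) shifts right arithmetically by
/// 1 to divide out the power-of-two factor, then multiplies by 0xAAAAAAAB,
/// the multiplicative inverse of 3 modulo 2^32.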
6380static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
6381 const SDLoc &dl, SelectionDAG &DAG,
6382 SmallVectorImpl<SDNode *> &Created) {
6383 SDValue Op0 = N->getOperand(Num: 0);
6384 SDValue Op1 = N->getOperand(Num: 1);
6385 EVT VT = N->getValueType(ResNo: 0);
6386 EVT ShVT = TLI.getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6387 EVT ShSVT = ShVT.getScalarType();
6388
6389 bool UseSRA = false;
6390 SmallVector<SDValue, 16> Shifts, Factors;
6391
6392 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6393 if (C->isZero())
6394 return false;
6395
6396 EVT CT = C->getValueType(ResNo: 0);
6397 APInt Divisor = C->getAPIntValue();
6398 unsigned Shift = Divisor.countr_zero();
6399 if (Shift) {
6400 Divisor.ashrInPlace(ShiftAmt: Shift);
6401 UseSRA = true;
6402 }
6403 APInt Factor = Divisor.multiplicativeInverse();
6404 Shifts.push_back(Elt: DAG.getConstant(Val: Shift, DL: dl, VT: ShSVT));
6405 Factors.push_back(Elt: DAG.getConstant(Val: Factor, DL: dl, VT: CT));
6406 return true;
6407 };
6408
6409 // Collect all magic values from the build vector.
6410 if (!ISD::matchUnaryPredicate(Op: Op1, Match: BuildSDIVPattern, /*AllowUndefs=*/false,
6411 /*AllowTruncation=*/true))
6412 return SDValue();
6413
6414 SDValue Shift, Factor;
6415 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6416 Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
6417 Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
6418 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6419 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6420 "Expected matchUnaryPredicate to return one element for scalable "
6421 "vectors");
6422 Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts[0]);
6423 Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors[0]);
6424 } else {
6425 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6426 Shift = Shifts[0];
6427 Factor = Factors[0];
6428 }
6429
6430 SDValue Res = Op0;
6431 if (UseSRA) {
6432 Res = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Res, N2: Shift, Flags: SDNodeFlags::Exact);
6433 Created.push_back(Elt: Res.getNode());
6434 }
6435
6436 return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Res, N2: Factor);
6437}
6438
6439/// Given an exact UDIV by a constant, create a multiplication
6440/// with the multiplicative inverse of the constant.
6441/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
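/// For example, an exact udiv by 10 (for i32) shifts right logically by 1,
/// then multiplies by 0xCCCCCCCD, the multiplicative inverse of 5 modulo
/// 2^32.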
6442static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
6443 const SDLoc &dl, SelectionDAG &DAG,
6444 SmallVectorImpl<SDNode *> &Created) {
6445 EVT VT = N->getValueType(ResNo: 0);
6446 EVT ShVT = TLI.getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6447 EVT ShSVT = ShVT.getScalarType();
6448
6449 bool UseSRL = false;
6450 SmallVector<SDValue, 16> Shifts, Factors;
6451
6452 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6453 if (C->isZero())
6454 return false;
6455
6456 EVT CT = C->getValueType(ResNo: 0);
6457 APInt Divisor = C->getAPIntValue();
6458 unsigned Shift = Divisor.countr_zero();
6459 if (Shift) {
6460 Divisor.lshrInPlace(ShiftAmt: Shift);
6461 UseSRL = true;
6462 }
    // Calculate the multiplicative inverse modulo 2^BW (BW = bit width).
6464 APInt Factor = Divisor.multiplicativeInverse();
6465 Shifts.push_back(Elt: DAG.getConstant(Val: Shift, DL: dl, VT: ShSVT));
6466 Factors.push_back(Elt: DAG.getConstant(Val: Factor, DL: dl, VT: CT));
6467 return true;
6468 };
6469
6470 SDValue Op1 = N->getOperand(Num: 1);
6471
6472 // Collect all magic values from the build vector.
6473 if (!ISD::matchUnaryPredicate(Op: Op1, Match: BuildUDIVPattern, /*AllowUndefs=*/false,
6474 /*AllowTruncation=*/true))
6475 return SDValue();
6476
6477 SDValue Shift, Factor;
6478 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6479 Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
6480 Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
6481 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6482 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6483 "Expected matchUnaryPredicate to return one element for scalable "
6484 "vectors");
6485 Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts[0]);
6486 Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors[0]);
6487 } else {
6488 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6489 Shift = Shifts[0];
6490 Factor = Factors[0];
6491 }
6492
6493 SDValue Res = N->getOperand(Num: 0);
6494 if (UseSRL) {
6495 Res = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Res, N2: Shift, Flags: SDNodeFlags::Exact);
6496 Created.push_back(Elt: Res.getNode());
6497 }
6498
6499 return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Res, N2: Factor);
6500}
6501
6502SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
6503 SelectionDAG &DAG,
6504 SmallVectorImpl<SDNode *> &Created) const {
6505 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6506 if (isIntDivCheap(VT: N->getValueType(ResNo: 0), Attr))
6507 return SDValue(N, 0); // Lower SDIV as SDIV
6508 return SDValue();
6509}
6510
6511SDValue
6512TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
6513 SelectionDAG &DAG,
6514 SmallVectorImpl<SDNode *> &Created) const {
6515 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6516 if (isIntDivCheap(VT: N->getValueType(ResNo: 0), Attr))
6517 return SDValue(N, 0); // Lower SREM as SREM
6518 return SDValue();
6519}
6520
6521/// Build sdiv by power-of-2 with conditional move instructions
6522/// Ref: "Hacker's Delight" by Henry Warren 10-1
6523/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6524/// bgez x, label
6525/// add x, x, 2**k-1
6526/// label:
6527/// sra res, x, k
6528/// neg res, res (when the divisor is negative)
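/// For example, sdiv x, 8 (k == 3) becomes (x < 0 ? x + 7 : x) >>s 3, and
/// sdiv x, -8 negates that result on top.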
6529SDValue TargetLowering::buildSDIVPow2WithCMov(
6530 SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6531 SmallVectorImpl<SDNode *> &Created) const {
6532 unsigned Lg2 = Divisor.countr_zero();
6533 EVT VT = N->getValueType(ResNo: 0);
6534
6535 SDLoc DL(N);
6536 SDValue N0 = N->getOperand(Num: 0);
6537 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
6538 APInt Lg2Mask = APInt::getLowBitsSet(numBits: VT.getSizeInBits(), loBitsSet: Lg2);
6539 SDValue Pow2MinusOne = DAG.getConstant(Val: Lg2Mask, DL, VT);
6540
6541 // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6542 EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
6543 SDValue Cmp = DAG.getSetCC(DL, VT: CCVT, LHS: N0, RHS: Zero, Cond: ISD::SETLT);
6544 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0, N2: Pow2MinusOne);
6545 SDValue CMov = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: Cmp, N2: Add, N3: N0);
6546
6547 Created.push_back(Elt: Cmp.getNode());
6548 Created.push_back(Elt: Add.getNode());
6549 Created.push_back(Elt: CMov.getNode());
6550
6551 // Divide by pow2.
6552 SDValue SRA = DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: CMov,
6553 N2: DAG.getShiftAmountConstant(Val: Lg2, VT, DL));
6554
6555 // If we're dividing by a positive value, we're done. Otherwise, we must
6556 // negate the result.
6557 if (Divisor.isNonNegative())
6558 return SRA;
6559
6560 Created.push_back(Elt: SRA.getNode());
6561 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Zero, N2: SRA);
6562}
6563
6564/// Given an ISD::SDIV node expressing a divide by constant,
6565/// return a DAG expression to select that will generate the same value by
6566/// multiplying by a magic number.
6567/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
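/// For example, for an i32 sdiv by 7 this produces the classic sequence
/// (magic multiplier M = 0x92492493, post-shift 2):
///   Q = mulhs(X, M); Q += X; Q = Q >>s 2; Q += Q >>u 31;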
6568SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
6569 bool IsAfterLegalization,
6570 bool IsAfterLegalTypes,
6571 SmallVectorImpl<SDNode *> &Created) const {
6572 SDLoc dl(N);
6573 EVT VT = N->getValueType(ResNo: 0);
6574 EVT SVT = VT.getScalarType();
6575 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6576 EVT ShSVT = ShVT.getScalarType();
6577 unsigned EltBits = VT.getScalarSizeInBits();
6578 EVT MulVT;
6579
6580 // Check to see if we can do this.
6581 // FIXME: We should be more aggressive here.
6582 if (!isTypeLegal(VT)) {
6583 // Limit this to simple scalars for now.
6584 if (VT.isVector() || !VT.isSimple())
6585 return SDValue();
6586
6587 // If this type will be promoted to a large enough type with a legal
6588 // multiply operation, we can go ahead and do this transform.
6589 if (getTypeAction(VT: VT.getSimpleVT()) != TypePromoteInteger)
6590 return SDValue();
6591
6592 MulVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT);
6593 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6594 !isOperationLegal(Op: ISD::MUL, VT: MulVT))
6595 return SDValue();
6596 }
6597
6598 // If the sdiv has an 'exact' bit we can use a simpler lowering.
6599 if (N->getFlags().hasExact())
6600 return BuildExactSDIV(TLI: *this, N, dl, DAG, Created);
6601
6602 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6603
6604 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6605 if (C->isZero())
6606 return false;
6607 // Truncate the divisor to the target scalar type in case it was promoted
6608 // during type legalization.
6609 APInt Divisor = C->getAPIntValue().trunc(width: EltBits);
    SignedDivisionByConstantInfo magics =
        SignedDivisionByConstantInfo::get(D: Divisor);
6611 int NumeratorFactor = 0;
6612 int ShiftMask = -1;
6613
6614 if (Divisor.isOne() || Divisor.isAllOnes()) {
6615 // If d is +1/-1, we just multiply the numerator by +1/-1.
6616 NumeratorFactor = Divisor.getSExtValue();
6617 magics.Magic = 0;
6618 magics.ShiftAmount = 0;
6619 ShiftMask = 0;
6620 } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6621 // If d > 0 and m < 0, add the numerator.
6622 NumeratorFactor = 1;
6623 } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6624 // If d < 0 and m > 0, subtract the numerator.
6625 NumeratorFactor = -1;
6626 }
6627
6628 MagicFactors.push_back(Elt: DAG.getConstant(Val: magics.Magic, DL: dl, VT: SVT));
6629 Factors.push_back(Elt: DAG.getSignedConstant(Val: NumeratorFactor, DL: dl, VT: SVT));
6630 Shifts.push_back(Elt: DAG.getConstant(Val: magics.ShiftAmount, DL: dl, VT: ShSVT));
6631 ShiftMasks.push_back(Elt: DAG.getSignedConstant(Val: ShiftMask, DL: dl, VT: SVT));
6632 return true;
6633 };
6634
6635 SDValue N0 = N->getOperand(Num: 0);
6636 SDValue N1 = N->getOperand(Num: 1);
6637
6638 // Collect the shifts / magic values from each element.
6639 if (!ISD::matchUnaryPredicate(Op: N1, Match: BuildSDIVPattern, /*AllowUndefs=*/false,
6640 /*AllowTruncation=*/true))
6641 return SDValue();
6642
6643 SDValue MagicFactor, Factor, Shift, ShiftMask;
6644 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6645 MagicFactor = DAG.getBuildVector(VT, DL: dl, Ops: MagicFactors);
6646 Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
6647 Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
6648 ShiftMask = DAG.getBuildVector(VT, DL: dl, Ops: ShiftMasks);
6649 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6650 assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6651 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6652 "Expected matchUnaryPredicate to return one element for scalable "
6653 "vectors");
6654 MagicFactor = DAG.getSplatVector(VT, DL: dl, Op: MagicFactors[0]);
6655 Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors[0]);
6656 Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts[0]);
6657 ShiftMask = DAG.getSplatVector(VT, DL: dl, Op: ShiftMasks[0]);
6658 } else {
6659 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6660 MagicFactor = MagicFactors[0];
6661 Factor = Factors[0];
6662 Shift = Shifts[0];
6663 ShiftMask = ShiftMasks[0];
6664 }
6665
6666 // Multiply the numerator (operand 0) by the magic value.
6667 // FIXME: We should support doing a MUL in a wider type.
6668 auto GetMULHS = [&](SDValue X, SDValue Y) {
6669 // If the type isn't legal, use a wider mul of the type calculated
6670 // earlier.
6671 if (!isTypeLegal(VT)) {
6672 X = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MulVT, Operand: X);
6673 Y = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MulVT, Operand: Y);
6674 Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MulVT, N1: X, N2: Y);
6675 Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MulVT, N1: Y,
6676 N2: DAG.getShiftAmountConstant(Val: EltBits, VT: MulVT, DL: dl));
6677 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6678 }
6679
6680 if (isOperationLegalOrCustom(Op: ISD::MULHS, VT, LegalOnly: IsAfterLegalization))
6681 return DAG.getNode(Opcode: ISD::MULHS, DL: dl, VT, N1: X, N2: Y);
6682 if (isOperationLegalOrCustom(Op: ISD::SMUL_LOHI, VT, LegalOnly: IsAfterLegalization)) {
6683 SDValue LoHi =
6684 DAG.getNode(Opcode: ISD::SMUL_LOHI, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: X, N2: Y);
6685 return SDValue(LoHi.getNode(), 1);
6686 }
    // If a type twice as wide is legal, widen and use a MUL plus a shift.
6688 unsigned Size = VT.getScalarSizeInBits();
6689 EVT WideVT = VT.changeElementType(
6690 Context&: *DAG.getContext(), EltVT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: Size * 2));
6691 // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
6692 // custom lowered. This is very expensive so avoid it at all costs for
6693 // constant divisors.
6694 if ((!IsAfterLegalTypes && isOperationExpand(Op: ISD::SDIV, VT) &&
6695 isOperationCustom(Op: ISD::SDIVREM, VT: VT.getScalarType())) ||
6696 isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT)) {
6697 X = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: WideVT, Operand: X);
6698 Y = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: WideVT, Operand: Y);
6699 Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: X, N2: Y);
6700 Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideVT, N1: Y,
6701 N2: DAG.getShiftAmountConstant(Val: EltBits, VT: WideVT, DL: dl));
6702 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6703 }
6704 return SDValue();
6705 };
6706
6707 SDValue Q = GetMULHS(N0, MagicFactor);
6708 if (!Q)
6709 return SDValue();
6710
6711 Created.push_back(Elt: Q.getNode());
6712
6713 // (Optionally) Add/subtract the numerator using Factor.
6714 Factor = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: N0, N2: Factor);
6715 Created.push_back(Elt: Factor.getNode());
6716 Q = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Q, N2: Factor);
6717 Created.push_back(Elt: Q.getNode());
6718
6719 // Shift right algebraic by shift value.
6720 Q = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Q, N2: Shift);
6721 Created.push_back(Elt: Q.getNode());
6722
6723 // Extract the sign bit, mask it and add it to the quotient.
6724 SDValue SignShift = DAG.getConstant(Val: EltBits - 1, DL: dl, VT: ShVT);
6725 SDValue T = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: SignShift);
6726 Created.push_back(Elt: T.getNode());
6727 T = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: T, N2: ShiftMask);
6728 Created.push_back(Elt: T.getNode());
6729 return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Q, N2: T);
6730}
6731
6732/// Given an ISD::UDIV node expressing a divide by constant,
6733/// return a DAG expression to select that will generate the same value by
6734/// multiplying by a magic number.
6735/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
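/// For example, for an i32 udiv by 7 (magic 0x24924925 with the add fixup):
///   Q = mulhu(X, M); NPQ = X - Q; Q = ((NPQ >>u 1) + Q) >>u 2;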
6736SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
6737 bool IsAfterLegalization,
6738 bool IsAfterLegalTypes,
6739 SmallVectorImpl<SDNode *> &Created) const {
6740 SDLoc dl(N);
6741 EVT VT = N->getValueType(ResNo: 0);
6742 EVT SVT = VT.getScalarType();
6743 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6744 EVT ShSVT = ShVT.getScalarType();
6745 unsigned EltBits = VT.getScalarSizeInBits();
6746 EVT MulVT;
6747
6748 // Check to see if we can do this.
6749 // FIXME: We should be more aggressive here.
6750 if (!isTypeLegal(VT)) {
6751 // Limit this to simple scalars for now.
6752 if (VT.isVector() || !VT.isSimple())
6753 return SDValue();
6754
6755 // If this type will be promoted to a large enough type with a legal
6756 // multiply operation, we can go ahead and do this transform.
6757 if (getTypeAction(VT: VT.getSimpleVT()) != TypePromoteInteger)
6758 return SDValue();
6759
6760 MulVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT);
6761 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6762 !isOperationLegal(Op: ISD::MUL, VT: MulVT))
6763 return SDValue();
6764 }
6765
6766 // If the udiv has an 'exact' bit we can use a simpler lowering.
6767 if (N->getFlags().hasExact())
6768 return BuildExactUDIV(TLI: *this, N, dl, DAG, Created);
6769
6770 SDValue N0 = N->getOperand(Num: 0);
6771 SDValue N1 = N->getOperand(Num: 1);
6772
6773 // Try to use leading zeros of the dividend to reduce the multiplier and
6774 // avoid expensive fixups.
6775 unsigned KnownLeadingZeros = DAG.computeKnownBits(Op: N0).countMinLeadingZeros();
6776
6777 // If we're after type legalization and SVT is not legal, use the
6778 // promoted type for creating constants to avoid creating nodes with
6779 // illegal types.
6780 if (IsAfterLegalTypes && VT.isVector()) {
6781 SVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT: SVT);
6782 if (SVT.bitsLT(VT: VT.getScalarType()))
6783 return SDValue();
6784 ShSVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT: ShSVT);
6785 if (ShSVT.bitsLT(VT: ShVT.getScalarType()))
6786 return SDValue();
6787 }
6788 const unsigned SVTBits = SVT.getSizeInBits();
6789
6790 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6791 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6792
6793 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6794 if (C->isZero())
6795 return false;
6796 // Truncate the divisor to the target scalar type in case it was promoted
6797 // during type legalization.
6798 APInt Divisor = C->getAPIntValue().trunc(width: EltBits);
6799
6800 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6801
6802 // Magic algorithm doesn't work for division by 1. We need to emit a select
6803 // at the end.
6804 if (Divisor.isOne()) {
6805 PreShift = PostShift = DAG.getUNDEF(VT: ShSVT);
6806 MagicFactor = NPQFactor = DAG.getUNDEF(VT: SVT);
6807 } else {
6808 UnsignedDivisionByConstantInfo magics =
6809 UnsignedDivisionByConstantInfo::get(
6810 D: Divisor, LeadingZeros: std::min(a: KnownLeadingZeros, b: Divisor.countl_zero()));
6811
6812 MagicFactor = DAG.getConstant(Val: magics.Magic.zext(width: SVTBits), DL: dl, VT: SVT);
6813
6814 assert(magics.PreShift < Divisor.getBitWidth() &&
6815 "We shouldn't generate an undefined shift!");
6816 assert(magics.PostShift < Divisor.getBitWidth() &&
6817 "We shouldn't generate an undefined shift!");
6818 assert((!magics.IsAdd || magics.PreShift == 0) &&
6819 "Unexpected pre-shift");
6820 PreShift = DAG.getConstant(Val: magics.PreShift, DL: dl, VT: ShSVT);
6821 PostShift = DAG.getConstant(Val: magics.PostShift, DL: dl, VT: ShSVT);
6822 NPQFactor = DAG.getConstant(
6823 Val: magics.IsAdd ? APInt::getOneBitSet(numBits: SVTBits, BitNo: EltBits - 1)
6824 : APInt::getZero(numBits: SVTBits),
6825 DL: dl, VT: SVT);
6826 UseNPQ |= magics.IsAdd;
6827 UsePreShift |= magics.PreShift != 0;
6828 UsePostShift |= magics.PostShift != 0;
6829 }
6830
6831 PreShifts.push_back(Elt: PreShift);
6832 MagicFactors.push_back(Elt: MagicFactor);
6833 NPQFactors.push_back(Elt: NPQFactor);
6834 PostShifts.push_back(Elt: PostShift);
6835 return true;
6836 };
6837
6838 // Collect the shifts/magic values from each element.
6839 if (!ISD::matchUnaryPredicate(Op: N1, Match: BuildUDIVPattern, /*AllowUndefs=*/false,
6840 /*AllowTruncation=*/true))
6841 return SDValue();
6842
6843 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6844 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6845 PreShift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: PreShifts);
6846 MagicFactor = DAG.getBuildVector(VT, DL: dl, Ops: MagicFactors);
6847 NPQFactor = DAG.getBuildVector(VT, DL: dl, Ops: NPQFactors);
6848 PostShift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: PostShifts);
6849 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6850 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6851 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
6852 "Expected matchUnaryPredicate to return one for scalable vectors");
6853 PreShift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: PreShifts[0]);
6854 MagicFactor = DAG.getSplatVector(VT, DL: dl, Op: MagicFactors[0]);
6855 NPQFactor = DAG.getSplatVector(VT, DL: dl, Op: NPQFactors[0]);
6856 PostShift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: PostShifts[0]);
6857 } else {
6858 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6859 PreShift = PreShifts[0];
6860 MagicFactor = MagicFactors[0];
6861 PostShift = PostShifts[0];
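    // NPQFactor is intentionally left unset here: the scalar NPQ path below
    // uses an explicit SRL-by-1 instead of a MULHU by the NPQ factor.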
6862 }
6863
6864 SDValue Q = N0;
6865 if (UsePreShift) {
6866 Q = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: PreShift);
6867 Created.push_back(Elt: Q.getNode());
6868 }
6869
6870 // FIXME: We should support doing a MUL in a wider type.
6871 auto GetMULHU = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a MUL in the wider type calculated
    // earlier.
6874 if (!isTypeLegal(VT)) {
6875 X = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MulVT, Operand: X);
6876 Y = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MulVT, Operand: Y);
6877 Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MulVT, N1: X, N2: Y);
6878 Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MulVT, N1: Y,
6879 N2: DAG.getShiftAmountConstant(Val: EltBits, VT: MulVT, DL: dl));
6880 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6881 }
6882
6883 if (isOperationLegalOrCustom(Op: ISD::MULHU, VT, LegalOnly: IsAfterLegalization))
6884 return DAG.getNode(Opcode: ISD::MULHU, DL: dl, VT, N1: X, N2: Y);
6885 if (isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT, LegalOnly: IsAfterLegalization)) {
6886 SDValue LoHi =
6887 DAG.getNode(Opcode: ISD::UMUL_LOHI, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: X, N2: Y);
6888 return SDValue(LoHi.getNode(), 1);
6889 }
    // If a type twice as wide is legal, widen and use a MUL plus a shift.
6891 unsigned Size = VT.getScalarSizeInBits();
6892 EVT WideVT = VT.changeElementType(
6893 Context&: *DAG.getContext(), EltVT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: Size * 2));
    // Some targets, like AMDGPU, try to go from UDIV to UDIVREM, which is
    // then custom lowered. This is very expensive, so avoid it at all costs
    // for constant divisors.
6897 if ((!IsAfterLegalTypes && isOperationExpand(Op: ISD::UDIV, VT) &&
6898 isOperationCustom(Op: ISD::UDIVREM, VT: VT.getScalarType())) ||
6899 isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT)) {
6900 X = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: WideVT, Operand: X);
6901 Y = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: WideVT, Operand: Y);
6902 Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: X, N2: Y);
6903 Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideVT, N1: Y,
6904 N2: DAG.getShiftAmountConstant(Val: EltBits, VT: WideVT, DL: dl));
6905 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6906 }
6907 return SDValue(); // No mulhu or equivalent
6908 };
6909
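  // For illustration, with 32-bit scalars and a divisor of 7 (assuming the
  // magic data works out to Magic = 0x24924925, IsAdd = true, PreShift = 0
  // and PostShift = 2), the sequence built below is:
  //   Q   = mulhu(N0, 0x24924925)
  //   NPQ = srl(sub(N0, Q), 1)
  //   Q   = srl(add(NPQ, Q), 2)
  // i.e. the classic "multiply, subtract, halve, add back, shift" expansion
  // of an unsigned divide by 7.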
6910 // Multiply the numerator (operand 0) by the magic value.
6911 Q = GetMULHU(Q, MagicFactor);
6912 if (!Q)
6913 return SDValue();
6914
6915 Created.push_back(Elt: Q.getNode());
6916
6917 if (UseNPQ) {
6918 SDValue NPQ = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: N0, N2: Q);
6919 Created.push_back(Elt: NPQ.getNode());
6920
    // For vectors we might have a mix of NPQ and non-NPQ lanes, so use
    // MULHU to act as a SRL-by-1 for the NPQ lanes, and multiply the rest
    // by zero.
6923 if (VT.isVector())
6924 NPQ = GetMULHU(NPQ, NPQFactor);
6925 else
6926 NPQ = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: NPQ, N2: DAG.getConstant(Val: 1, DL: dl, VT: ShVT));
6927
6928 Created.push_back(Elt: NPQ.getNode());
6929
6930 Q = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: NPQ, N2: Q);
6931 Created.push_back(Elt: Q.getNode());
6932 }
6933
6934 if (UsePostShift) {
6935 Q = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: PostShift);
6936 Created.push_back(Elt: Q.getNode());
6937 }
6938
6939 EVT SetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
6940
6941 SDValue One = DAG.getConstant(Val: 1, DL: dl, VT);
6942 SDValue IsOne = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: N1, RHS: One, Cond: ISD::SETEQ);
6943 return DAG.getSelect(DL: dl, VT, Cond: IsOne, LHS: N0, RHS: Q);
6944}
6945
/// If all values in Values that *don't* match the predicate are the same
/// 'splat' value, then replace all values that do match the predicate with
/// that splat value.
/// Else, if AlternativeReplacement was provided, then replace all values that
/// do match the predicate with the AlternativeReplacement value.
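/// For example, { X, 0, X, 0 } with isNullConstant as the predicate becomes
/// { X, X, X, X }, while for { X, 0, Y, 0 } no splat value exists, so the
/// zeros are only replaced if an AlternativeReplacement was provided.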
6950static void
6951turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
6952 std::function<bool(SDValue)> Predicate,
6953 SDValue AlternativeReplacement = SDValue()) {
6954 SDValue Replacement;
6955 // Is there a value for which the Predicate does *NOT* match? What is it?
6956 auto SplatValue = llvm::find_if_not(Range&: Values, P: Predicate);
6957 if (SplatValue != Values.end()) {
6958 // Does Values consist only of SplatValue's and values matching Predicate?
6959 if (llvm::all_of(Range&: Values, P: [Predicate, SplatValue](SDValue Value) {
6960 return Value == *SplatValue || Predicate(Value);
6961 })) // Then we shall replace values matching predicate with SplatValue.
6962 Replacement = *SplatValue;
6963 }
6964 if (!Replacement) {
6965 // Oops, we did not find the "baseline" splat value.
6966 if (!AlternativeReplacement)
6967 return; // Nothing to do.
6968 // Let's replace with provided value then.
6969 Replacement = AlternativeReplacement;
6970 }
6971 std::replace_if(first: Values.begin(), last: Values.end(), pred: Predicate, new_value: Replacement);
6972}
6973
6974/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6975/// where the divisor is constant and the comparison target is zero,
6976/// return a DAG expression that will generate the same comparison result
6977/// using only multiplications, additions and shifts/rotations.
6978/// Ref: "Hacker's Delight" 10-17.
6979SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6980 SDValue CompTargetNode,
6981 ISD::CondCode Cond,
6982 DAGCombinerInfo &DCI,
6983 const SDLoc &DL) const {
6984 SmallVector<SDNode *, 5> Built;
6985 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6986 DCI, DL, Created&: Built)) {
6987 for (SDNode *N : Built)
6988 DCI.AddToWorklist(N);
6989 return Folded;
6990 }
6991
6992 return SDValue();
6993}
6994
6995SDValue
6996TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
6997 SDValue CompTargetNode, ISD::CondCode Cond,
6998 DAGCombinerInfo &DCI, const SDLoc &DL,
6999 SmallVectorImpl<SDNode *> &Created) const {
7000 // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
7001 // - D must be constant, with D = D0 * 2^K where D0 is odd
7002 // - P is the multiplicative inverse of D0 modulo 2^W
7003 // - Q = floor(((2^W) - 1) / D)
7004 // where W is the width of the common type of N and D.
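  //
  // For illustration, with W = 32 and D = 6 (so D0 = 3, K = 1):
  //   P = inv(3, 2^32) = 0xAAAAAAAB
  //   Q = floor((2^32 - 1) / 6) = 0x2AAAAAAA
  //   (N u% 6 == 0)  <-->  (rotr (mul N, 0xAAAAAAAB), 1) u<= 0x2AAAAAAA
  // e.g. N = 6 gives mul == 2 and rotr == 1, which is u<= Q; N = 7 gives an
  // odd product, so the rotate moves a 1 into the sign bit and the check
  // fails.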
7005 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7006 "Only applicable for (in)equality comparisons.");
7007
7008 SelectionDAG &DAG = DCI.DAG;
7009
7010 EVT VT = REMNode.getValueType();
7011 EVT SVT = VT.getScalarType();
7012 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
7013 EVT ShSVT = ShVT.getScalarType();
7014
7015 // If MUL is unavailable, we cannot proceed in any case.
7016 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::MUL, VT))
7017 return SDValue();
7018
7019 bool ComparingWithAllZeros = true;
7020 bool AllComparisonsWithNonZerosAreTautological = true;
7021 bool HadTautologicalLanes = false;
7022 bool AllLanesAreTautological = true;
7023 bool HadEvenDivisor = false;
7024 bool AllDivisorsArePowerOfTwo = true;
7025 bool HadTautologicalInvertedLanes = false;
7026 SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
7027
7028 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
7029 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7030 if (CDiv->isZero())
7031 return false;
7032
7033 const APInt &D = CDiv->getAPIntValue();
7034 const APInt &Cmp = CCmp->getAPIntValue();
7035
7036 ComparingWithAllZeros &= Cmp.isZero();
7037
    // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
    // if C2 is not less than C1, the comparison is always false.
    // But we will only be able to produce the comparison that will give the
    // opposite tautological answer. So this lane would need to be fixed up.
7042 bool TautologicalInvertedLane = D.ule(RHS: Cmp);
7043 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
7044
    // If all lanes are tautological (either all divisors are ones, or the
    // divisor is not greater than the constant we are comparing with),
7047 // we will prefer to avoid the fold.
7048 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
7049 HadTautologicalLanes |= TautologicalLane;
7050 AllLanesAreTautological &= TautologicalLane;
7051
    // If we are comparing with non-zero, we'll need to subtract said
    // comparison value from the LHS. But there is no point in doing that if
    // every lane where we are comparing with non-zero is tautological.
7055 if (!Cmp.isZero())
7056 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
7057
7058 // Decompose D into D0 * 2^K
7059 unsigned K = D.countr_zero();
7060 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7061 APInt D0 = D.lshr(shiftAmt: K);
7062
7063 // D is even if it has trailing zeros.
7064 HadEvenDivisor |= (K != 0);
7065 // D is a power-of-two if D0 is one.
7066 // If all divisors are power-of-two, we will prefer to avoid the fold.
7067 AllDivisorsArePowerOfTwo &= D0.isOne();
7068
7069 // P = inv(D0, 2^W)
7070 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7071 unsigned W = D.getBitWidth();
7072 APInt P = D0.multiplicativeInverse();
7073 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7074
7075 // Q = floor((2^W - 1) u/ D)
7076 // R = ((2^W - 1) u% D)
7077 APInt Q, R;
7078 APInt::udivrem(LHS: APInt::getAllOnes(numBits: W), RHS: D, Quotient&: Q, Remainder&: R);
7079
7080 // If we are comparing with zero, then that comparison constant is okay,
7081 // else it may need to be one less than that.
7082 if (Cmp.ugt(RHS: R))
7083 Q -= 1;
7084
7085 assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
7086 "We are expecting that K is always less than all-ones for ShSVT");
7087
7088 // If the lane is tautological the result can be constant-folded.
7089 if (TautologicalLane) {
      // Set the P and K amounts to bogus values so we can try to splat them.
7091 P = 0;
7092 K = -1;
      // And ensure that the comparison constant is tautological;
      // it will always compare true/false.
7095 Q = -1;
7096 }
7097
7098 PAmts.push_back(Elt: DAG.getConstant(Val: P, DL, VT: SVT));
7099 KAmts.push_back(
7100 Elt: DAG.getConstant(Val: APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7101 /*implicitTrunc=*/true),
7102 DL, VT: ShSVT));
7103 QAmts.push_back(Elt: DAG.getConstant(Val: Q, DL, VT: SVT));
7104 return true;
7105 };
7106
7107 SDValue N = REMNode.getOperand(i: 0);
7108 SDValue D = REMNode.getOperand(i: 1);
7109
7110 // Collect the values from each element.
7111 if (!ISD::matchBinaryPredicate(LHS: D, RHS: CompTargetNode, Match: BuildUREMPattern))
7112 return SDValue();
7113
7114 // If all lanes are tautological, the result can be constant-folded.
7115 if (AllLanesAreTautological)
7116 return SDValue();
7117
  // If this is a urem by a power-of-two, avoid the fold since it can be
  // best implemented as a bit test.
7120 if (AllDivisorsArePowerOfTwo)
7121 return SDValue();
7122
7123 SDValue PVal, KVal, QVal;
7124 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7125 if (HadTautologicalLanes) {
7126 // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep the '0's.
7128 turnVectorIntoSplatVector(Values: PAmts, Predicate: isNullConstant);
7129 // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0's.
7131 turnVectorIntoSplatVector(Values: KAmts, Predicate: isAllOnesConstant,
7132 AlternativeReplacement: DAG.getConstant(Val: 0, DL, VT: ShSVT));
7133 }
7134
7135 PVal = DAG.getBuildVector(VT, DL, Ops: PAmts);
7136 KVal = DAG.getBuildVector(VT: ShVT, DL, Ops: KAmts);
7137 QVal = DAG.getBuildVector(VT, DL, Ops: QAmts);
7138 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7139 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
7140 "Expected matchBinaryPredicate to return one element for "
7141 "SPLAT_VECTORs");
7142 PVal = DAG.getSplatVector(VT, DL, Op: PAmts[0]);
7143 KVal = DAG.getSplatVector(VT: ShVT, DL, Op: KAmts[0]);
7144 QVal = DAG.getSplatVector(VT, DL, Op: QAmts[0]);
7145 } else {
7146 PVal = PAmts[0];
7147 KVal = KAmts[0];
7148 QVal = QAmts[0];
7149 }
7150
7151 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
7152 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::SUB, VT))
7153 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
7154 assert(CompTargetNode.getValueType() == N.getValueType() &&
7155 "Expecting that the types on LHS and RHS of comparisons match.");
7156 N = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: N, N2: CompTargetNode);
7157 }
7158
7159 // (mul N, P)
7160 SDValue Op0 = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N, N2: PVal);
7161 Created.push_back(Elt: Op0.getNode());
7162
7163 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7164 // divisors as a performance improvement, since rotating by 0 is a no-op.
7165 if (HadEvenDivisor) {
7166 // We need ROTR to do this.
7167 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ROTR, VT))
7168 return SDValue();
7169 // UREM: (rotr (mul N, P), K)
7170 Op0 = DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: Op0, N2: KVal);
7171 Created.push_back(Elt: Op0.getNode());
7172 }
7173
7174 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
7175 SDValue NewCC =
7176 DAG.getSetCC(DL, VT: SETCCVT, LHS: Op0, RHS: QVal,
7177 Cond: ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7178 if (!HadTautologicalInvertedLanes)
7179 return NewCC;
7180
  // If any lanes previously compared always-false, the NewCC will give an
  // always-true result for them, so we need to fix up those lanes.
  // Or the other way around for the inequality predicate.
7184 assert(VT.isVector() && "Can/should only get here for vectors.");
7185 Created.push_back(Elt: NewCC.getNode());
7186
  // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
  // if C2 is not less than C1, the comparison is always false.
  // But we have produced the comparison that will give the
  // opposite tautological answer. So these lanes would need to be fixed up.
7191 SDValue TautologicalInvertedChannels =
7192 DAG.getSetCC(DL, VT: SETCCVT, LHS: D, RHS: CompTargetNode, Cond: ISD::SETULE);
7193 Created.push_back(Elt: TautologicalInvertedChannels.getNode());
7194
  // NOTE: we avoid letting illegal types through even if we're before legalize
  // ops -- legalization has a hard time producing good code for this.
7197 if (isOperationLegalOrCustom(Op: ISD::VSELECT, VT: SETCCVT)) {
7198 // If we have a vector select, let's replace the comparison results in the
7199 // affected lanes with the correct tautological result.
7200 SDValue Replacement = DAG.getBoolConstant(V: Cond == ISD::SETEQ ? false : true,
7201 DL, VT: SETCCVT, OpVT: SETCCVT);
7202 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: SETCCVT, N1: TautologicalInvertedChannels,
7203 N2: Replacement, N3: NewCC);
7204 }
7205
7206 // Else, we can just invert the comparison result in the appropriate lanes.
7207 //
  // NOTE: see the note above the VSELECT path above.
7209 if (isOperationLegalOrCustom(Op: ISD::XOR, VT: SETCCVT))
7210 return DAG.getNode(Opcode: ISD::XOR, DL, VT: SETCCVT, N1: NewCC,
7211 N2: TautologicalInvertedChannels);
7212
7213 return SDValue(); // Don't know how to lower.
7214}
7215
7216/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
7217/// where the divisor is constant and the comparison target is zero,
7218/// return a DAG expression that will generate the same comparison result
7219/// using only multiplications, additions and shifts/rotations.
7220/// Ref: "Hacker's Delight" 10-17.
7221SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
7222 SDValue CompTargetNode,
7223 ISD::CondCode Cond,
7224 DAGCombinerInfo &DCI,
7225 const SDLoc &DL) const {
7226 SmallVector<SDNode *, 7> Built;
7227 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7228 DCI, DL, Created&: Built)) {
7229 assert(Built.size() <= 7 && "Max size prediction failed.");
7230 for (SDNode *N : Built)
7231 DCI.AddToWorklist(N);
7232 return Folded;
7233 }
7234
7235 return SDValue();
7236}
7237
7238SDValue
7239TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
7240 SDValue CompTargetNode, ISD::CondCode Cond,
7241 DAGCombinerInfo &DCI, const SDLoc &DL,
7242 SmallVectorImpl<SDNode *> &Created) const {
7243 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
7244 // Fold:
7245 // (seteq/ne (srem N, D), 0)
7246 // To:
7247 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
7248 //
7249 // - D must be constant, with D = D0 * 2^K where D0 is odd
7250 // - P is the multiplicative inverse of D0 modulo 2^W
  // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^K)))
7252 // - Q = floor((2 * A) / (2^K))
7253 // where W is the width of the common type of N and D.
7254 //
7255 // When D is a power of two (and thus D0 is 1), the normal
  // formulas for A and Q don't apply, because the derivation
7257 // depends on D not dividing 2^(W-1), and thus theorem ZRS
7258 // does not apply. This specifically fails when N = INT_MIN.
7259 //
7260 // Instead, for power-of-two D, we use:
7261 // - A = 2^(W-1)
  // |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0, 2^W - 1]
7263 // - Q = 2^(W-K) - 1
7264 // |-> Test that the top K bits are zero after rotation
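  //
  // For illustration, with W = 32 and D = 6 (so D0 = 3, K = 1):
  //   P = inv(3, 2^32) = 0xAAAAAAAB
  //   A = floor((2^31 - 1) / 3) & ~1 = 0x2AAAAAAA
  //   Q = floor((2 * A) / 2^1)     = 0x2AAAAAAA
  //   (N s% 6 == 0)  <-->  (rotr (add (mul N, P), A), 1) u<= Q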
7265 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7266 "Only applicable for (in)equality comparisons.");
7267
7268 SelectionDAG &DAG = DCI.DAG;
7269
7270 EVT VT = REMNode.getValueType();
7271 EVT SVT = VT.getScalarType();
7272 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
7273 EVT ShSVT = ShVT.getScalarType();
7274
  // If we are after operation legalization and MUL is unavailable, we cannot
  // proceed.
7277 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::MUL, VT))
7278 return SDValue();
7279
7280 // TODO: Could support comparing with non-zero too.
7281 ConstantSDNode *CompTarget = isConstOrConstSplat(N: CompTargetNode);
7282 if (!CompTarget || !CompTarget->isZero())
7283 return SDValue();
7284
7285 bool HadIntMinDivisor = false;
7286 bool HadOneDivisor = false;
7287 bool AllDivisorsAreOnes = true;
7288 bool HadEvenDivisor = false;
7289 bool NeedToApplyOffset = false;
7290 bool AllDivisorsArePowerOfTwo = true;
7291 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
7292
7293 auto BuildSREMPattern = [&](ConstantSDNode *C) {
7294 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7295 if (C->isZero())
7296 return false;
7297
7298 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
7299
7300 // WARNING: this fold is only valid for positive divisors!
7301 APInt D = C->getAPIntValue();
7302 if (D.isNegative())
7303 D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
7304
7305 HadIntMinDivisor |= D.isMinSignedValue();
7306
7307 // If all divisors are ones, we will prefer to avoid the fold.
7308 HadOneDivisor |= D.isOne();
7309 AllDivisorsAreOnes &= D.isOne();
7310
7311 // Decompose D into D0 * 2^K
7312 unsigned K = D.countr_zero();
7313 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7314 APInt D0 = D.lshr(shiftAmt: K);
7315
7316 if (!D.isMinSignedValue()) {
      // D is even if it has trailing zeros; unless it's INT_MIN, in which
      // case we don't care about this lane in this fold; we'll special-handle
      // it.
7319 HadEvenDivisor |= (K != 0);
7320 }
7321
7322 // D is a power-of-two if D0 is one. This includes INT_MIN.
7323 // If all divisors are power-of-two, we will prefer to avoid the fold.
7324 AllDivisorsArePowerOfTwo &= D0.isOne();
7325
7326 // P = inv(D0, 2^W)
7327 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7328 unsigned W = D.getBitWidth();
7329 APInt P = D0.multiplicativeInverse();
7330 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7331
7332 // A = floor((2^(W - 1) - 1) / D0) & -2^K
7333 APInt A = APInt::getSignedMaxValue(numBits: W).udiv(RHS: D0);
7334 A.clearLowBits(loBits: K);
7335
7336 if (!D.isMinSignedValue()) {
      // If the divisor is INT_MIN, then we don't care about this lane in this
      // fold; we'll special-handle it.
7339 NeedToApplyOffset |= A != 0;
7340 }
7341
7342 // Q = floor((2 * A) / (2^K))
7343 APInt Q = (2 * A).udiv(RHS: APInt::getOneBitSet(numBits: W, BitNo: K));
7344
7345 assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
7346 "We are expecting that A is always less than all-ones for SVT");
7347 assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
7348 "We are expecting that K is always less than all-ones for ShSVT");
7349
7350 // If D was a power of two, apply the alternate constant derivation.
7351 if (D0.isOne()) {
7352 // A = 2^(W-1)
7353 A = APInt::getSignedMinValue(numBits: W);
7354 // - Q = 2^(W-K) - 1
7355 Q = APInt::getAllOnes(numBits: W - K).zext(width: W);
7356 }
7357
    // If the divisor is 1, the result can be constant-folded. Likewise, we
    // don't care about INT_MIN lanes; those can be set to undef if
    // appropriate.
7360 if (D.isOne()) {
      // Set P, A and K to bogus values so we can try to splat them.
7362 P = 0;
7363 A = -1;
7364 K = -1;
7365
7366 // x ?% 1 == 0 <--> true <--> x u<= -1
7367 Q = -1;
7368 }
7369
7370 PAmts.push_back(Elt: DAG.getConstant(Val: P, DL, VT: SVT));
7371 AAmts.push_back(Elt: DAG.getConstant(Val: A, DL, VT: SVT));
7372 KAmts.push_back(
7373 Elt: DAG.getConstant(Val: APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7374 /*implicitTrunc=*/true),
7375 DL, VT: ShSVT));
7376 QAmts.push_back(Elt: DAG.getConstant(Val: Q, DL, VT: SVT));
7377 return true;
7378 };
7379
7380 SDValue N = REMNode.getOperand(i: 0);
7381 SDValue D = REMNode.getOperand(i: 1);
7382
7383 // Collect the values from each element.
7384 if (!ISD::matchUnaryPredicate(Op: D, Match: BuildSREMPattern))
7385 return SDValue();
7386
  // If this is a srem by one, avoid the fold since it can be constant-folded.
7388 if (AllDivisorsAreOnes)
7389 return SDValue();
7390
  // If this is a srem by a power-of-two (including INT_MIN), avoid the fold
7392 // since it can be best implemented as a bit test.
7393 if (AllDivisorsArePowerOfTwo)
7394 return SDValue();
7395
7396 SDValue PVal, AVal, KVal, QVal;
7397 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7398 if (HadOneDivisor) {
7399 // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep the '0's.
7401 turnVectorIntoSplatVector(Values: PAmts, Predicate: isNullConstant);
7402 // Try to turn AAmts into a splat, since we don't care about the
      // values that are currently '-1'. If we can't, change them to '0's.
7404 turnVectorIntoSplatVector(Values: AAmts, Predicate: isAllOnesConstant,
7405 AlternativeReplacement: DAG.getConstant(Val: 0, DL, VT: SVT));
7406 // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0's.
7408 turnVectorIntoSplatVector(Values: KAmts, Predicate: isAllOnesConstant,
7409 AlternativeReplacement: DAG.getConstant(Val: 0, DL, VT: ShSVT));
7410 }
7411
7412 PVal = DAG.getBuildVector(VT, DL, Ops: PAmts);
7413 AVal = DAG.getBuildVector(VT, DL, Ops: AAmts);
7414 KVal = DAG.getBuildVector(VT: ShVT, DL, Ops: KAmts);
7415 QVal = DAG.getBuildVector(VT, DL, Ops: QAmts);
7416 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7417 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7418 QAmts.size() == 1 &&
7419 "Expected matchUnaryPredicate to return one element for scalable "
7420 "vectors");
7421 PVal = DAG.getSplatVector(VT, DL, Op: PAmts[0]);
7422 AVal = DAG.getSplatVector(VT, DL, Op: AAmts[0]);
7423 KVal = DAG.getSplatVector(VT: ShVT, DL, Op: KAmts[0]);
7424 QVal = DAG.getSplatVector(VT, DL, Op: QAmts[0]);
7425 } else {
7426 assert(isa<ConstantSDNode>(D) && "Expected a constant");
7427 PVal = PAmts[0];
7428 AVal = AAmts[0];
7429 KVal = KAmts[0];
7430 QVal = QAmts[0];
7431 }
7432
7433 // (mul N, P)
7434 SDValue Op0 = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N, N2: PVal);
7435 Created.push_back(Elt: Op0.getNode());
7436
7437 if (NeedToApplyOffset) {
7438 // We need ADD to do this.
7439 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ADD, VT))
7440 return SDValue();
7441
7442 // (add (mul N, P), A)
7443 Op0 = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0, N2: AVal);
7444 Created.push_back(Elt: Op0.getNode());
7445 }
7446
7447 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7448 // divisors as a performance improvement, since rotating by 0 is a no-op.
7449 if (HadEvenDivisor) {
7450 // We need ROTR to do this.
7451 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ROTR, VT))
7452 return SDValue();
7453 // SREM: (rotr (add (mul N, P), A), K)
7454 Op0 = DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: Op0, N2: KVal);
7455 Created.push_back(Elt: Op0.getNode());
7456 }
7457
7458 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7459 SDValue Fold =
7460 DAG.getSetCC(DL, VT: SETCCVT, LHS: Op0, RHS: QVal,
7461 Cond: ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7462
7463 // If we didn't have lanes with INT_MIN divisor, then we're done.
7464 if (!HadIntMinDivisor)
7465 return Fold;
7466
  // That fold is only valid for positive divisors, which effectively means
  // it is invalid for INT_MIN divisors. So if we have such a lane,
  // we must fix up the results for said lanes.
7470 assert(VT.isVector() && "Can/should only get here for vectors.");
7471
  // NOTE: we avoid letting illegal types through even if we're before legalize
  // ops -- legalization has a hard time producing good code for the code that
  // follows.
7475 if (!isOperationLegalOrCustom(Op: ISD::SETCC, VT: SETCCVT) ||
7476 !isOperationLegalOrCustom(Op: ISD::AND, VT) ||
7477 !isCondCodeLegalOrCustom(CC: Cond, VT: VT.getSimpleVT()) ||
7478 !isOperationLegalOrCustom(Op: ISD::VSELECT, VT: SETCCVT))
7479 return SDValue();
7480
7481 Created.push_back(Elt: Fold.getNode());
7482
7483 SDValue IntMin = DAG.getConstant(
7484 Val: APInt::getSignedMinValue(numBits: SVT.getScalarSizeInBits()), DL, VT);
7485 SDValue IntMax = DAG.getConstant(
7486 Val: APInt::getSignedMaxValue(numBits: SVT.getScalarSizeInBits()), DL, VT);
7487 SDValue Zero =
7488 DAG.getConstant(Val: APInt::getZero(numBits: SVT.getScalarSizeInBits()), DL, VT);
7489
7490 // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7491 SDValue DivisorIsIntMin = DAG.getSetCC(DL, VT: SETCCVT, LHS: D, RHS: IntMin, Cond: ISD::SETEQ);
7492 Created.push_back(Elt: DivisorIsIntMin.getNode());
7493
7494 // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
7495 SDValue Masked = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: N, N2: IntMax);
7496 Created.push_back(Elt: Masked.getNode());
7497 SDValue MaskedIsZero = DAG.getSetCC(DL, VT: SETCCVT, LHS: Masked, RHS: Zero, Cond);
7498 Created.push_back(Elt: MaskedIsZero.getNode());
7499
  // To produce the final result we need to blend two vectors: 'Fold' and
  // 'MaskedIsZero'. If the divisor for a channel was *NOT* INT_MIN, we pick
  // from 'Fold', else we pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
  // constant-folded, the select can get lowered to a shuffle with a constant
  // mask.
7504 SDValue Blended = DAG.getNode(Opcode: ISD::VSELECT, DL, VT: SETCCVT, N1: DivisorIsIntMin,
7505 N2: MaskedIsZero, N3: Fold);
7506
7507 return Blended;
7508}
7509
7510SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
7511 const DenormalMode &Mode) const {
7512 SDLoc DL(Op);
7513 EVT VT = Op.getValueType();
7514 EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
7515 SDValue FPZero = DAG.getConstantFP(Val: 0.0, DL, VT);
7516
7517 // This is specifically a check for the handling of denormal inputs, not the
7518 // result.
7519 if (Mode.Input == DenormalMode::PreserveSign ||
7520 Mode.Input == DenormalMode::PositiveZero) {
7521 // Test = X == 0.0
7522 return DAG.getSetCC(DL, VT: CCVT, LHS: Op, RHS: FPZero, Cond: ISD::SETEQ);
7523 }
7524
  // Test for denormal inputs to avoid a wrong estimate.
7526 //
7527 // Test = fabs(X) < SmallestNormal
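  // (for IEEE single precision, SmallestNormal is 0x1p-126)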
7528 const fltSemantics &FltSem = VT.getFltSemantics();
7529 APFloat SmallestNorm = APFloat::getSmallestNormalized(Sem: FltSem);
7530 SDValue NormC = DAG.getConstantFP(Val: SmallestNorm, DL, VT);
7531 SDValue Fabs = DAG.getNode(Opcode: ISD::FABS, DL, VT, Operand: Op);
7532 return DAG.getSetCC(DL, VT: CCVT, LHS: Fabs, RHS: NormC, Cond: ISD::SETLT);
7533}
7534
7535SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
7536 bool LegalOps, bool OptForSize,
7537 NegatibleCost &Cost,
7538 unsigned Depth) const {
7539 // fneg is removable even if it has multiple uses.
7540 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
7541 Cost = NegatibleCost::Cheaper;
7542 return Op.getOperand(i: 0);
7543 }
7544
7545 // Don't recurse exponentially.
7546 if (Depth > SelectionDAG::MaxRecursionDepth)
7547 return SDValue();
7548
7549 // Pre-increment recursion depth for use in recursive calls.
7550 ++Depth;
7551 const SDNodeFlags Flags = Op->getFlags();
7552 EVT VT = Op.getValueType();
7553 unsigned Opcode = Op.getOpcode();
7554
7555 // Don't allow anything with multiple uses unless we know it is free.
7556 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7557 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7558 isFPExtFree(DestVT: VT, SrcVT: Op.getOperand(i: 0).getValueType());
7559 if (!IsFreeExtend)
7560 return SDValue();
7561 }
7562
7563 auto RemoveDeadNode = [&](SDValue N) {
7564 if (N && N.getNode()->use_empty())
7565 DAG.RemoveDeadNode(N: N.getNode());
7566 };
7567
7568 SDLoc DL(Op);
7569
  // Because getNegatedExpression can delete nodes, we need a handle to keep
7571 // temporary nodes alive in case the recursion manages to create an identical
7572 // node.
7573 std::list<HandleSDNode> Handles;
7574
7575 switch (Opcode) {
7576 case ISD::ConstantFP: {
7577 // Don't invert constant FP values after legalization unless the target says
7578 // the negated constant is legal.
7579 bool IsOpLegal =
7580 isOperationLegal(Op: ISD::ConstantFP, VT) ||
7581 isFPImmLegal(neg(X: cast<ConstantFPSDNode>(Val&: Op)->getValueAPF()), VT,
7582 ForCodeSize: OptForSize);
7583
7584 if (LegalOps && !IsOpLegal)
7585 break;
7586
7587 APFloat V = cast<ConstantFPSDNode>(Val&: Op)->getValueAPF();
7588 V.changeSign();
7589 SDValue CFP = DAG.getConstantFP(Val: V, DL, VT);
7590
    // If we already have a use of the negated floating constant, it is free
    // to negate it even if it has multiple uses.
7593 if (!Op.hasOneUse() && CFP.use_empty())
7594 break;
7595 Cost = NegatibleCost::Neutral;
7596 return CFP;
7597 }
7598 case ISD::SPLAT_VECTOR: {
7599 // fold splat_vector(fneg(X)) -> splat_vector(-X)
7600 SDValue X = Op.getOperand(i: 0);
7601 if (!isOperationLegal(Op: ISD::SPLAT_VECTOR, VT))
7602 break;
7603
7604 SDValue NegX = getCheaperNegatedExpression(Op: X, DAG, LegalOps, OptForSize);
7605 if (!NegX)
7606 break;
7607 Cost = NegatibleCost::Cheaper;
7608 return DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL, VT, Operand: NegX);
7609 }
7610 case ISD::BUILD_VECTOR: {
7611 // Only permit BUILD_VECTOR of constants.
7612 if (llvm::any_of(Range: Op->op_values(), P: [&](SDValue N) {
7613 return !N.isUndef() && !isa<ConstantFPSDNode>(Val: N);
7614 }))
7615 break;
7616
7617 bool IsOpLegal =
7618 (isOperationLegal(Op: ISD::ConstantFP, VT) &&
7619 isOperationLegal(Op: ISD::BUILD_VECTOR, VT)) ||
7620 llvm::all_of(Range: Op->op_values(), P: [&](SDValue N) {
7621 return N.isUndef() ||
7622 isFPImmLegal(neg(X: cast<ConstantFPSDNode>(Val&: N)->getValueAPF()), VT,
7623 ForCodeSize: OptForSize);
7624 });
7625
7626 if (LegalOps && !IsOpLegal)
7627 break;
7628
7629 SmallVector<SDValue, 4> Ops;
7630 for (SDValue C : Op->op_values()) {
7631 if (C.isUndef()) {
7632 Ops.push_back(Elt: C);
7633 continue;
7634 }
7635 APFloat V = cast<ConstantFPSDNode>(Val&: C)->getValueAPF();
7636 V.changeSign();
7637 Ops.push_back(Elt: DAG.getConstantFP(Val: V, DL, VT: C.getValueType()));
7638 }
7639 Cost = NegatibleCost::Neutral;
7640 return DAG.getBuildVector(VT, DL, Ops);
7641 }
7642 case ISD::FADD: {
7643 if (!Flags.hasNoSignedZeros())
7644 break;
7645
7646 // After operation legalization, it might not be legal to create new FSUBs.
7647 if (LegalOps && !isOperationLegalOrCustom(Op: ISD::FSUB, VT))
7648 break;
7649 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1);
7650
7651 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7652 NegatibleCost CostX = NegatibleCost::Expensive;
7653 SDValue NegX =
7654 getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7655 // Prevent this node from being deleted by the next call.
7656 if (NegX)
7657 Handles.emplace_back(args&: NegX);
7658
7659 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7660 NegatibleCost CostY = NegatibleCost::Expensive;
7661 SDValue NegY =
7662 getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7663
7664 // We're done with the handles.
7665 Handles.clear();
7666
    // Negate X if its cost is less than or equal to the cost of Y.
7668 if (NegX && (CostX <= CostY)) {
7669 Cost = CostX;
7670 SDValue N = DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: NegX, N2: Y, Flags);
7671 if (NegY != N)
7672 RemoveDeadNode(NegY);
7673 return N;
7674 }
7675
    // Negate Y if it is not expensive.
7677 if (NegY) {
7678 Cost = CostY;
7679 SDValue N = DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: NegY, N2: X, Flags);
7680 if (NegX != N)
7681 RemoveDeadNode(NegX);
7682 return N;
7683 }
7684 break;
7685 }
7686 case ISD::FSUB: {
7687 // We can't turn -(A-B) into B-A when we honor signed zeros.
7688 if (!Flags.hasNoSignedZeros())
7689 break;
7690
7691 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1);
7692 // fold (fneg (fsub 0, Y)) -> Y
7693 if (ConstantFPSDNode *C = isConstOrConstSplatFP(N: X, /*AllowUndefs*/ true))
7694 if (C->isZero()) {
7695 Cost = NegatibleCost::Cheaper;
7696 return Y;
7697 }
7698
7699 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7700 Cost = NegatibleCost::Neutral;
7701 return DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: Y, N2: X, Flags);
7702 }
7703 case ISD::FMUL:
7704 case ISD::FDIV: {
7705 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1);
7706
7707 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7708 NegatibleCost CostX = NegatibleCost::Expensive;
7709 SDValue NegX =
7710 getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7711 // Prevent this node from being deleted by the next call.
7712 if (NegX)
7713 Handles.emplace_back(args&: NegX);
7714
7715 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7716 NegatibleCost CostY = NegatibleCost::Expensive;
7717 SDValue NegY =
7718 getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7719
7720 // We're done with the handles.
7721 Handles.clear();
7722
    // Negate X if its cost is less than or equal to the cost of Y.
7724 if (NegX && (CostX <= CostY)) {
7725 Cost = CostX;
7726 SDValue N = DAG.getNode(Opcode, DL, VT, N1: NegX, N2: Y, Flags);
7727 if (NegY != N)
7728 RemoveDeadNode(NegY);
7729 return N;
7730 }
7731
7732 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7733 if (auto *C = isConstOrConstSplatFP(N: Op.getOperand(i: 1)))
7734 if (C->isExactlyValue(V: 2.0) && Op.getOpcode() == ISD::FMUL)
7735 break;
7736
    // Negate Y if it is not expensive.
7738 if (NegY) {
7739 Cost = CostY;
7740 SDValue N = DAG.getNode(Opcode, DL, VT, N1: X, N2: NegY, Flags);
7741 if (NegX != N)
7742 RemoveDeadNode(NegX);
7743 return N;
7744 }
7745 break;
7746 }
7747 case ISD::FMA:
7748 case ISD::FMULADD:
7749 case ISD::FMAD: {
7750 if (!Flags.hasNoSignedZeros())
7751 break;
7752
7753 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1), Z = Op.getOperand(i: 2);
7754 NegatibleCost CostZ = NegatibleCost::Expensive;
7755 SDValue NegZ =
7756 getNegatedExpression(Op: Z, DAG, LegalOps, OptForSize, Cost&: CostZ, Depth);
    // Give up if we fail to negate Z.
7758 if (!NegZ)
7759 break;
7760
7761 // Prevent this node from being deleted by the next two calls.
7762 Handles.emplace_back(args&: NegZ);
7763
7764 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7765 NegatibleCost CostX = NegatibleCost::Expensive;
7766 SDValue NegX =
7767 getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7768 // Prevent this node from being deleted by the next call.
7769 if (NegX)
7770 Handles.emplace_back(args&: NegX);
7771
7772 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7773 NegatibleCost CostY = NegatibleCost::Expensive;
7774 SDValue NegY =
7775 getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7776
7777 // We're done with the handles.
7778 Handles.clear();
7779
    // Negate X if its cost is less than or equal to the cost of Y.
7781 if (NegX && (CostX <= CostY)) {
7782 Cost = std::min(a: CostX, b: CostZ);
7783 SDValue N = DAG.getNode(Opcode, DL, VT, N1: NegX, N2: Y, N3: NegZ, Flags);
7784 if (NegY != N)
7785 RemoveDeadNode(NegY);
7786 return N;
7787 }
7788
    // Negate Y if it is not expensive.
7790 if (NegY) {
7791 Cost = std::min(a: CostY, b: CostZ);
7792 SDValue N = DAG.getNode(Opcode, DL, VT, N1: X, N2: NegY, N3: NegZ, Flags);
7793 if (NegX != N)
7794 RemoveDeadNode(NegX);
7795 return N;
7796 }
7797 break;
7798 }
7799
7800 case ISD::FP_EXTEND:
7801 case ISD::FSIN:
7802 if (SDValue NegV = getNegatedExpression(Op: Op.getOperand(i: 0), DAG, LegalOps,
7803 OptForSize, Cost, Depth))
7804 return DAG.getNode(Opcode, DL, VT, Operand: NegV);
7805 break;
7806 case ISD::FP_ROUND:
7807 if (SDValue NegV = getNegatedExpression(Op: Op.getOperand(i: 0), DAG, LegalOps,
7808 OptForSize, Cost, Depth))
7809 return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT, N1: NegV, N2: Op.getOperand(i: 1));
7810 break;
7811 case ISD::SELECT:
7812 case ISD::VSELECT: {
7813 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7814 // iff at least one cost is cheaper and the other is neutral/cheaper
7815 SDValue LHS = Op.getOperand(i: 1);
7816 NegatibleCost CostLHS = NegatibleCost::Expensive;
7817 SDValue NegLHS =
7818 getNegatedExpression(Op: LHS, DAG, LegalOps, OptForSize, Cost&: CostLHS, Depth);
7819 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7820 RemoveDeadNode(NegLHS);
7821 break;
7822 }
7823
7824 // Prevent this node from being deleted by the next call.
7825 Handles.emplace_back(args&: NegLHS);
7826
7827 SDValue RHS = Op.getOperand(i: 2);
7828 NegatibleCost CostRHS = NegatibleCost::Expensive;
7829 SDValue NegRHS =
7830 getNegatedExpression(Op: RHS, DAG, LegalOps, OptForSize, Cost&: CostRHS, Depth);
7831
7832 // We're done with the handles.
7833 Handles.clear();
7834
7835 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7836 (CostLHS != NegatibleCost::Cheaper &&
7837 CostRHS != NegatibleCost::Cheaper)) {
7838 RemoveDeadNode(NegLHS);
7839 RemoveDeadNode(NegRHS);
7840 break;
7841 }
7842
7843 Cost = std::min(a: CostLHS, b: CostRHS);
7844 return DAG.getSelect(DL, VT, Cond: Op.getOperand(i: 0), LHS: NegLHS, RHS: NegRHS);
7845 }
7846 }
7847
7848 return SDValue();
7849}
7850
7851//===----------------------------------------------------------------------===//
7852// Legalization Utilities
7853//===----------------------------------------------------------------------===//
7854
7855bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7856 SDValue LHS, SDValue RHS,
7857 SmallVectorImpl<SDValue> &Result,
7858 EVT HiLoVT, SelectionDAG &DAG,
7859 MulExpansionKind Kind, SDValue LL,
7860 SDValue LH, SDValue RL, SDValue RH) const {
7861 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7862 Opcode == ISD::SMUL_LOHI);
7863
7864 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7865 isOperationLegalOrCustom(Op: ISD::MULHS, VT: HiLoVT);
7866 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7867 isOperationLegalOrCustom(Op: ISD::MULHU, VT: HiLoVT);
7868 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7869 isOperationLegalOrCustom(Op: ISD::SMUL_LOHI, VT: HiLoVT);
7870 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7871 isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT: HiLoVT);
7872
7873 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7874 return false;
7875
7876 unsigned OuterBitSize = VT.getScalarSizeInBits();
7877 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7878
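  // The expansion below is schoolbook multiplication on half-width digits:
  // with h = InnerBitSize, LHS = LH * 2^h + LL and RHS = RH * 2^h + RL,
  //   LHS * RHS = LL*RL + (LL*RH + LH*RL) * 2^h + LH*RH * 2^(2*h)
  // and the half products are combined with carry propagation.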
7879 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7880 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7881 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7882
7883 SDVTList VTs = DAG.getVTList(VT1: HiLoVT, VT2: HiLoVT);
7884 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7885 bool Signed) -> bool {
7886 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7887 Lo = DAG.getNode(Opcode: Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, DL: dl, VTList: VTs, N1: L, N2: R);
7888 Hi = SDValue(Lo.getNode(), 1);
7889 return true;
7890 }
7891 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7892 Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: L, N2: R);
7893 Hi = DAG.getNode(Opcode: Signed ? ISD::MULHS : ISD::MULHU, DL: dl, VT: HiLoVT, N1: L, N2: R);
7894 return true;
7895 }
7896 return false;
7897 };
7898
7899 SDValue Lo, Hi;
7900
7901 if (!LL.getNode() && !RL.getNode() &&
7902 isOperationLegalOrCustom(Op: ISD::TRUNCATE, VT: HiLoVT)) {
7903 LL = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: LHS);
7904 RL = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: RHS);
7905 }
7906
7907 if (!LL.getNode())
7908 return false;
7909
7910 APInt HighMask = APInt::getHighBitsSet(numBits: OuterBitSize, hiBitsSet: InnerBitSize);
7911 if (DAG.MaskedValueIsZero(Op: LHS, Mask: HighMask) &&
7912 DAG.MaskedValueIsZero(Op: RHS, Mask: HighMask)) {
7913 // The inputs are both zero-extended.
7914 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7915 Result.push_back(Elt: Lo);
7916 Result.push_back(Elt: Hi);
7917 if (Opcode != ISD::MUL) {
7918 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT);
7919 Result.push_back(Elt: Zero);
7920 Result.push_back(Elt: Zero);
7921 }
7922 return true;
7923 }
7924 }
7925
7926 if (!VT.isVector() && Opcode == ISD::MUL &&
7927 DAG.ComputeMaxSignificantBits(Op: LHS) <= InnerBitSize &&
7928 DAG.ComputeMaxSignificantBits(Op: RHS) <= InnerBitSize) {
7929 // The input values are both sign-extended.
    // TODO: non-MUL case?
7931 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
7932 Result.push_back(Elt: Lo);
7933 Result.push_back(Elt: Hi);
7934 return true;
7935 }
7936 }
7937
7938 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7939 SDValue Shift = DAG.getShiftAmountConstant(Val: ShiftAmount, VT, DL: dl);
7940
7941 if (!LH.getNode() && !RH.getNode() &&
7942 isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
7943 isOperationLegalOrCustom(Op: ISD::TRUNCATE, VT: HiLoVT)) {
7944 LH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: LHS, N2: Shift);
7945 LH = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: LH);
7946 RH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: RHS, N2: Shift);
7947 RH = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: RH);
7948 }
7949
7950 if (!LH.getNode())
7951 return false;
7952
7953 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
7954 return false;
7955
7956 Result.push_back(Elt: Lo);
7957
7958 if (Opcode == ISD::MUL) {
7959 RH = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: LL, N2: RH);
7960 LH = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: LH, N2: RL);
7961 Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Hi, N2: RH);
7962 Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Hi, N2: LH);
7963 Result.push_back(Elt: Hi);
7964 return true;
7965 }
7966
7967 // Compute the full width result.
7968 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
7969 Lo = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Lo);
7970 Hi = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Hi);
7971 Hi = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Hi, N2: Shift);
7972 return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Lo, N2: Hi);
7973 };
7974
7975 SDValue Next = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Hi);
7976 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
7977 return false;
7978
7979 // This is effectively the add part of a multiply-add of half-sized operands,
7980 // so it cannot overflow.
7981 Next = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Next, N2: Merge(Lo, Hi));
7982
7983 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
7984 return false;
7985
7986 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT);
7987 EVT BoolType = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
7988
7989 bool UseGlue = (isOperationLegalOrCustom(Op: ISD::ADDC, VT) &&
7990 isOperationLegalOrCustom(Op: ISD::ADDE, VT));
7991 if (UseGlue)
7992 Next = DAG.getNode(Opcode: ISD::ADDC, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::Glue), N1: Next,
7993 N2: Merge(Lo, Hi));
7994 else
7995 Next = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolType), N1: Next,
7996 N2: Merge(Lo, Hi), N3: DAG.getConstant(Val: 0, DL: dl, VT: BoolType));
7997
7998 SDValue Carry = Next.getValue(R: 1);
7999 Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
8000 Next = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Next, N2: Shift);
8001
8002 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
8003 return false;
8004
8005 if (UseGlue)
8006 Hi = DAG.getNode(Opcode: ISD::ADDE, DL: dl, VTList: DAG.getVTList(VT1: HiLoVT, VT2: MVT::Glue), N1: Hi, N2: Zero,
8007 N3: Carry);
8008 else
8009 Hi = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList: DAG.getVTList(VT1: HiLoVT, VT2: BoolType), N1: Hi,
8010 N2: Zero, N3: Carry);
8011
8012 Next = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Next, N2: Merge(Lo, Hi));
8013
8014 if (Opcode == ISD::SMUL_LOHI) {
8015 SDValue NextSub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Next,
8016 N2: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: RL));
8017 Next = DAG.getSelectCC(DL: dl, LHS: LH, RHS: Zero, True: NextSub, False: Next, Cond: ISD::SETLT);
8018
8019 NextSub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Next,
8020 N2: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: LL));
8021 Next = DAG.getSelectCC(DL: dl, LHS: RH, RHS: Zero, True: NextSub, False: Next, Cond: ISD::SETLT);
8022 }
8023
8024 Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
8025 Next = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Next, N2: Shift);
8026 Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
8027 return true;
8028}
8029
8030bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
8031 SelectionDAG &DAG, MulExpansionKind Kind,
8032 SDValue LL, SDValue LH, SDValue RL,
8033 SDValue RH) const {
8034 SmallVector<SDValue, 2> Result;
8035 bool Ok = expandMUL_LOHI(Opcode: N->getOpcode(), VT: N->getValueType(ResNo: 0), dl: SDLoc(N),
8036 LHS: N->getOperand(Num: 0), RHS: N->getOperand(Num: 1), Result, HiLoVT,
8037 DAG, Kind, LL, LH, RL, RH);
8038 if (Ok) {
8039 assert(Result.size() == 2);
8040 Lo = Result[0];
8041 Hi = Result[1];
8042 }
8043 return Ok;
8044}
8045
8046// Optimize unsigned division or remainder by constants for types twice as large
8047// as a legal VT.
8048//
// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder can be
// computed as:
//   Sum += __builtin_uadd_overflow(Lo, High, &Sum);
//   Remainder = Sum % Constant
// This is based on "Remainder by Summing Digits" from Hacker's Delight.
//
// For division, we can compute the remainder using the algorithm described
// above, then subtract it from the dividend to get an exact multiple of
// Constant. Then multiply that exact multiple by the multiplicative inverse
// modulo (1 << (BitWidth / 2)) to get the quotient.
8060
// If Constant is even, we can shift the dividend and the divisor right by the
// number of trailing zeros in Constant before applying the remainder
// algorithm. If we're after the quotient, we can subtract that remainder from
// the shifted dividend and multiply by the multiplicative inverse of the
// shifted divisor. If we want the remainder, we shift the computed remainder
// left by the number of trailing zeros and add back the bits that were
// shifted out of the dividend.
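//
// For illustration, with BitWidth = 64, HBitWidth = 32 and Constant = 3:
// (1 << 32) % 3 == 1, so N % 3 == ((Lo + Hi) + carry) % 3, and the quotient
// is (N - N % 3) * inv(3, 2^64) == (N - N % 3) * 0xAAAAAAAAAAAAAAAB.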
8067bool TargetLowering::expandDIVREMByConstant(SDNode *N,
8068 SmallVectorImpl<SDValue> &Result,
8069 EVT HiLoVT, SelectionDAG &DAG,
8070 SDValue LL, SDValue LH) const {
8071 unsigned Opcode = N->getOpcode();
8072 EVT VT = N->getValueType(ResNo: 0);
8073
8074 // TODO: Support signed division/remainder.
8075 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
8076 return false;
8077 assert(
8078 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
8079 "Unexpected opcode");
8080
8081 auto *CN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
8082 if (!CN)
8083 return false;
8084
8085 APInt Divisor = CN->getAPIntValue();
8086 unsigned BitWidth = Divisor.getBitWidth();
8087 unsigned HBitWidth = BitWidth / 2;
8088 assert(VT.getScalarSizeInBits() == BitWidth &&
8089 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
8090
  // The divisor needs to be less than (1 << HBitWidth).
8092 APInt HalfMaxPlus1 = APInt::getOneBitSet(numBits: BitWidth, BitNo: HBitWidth);
8093 if (Divisor.uge(RHS: HalfMaxPlus1))
8094 return false;
8095
  // We depend on the UREM-by-constant optimization in DAGCombiner, which
  // requires a high multiply.
8098 if (!isOperationLegalOrCustom(Op: ISD::MULHU, VT: HiLoVT) &&
8099 !isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT: HiLoVT))
8100 return false;
8101
8102 // Don't expand if optimizing for size.
8103 if (DAG.shouldOptForSize())
8104 return false;
8105
8106 // Early out for 0 or 1 divisors.
8107 if (Divisor.ule(RHS: 1))
8108 return false;
8109
8110 // If the divisor is even, shift it until it becomes odd.
8111 unsigned TrailingZeros = 0;
8112 if (!Divisor[0]) {
8113 TrailingZeros = Divisor.countr_zero();
8114 Divisor.lshrInPlace(ShiftAmt: TrailingZeros);
8115 }
8116
8117 SDLoc dl(N);
8118 SDValue Sum;
8119 SDValue PartialRem;
8120
8121 // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
8122 // then add in the carry.
8123 // TODO: If we can't split it in half, we might be able to split into 3 or
8124 // more pieces using a smaller bit width.
8125 if (HalfMaxPlus1.urem(RHS: Divisor).isOne()) {
8126 assert(!LL == !LH && "Expected both input halves or no input halves!");
8127 if (!LL)
8128 std::tie(args&: LL, args&: LH) = DAG.SplitScalar(N: N->getOperand(Num: 0), DL: dl, LoVT: HiLoVT, HiVT: HiLoVT);
8129
8130 // Shift the input by the number of TrailingZeros in the divisor. The
8131 // shifted out bits will be added to the remainder later.
8132 if (TrailingZeros) {
8133 // Save the shifted off bits if we need the remainder.
8134 if (Opcode != ISD::UDIV) {
8135 APInt Mask = APInt::getLowBitsSet(numBits: HBitWidth, loBitsSet: TrailingZeros);
8136 PartialRem = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: HiLoVT, N1: LL,
8137 N2: DAG.getConstant(Val: Mask, DL: dl, VT: HiLoVT));
8138 }
8139
8140 LL = DAG.getNode(
8141 Opcode: ISD::OR, DL: dl, VT: HiLoVT,
8142 N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HiLoVT, N1: LL,
8143 N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl)),
8144 N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HiLoVT, N1: LH,
8145 N2: DAG.getShiftAmountConstant(Val: HBitWidth - TrailingZeros,
8146 VT: HiLoVT, DL: dl)));
8147 LH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HiLoVT, N1: LH,
8148 N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl));
8149 }
8150
8151 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
8152 EVT SetCCType =
8153 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: HiLoVT);
8154 if (isOperationLegalOrCustom(Op: ISD::UADDO_CARRY, VT: HiLoVT)) {
8155 SDVTList VTList = DAG.getVTList(VT1: HiLoVT, VT2: SetCCType);
8156 Sum = DAG.getNode(Opcode: ISD::UADDO, DL: dl, VTList, N1: LL, N2: LH);
8157 Sum = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList, N1: Sum,
8158 N2: DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT), N3: Sum.getValue(R: 1));
8159 } else {
8160 Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: LL, N2: LH);
8161 SDValue Carry = DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Sum, RHS: LL, Cond: ISD::SETULT);
8162 // If the boolean for the target is 0 or 1, we can add the setcc result
8163 // directly.
8164 if (getBooleanContents(Type: HiLoVT) ==
8165 TargetLoweringBase::ZeroOrOneBooleanContent)
8166 Carry = DAG.getZExtOrTrunc(Op: Carry, DL: dl, VT: HiLoVT);
8167 else
8168 Carry = DAG.getSelect(DL: dl, VT: HiLoVT, Cond: Carry, LHS: DAG.getConstant(Val: 1, DL: dl, VT: HiLoVT),
8169 RHS: DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT));
8170 Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Sum, N2: Carry);
8171 }
8172 }
8173
8174 // If we didn't find a sum, we can't do the expansion.
8175 if (!Sum)
8176 return false;
8177
8178 // Perform a HiLoVT urem on the Sum using truncated divisor.
8179 SDValue RemL =
8180 DAG.getNode(Opcode: ISD::UREM, DL: dl, VT: HiLoVT, N1: Sum,
8181 N2: DAG.getConstant(Val: Divisor.trunc(width: HBitWidth), DL: dl, VT: HiLoVT));
8182 SDValue RemH = DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT);
8183
8184 if (Opcode != ISD::UREM) {
8185 // Subtract the remainder from the shifted dividend.
8186 SDValue Dividend = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT, N1: LL, N2: LH);
8187 SDValue Rem = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT, N1: RemL, N2: RemH);
8188
8189 Dividend = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Dividend, N2: Rem);
8190
8191 // Multiply by the multiplicative inverse of the divisor modulo
8192 // (1 << BitWidth).
8193 APInt MulFactor = Divisor.multiplicativeInverse();
8194
8195 SDValue Quotient = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Dividend,
8196 N2: DAG.getConstant(Val: MulFactor, DL: dl, VT));
8197
8198 // Split the quotient into low and high parts.
8199 SDValue QuotL, QuotH;
8200 std::tie(args&: QuotL, args&: QuotH) = DAG.SplitScalar(N: Quotient, DL: dl, LoVT: HiLoVT, HiVT: HiLoVT);
8201 Result.push_back(Elt: QuotL);
8202 Result.push_back(Elt: QuotH);
8203 }
8204
8205 if (Opcode != ISD::UDIV) {
8206 // If we shifted the input, shift the remainder left and add the bits we
8207 // shifted off the input.
8208 if (TrailingZeros) {
8209 RemL = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HiLoVT, N1: RemL,
8210 N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl));
8211 RemL = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: RemL, N2: PartialRem);
8212 }
8213 Result.push_back(Elt: RemL);
8214 Result.push_back(Elt: DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT));
8215 }
8216
8217 return true;
8218}
8219
8220// Check that (every element of) Z is undef or not an exact multiple of BW.
8221static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
8222 return ISD::matchUnaryPredicate(
8223 Op: Z,
8224 Match: [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(RHS: BW) != 0; },
8225 /*AllowUndefs=*/true, /*AllowTruncation=*/true);
8226}
8227
8228static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
8229 EVT VT = Node->getValueType(ResNo: 0);
8230 SDValue ShX, ShY;
8231 SDValue ShAmt, InvShAmt;
8232 SDValue X = Node->getOperand(Num: 0);
8233 SDValue Y = Node->getOperand(Num: 1);
8234 SDValue Z = Node->getOperand(Num: 2);
8235 SDValue Mask = Node->getOperand(Num: 3);
8236 SDValue VL = Node->getOperand(Num: 4);
8237
8238 unsigned BW = VT.getScalarSizeInBits();
8239 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
8240 SDLoc DL(SDValue(Node, 0));
8241
8242 EVT ShVT = Z.getValueType();
8243 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8244 // fshl: X << C | Y >> (BW - C)
8245 // fshr: X << (BW - C) | Y >> C
8246 // where C = Z % BW is not zero
8247 SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
8248 ShAmt = DAG.getNode(Opcode: ISD::VP_UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC, N3: Mask, N4: VL);
8249 InvShAmt = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT: ShVT, N1: BitWidthC, N2: ShAmt, N3: Mask, N4: VL);
8250 ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: IsFSHL ? ShAmt : InvShAmt, N3: Mask,
8251 N4: VL);
8252 ShY = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT, N1: Y, N2: IsFSHL ? InvShAmt : ShAmt, N3: Mask,
8253 N4: VL);
8254 } else {
8255 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8256 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8257 SDValue BitMask = DAG.getConstant(Val: BW - 1, DL, VT: ShVT);
8258 if (isPowerOf2_32(Value: BW)) {
8259 // Z % BW -> Z & (BW - 1)
8260 ShAmt = DAG.getNode(Opcode: ISD::VP_AND, DL, VT: ShVT, N1: Z, N2: BitMask, N3: Mask, N4: VL);
8261 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8262 SDValue NotZ = DAG.getNode(Opcode: ISD::VP_XOR, DL, VT: ShVT, N1: Z,
8263 N2: DAG.getAllOnesConstant(DL, VT: ShVT), N3: Mask, N4: VL);
8264 InvShAmt = DAG.getNode(Opcode: ISD::VP_AND, DL, VT: ShVT, N1: NotZ, N2: BitMask, N3: Mask, N4: VL);
8265 } else {
8266 SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
8267 ShAmt = DAG.getNode(Opcode: ISD::VP_UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC, N3: Mask, N4: VL);
8268 InvShAmt = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT: ShVT, N1: BitMask, N2: ShAmt, N3: Mask, N4: VL);
8269 }
8270
8271 SDValue One = DAG.getConstant(Val: 1, DL, VT: ShVT);
8272 if (IsFSHL) {
8273 ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: ShAmt, N3: Mask, N4: VL);
8274 SDValue ShY1 = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT, N1: Y, N2: One, N3: Mask, N4: VL);
8275 ShY = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT, N1: ShY1, N2: InvShAmt, N3: Mask, N4: VL);
8276 } else {
8277 SDValue ShX1 = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: One, N3: Mask, N4: VL);
8278 ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: ShX1, N2: InvShAmt, N3: Mask, N4: VL);
8279 ShY = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT, N1: Y, N2: ShAmt, N3: Mask, N4: VL);
8280 }
8281 }
8282 return DAG.getNode(Opcode: ISD::VP_OR, DL, VT, N1: ShX, N2: ShY, N3: Mask, N4: VL);
8283}
8284
8285SDValue TargetLowering::expandFunnelShift(SDNode *Node,
8286 SelectionDAG &DAG) const {
8287 if (Node->isVPOpcode())
8288 return expandVPFunnelShift(Node, DAG);
8289
8290 EVT VT = Node->getValueType(ResNo: 0);
8291
8292 if (VT.isVector() && (!isOperationLegalOrCustom(Op: ISD::SHL, VT) ||
8293 !isOperationLegalOrCustom(Op: ISD::SRL, VT) ||
8294 !isOperationLegalOrCustom(Op: ISD::SUB, VT) ||
8295 !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)))
8296 return SDValue();
8297
8298 SDValue X = Node->getOperand(Num: 0);
8299 SDValue Y = Node->getOperand(Num: 1);
8300 SDValue Z = Node->getOperand(Num: 2);
8301
8302 unsigned BW = VT.getScalarSizeInBits();
8303 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
8304 SDLoc DL(SDValue(Node, 0));
8305
8306 EVT ShVT = Z.getValueType();
8307
  // If a funnel shift in the other direction is better supported, use it.
8309 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
8310 if (!isOperationLegalOrCustom(Op: Node->getOpcode(), VT) &&
8311 isOperationLegalOrCustom(Op: RevOpcode, VT) && isPowerOf2_32(Value: BW)) {
8312 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8313 // fshl X, Y, Z -> fshr X, Y, -Z
8314 // fshr X, Y, Z -> fshl X, Y, -Z
8315 Z = DAG.getNegative(Val: Z, DL, VT: ShVT);
8316 } else {
8317 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8318 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
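      // Since BW is a power of two, ~Z is congruent to BW - 1 - Z modulo BW;
      // the explicit pre-shifts by 1 contribute the remaining bit, so the
      // total shift amount is again Z modulo BW.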
8319 SDValue One = DAG.getConstant(Val: 1, DL, VT: ShVT);
8320 if (IsFSHL) {
8321 Y = DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: One);
8322 X = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: X, N2: One);
8323 } else {
8324 X = DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: One);
8325 Y = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Y, N2: One);
8326 }
8327 Z = DAG.getNOT(DL, Val: Z, VT: ShVT);
8328 }
8329 return DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: Z);
8330 }
8331
8332 SDValue ShX, ShY;
8333 SDValue ShAmt, InvShAmt;
8334 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8335 // fshl: X << C | Y >> (BW - C)
8336 // fshr: X << (BW - C) | Y >> C
8337 // where C = Z % BW is not zero
8338 SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
8339 ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC);
8340 InvShAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: BitWidthC, N2: ShAmt);
8341 ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: IsFSHL ? ShAmt : InvShAmt);
8342 ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: IsFSHL ? InvShAmt : ShAmt);
8343 } else {
8344 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8345 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
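    // Splitting the second shift into a shift by 1 followed by a shift by
    // BW - 1 - (Z % BW) keeps every shift amount within [0, BW - 1], so the
    // Z % BW == 0 case never requires an out-of-range shift by BW.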
8346 SDValue Mask = DAG.getConstant(Val: BW - 1, DL, VT: ShVT);
8347 if (isPowerOf2_32(Value: BW)) {
8348 // Z % BW -> Z & (BW - 1)
8349 ShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: Z, N2: Mask);
8350 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8351 InvShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: DAG.getNOT(DL, Val: Z, VT: ShVT), N2: Mask);
8352 } else {
8353 SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
8354 ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC);
8355 InvShAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Mask, N2: ShAmt);
8356 }
8357
8358 SDValue One = DAG.getConstant(Val: 1, DL, VT: ShVT);
8359 if (IsFSHL) {
8360 ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: ShAmt);
8361 SDValue ShY1 = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: One);
8362 ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: ShY1, N2: InvShAmt);
8363 } else {
8364 SDValue ShX1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: One);
8365 ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ShX1, N2: InvShAmt);
8366 ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: ShAmt);
8367 }
8368 }
8369 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShX, N2: ShY);
8370}
8371
8372// TODO: Merge with expandFunnelShift.
8373SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
8374 SelectionDAG &DAG) const {
8375 EVT VT = Node->getValueType(ResNo: 0);
8376 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8377 bool IsLeft = Node->getOpcode() == ISD::ROTL;
8378 SDValue Op0 = Node->getOperand(Num: 0);
8379 SDValue Op1 = Node->getOperand(Num: 1);
8380 SDLoc DL(SDValue(Node, 0));
8381
8382 EVT ShVT = Op1.getValueType();
8383 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: ShVT);
8384
  // If a rotate in the other direction is better supported, use it.
8386 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8387 if (!isOperationLegalOrCustom(Op: Node->getOpcode(), VT) &&
8388 isOperationLegalOrCustom(Op: RevRot, VT) && isPowerOf2_32(Value: EltSizeInBits)) {
8389 SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Zero, N2: Op1);
8390 return DAG.getNode(Opcode: RevRot, DL, VT, N1: Op0, N2: Sub);
8391 }
8392
8393 if (!AllowVectorOps && VT.isVector() &&
8394 (!isOperationLegalOrCustom(Op: ISD::SHL, VT) ||
8395 !isOperationLegalOrCustom(Op: ISD::SRL, VT) ||
8396 !isOperationLegalOrCustom(Op: ISD::SUB, VT) ||
8397 !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT) ||
8398 !isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT)))
8399 return SDValue();
8400
8401 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8402 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8403 SDValue BitWidthMinusOneC = DAG.getConstant(Val: EltSizeInBits - 1, DL, VT: ShVT);
8404 SDValue ShVal;
8405 SDValue HsVal;
8406 if (isPowerOf2_32(Value: EltSizeInBits)) {
8407 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8408 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
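    // For example, with w = 32 and c = 8: rotl x, 8 = (x << 8) | (x >> 24),
    // since -8 & 31 == 24. For c == 0 both masked amounts are 0 and the two
    // shifts harmlessly OR x with itself.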
8409 SDValue NegOp1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Zero, N2: Op1);
8410 SDValue ShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: Op1, N2: BitWidthMinusOneC);
8411 ShVal = DAG.getNode(Opcode: ShOpc, DL, VT, N1: Op0, N2: ShAmt);
8412 SDValue HsAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: NegOp1, N2: BitWidthMinusOneC);
8413 HsVal = DAG.getNode(Opcode: HsOpc, DL, VT, N1: Op0, N2: HsAmt);
8414 } else {
8415 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8416 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8417 SDValue BitWidthC = DAG.getConstant(Val: EltSizeInBits, DL, VT: ShVT);
8418 SDValue ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Op1, N2: BitWidthC);
8419 ShVal = DAG.getNode(Opcode: ShOpc, DL, VT, N1: Op0, N2: ShAmt);
8420 SDValue HsAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: BitWidthMinusOneC, N2: ShAmt);
8421 SDValue One = DAG.getConstant(Val: 1, DL, VT: ShVT);
8422 HsVal =
8423 DAG.getNode(Opcode: HsOpc, DL, VT, N1: DAG.getNode(Opcode: HsOpc, DL, VT, N1: Op0, N2: One), N2: HsAmt);
8424 }
8425 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShVal, N2: HsVal);
8426}
8427
8428SDValue TargetLowering::expandCLMUL(SDNode *Node, SelectionDAG &DAG) const {
8429 SDLoc DL(Node);
8430 EVT VT = Node->getValueType(ResNo: 0);
8431 SDValue X = Node->getOperand(Num: 0);
8432 SDValue Y = Node->getOperand(Num: 1);
8433 unsigned BW = VT.getScalarSizeInBits();
8434 unsigned Opcode = Node->getOpcode();
8435
8436 switch (Opcode) {
8437 case ISD::CLMUL: {
8438 // NOTE: If you change this expansion, please update the cost model
8439 // calculation in BasicTTIImpl::getTypeBasedIntrinsicInstrCost for
8440 // Intrinsic::clmul.
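    // CLMUL is a carry-less (GF(2) polynomial) multiply: partial products are
    // combined with XOR rather than ADD. For example, clmul(0b11, 0b11) =
    // 0b101, because the partial products 0b11 and 0b110 XOR to 0b101.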
8441 SDValue Res = DAG.getConstant(Val: 0, DL, VT);
8442 for (unsigned I = 0; I < BW; ++I) {
8443 SDValue Mask = DAG.getConstant(Val: APInt::getOneBitSet(numBits: BW, BitNo: I), DL, VT);
8444 SDValue YMasked = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Y, N2: Mask);
8445 SDValue Mul = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: X, N2: YMasked);
8446 Res = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Res, N2: Mul);
8447 }
8448 return Res;
8449 }
8450 case ISD::CLMULR:
8451 // If we have CLMUL/CLMULH, merge the shifted results to form CLMULR.
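    // The full carry-less product has 2 * BW - 1 significant bits; CLMUL
    // returns its low BW bits and CLMULH its high BW bits, so CLMULR (bits
    // [2 * BW - 2 : BW - 1]) is (CLMUL >> (BW - 1)) | (CLMULH << 1).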
8452 if (isOperationLegalOrCustom(Op: ISD::CLMUL, VT) &&
8453 isOperationLegalOrCustom(Op: ISD::CLMULH, VT)) {
8454 SDValue Lo = DAG.getNode(Opcode: ISD::CLMUL, DL, VT, N1: X, N2: Y);
8455 SDValue Hi = DAG.getNode(Opcode: ISD::CLMULH, DL, VT, N1: X, N2: Y);
8456 Lo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo,
8457 N2: DAG.getShiftAmountConstant(Val: BW - 1, VT, DL));
8458 Hi = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi,
8459 N2: DAG.getShiftAmountConstant(Val: 1, VT, DL));
8460 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Lo, N2: Hi);
8461 }
8462 [[fallthrough]];
8463 case ISD::CLMULH: {
8464 EVT ExtVT = VT.changeElementType(
8465 Context&: *DAG.getContext(), EltVT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: 2 * BW));
    // For example, i64-based (ExtVT) operations aren't legal on a 32-bit
    // target; use the bitreverse-based lowering in that case.
8468 if (!isOperationLegalOrCustom(Op: ISD::ZERO_EXTEND, VT: ExtVT) ||
8469 !isOperationLegalOrCustom(Op: ISD::SRL, VT: ExtVT)) {
8470 SDValue XRev = DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT, Operand: X);
8471 SDValue YRev = DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT, Operand: Y);
8472 SDValue ClMul = DAG.getNode(Opcode: ISD::CLMUL, DL, VT, N1: XRev, N2: YRev);
8473 SDValue Res = DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT, Operand: ClMul);
8474 if (Opcode == ISD::CLMULH)
8475 Res = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Res,
8476 N2: DAG.getShiftAmountConstant(Val: 1, VT, DL));
8477 return Res;
8478 }
8479 SDValue XExt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: ExtVT, Operand: X);
8480 SDValue YExt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: ExtVT, Operand: Y);
8481 SDValue ClMul = DAG.getNode(Opcode: ISD::CLMUL, DL, VT: ExtVT, N1: XExt, N2: YExt);
8482 unsigned ShAmt = Opcode == ISD::CLMULR ? BW - 1 : BW;
8483 SDValue HiBits = DAG.getNode(Opcode: ISD::SRL, DL, VT: ExtVT, N1: ClMul,
8484 N2: DAG.getShiftAmountConstant(Val: ShAmt, VT: ExtVT, DL));
8485 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: HiBits);
8486 }
8487 }
8488 llvm_unreachable("Expected CLMUL, CLMULR, or CLMULH");
8489}
8490
8491void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
8492 SelectionDAG &DAG) const {
8493 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
8494 EVT VT = Node->getValueType(ResNo: 0);
8495 unsigned VTBits = VT.getScalarSizeInBits();
8496 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
8497
8498 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
8499 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
8500 SDValue ShOpLo = Node->getOperand(Num: 0);
8501 SDValue ShOpHi = Node->getOperand(Num: 1);
8502 SDValue ShAmt = Node->getOperand(Num: 2);
8503 EVT ShAmtVT = ShAmt.getValueType();
8504 EVT ShAmtCCVT =
8505 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: ShAmtVT);
8506 SDLoc dl(Node);
8507
  // ISD::FSHL and ISD::FSHR have defined overflow behavior, but ISD::SHL and
  // ISD::SRA/SRL nodes don't. Insert an AND to be safe; it's usually optimized
  // away during isel.
8511 SDValue SafeShAmt = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ShAmtVT, N1: ShAmt,
8512 N2: DAG.getConstant(Val: VTBits - 1, DL: dl, VT: ShAmtVT));
8513 SDValue Tmp1 = IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: ShOpHi,
8514 N2: DAG.getConstant(Val: VTBits - 1, DL: dl, VT: ShAmtVT))
8515 : DAG.getConstant(Val: 0, DL: dl, VT);
8516
8517 SDValue Tmp2, Tmp3;
8518 if (IsSHL) {
8519 Tmp2 = DAG.getNode(Opcode: ISD::FSHL, DL: dl, VT, N1: ShOpHi, N2: ShOpLo, N3: ShAmt);
8520 Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: ShOpLo, N2: SafeShAmt);
8521 } else {
8522 Tmp2 = DAG.getNode(Opcode: ISD::FSHR, DL: dl, VT, N1: ShOpHi, N2: ShOpLo, N3: ShAmt);
8523 Tmp3 = DAG.getNode(Opcode: IsSRA ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: ShOpHi, N2: SafeShAmt);
8524 }
8525
  // If the shift amount is larger than or equal to the width of a part, we
  // don't use the result from the FSHL/FSHR. Insert a test and select the
  // appropriate values for large shift amounts.
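  // For example, splitting an i64 shift into i32 parts (VTBits = 32), SHL with
  // ShAmt = 40 has the VTBits bit set, so we select Hi = Lo << (40 & 31) and
  // Lo = 0 rather than the FSHL-based results.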
8529 SDValue AndNode = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ShAmtVT, N1: ShAmt,
8530 N2: DAG.getConstant(Val: VTBits, DL: dl, VT: ShAmtVT));
8531 SDValue Cond = DAG.getSetCC(DL: dl, VT: ShAmtCCVT, LHS: AndNode,
8532 RHS: DAG.getConstant(Val: 0, DL: dl, VT: ShAmtVT), Cond: ISD::SETNE);
8533
8534 if (IsSHL) {
8535 Hi = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp3, N3: Tmp2);
8536 Lo = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp1, N3: Tmp3);
8537 } else {
8538 Lo = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp3, N3: Tmp2);
8539 Hi = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp1, N3: Tmp3);
8540 }
8541}
8542
8543bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
8544 SelectionDAG &DAG) const {
8545 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8546 SDValue Src = Node->getOperand(Num: OpNo);
8547 EVT SrcVT = Src.getValueType();
8548 EVT DstVT = Node->getValueType(ResNo: 0);
8549 SDLoc dl(SDValue(Node, 0));
8550
8551 // FIXME: Only f32 to i64 conversions are supported.
8552 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
8553 return false;
8554
8555 if (Node->isStrictFPOpcode())
    // When a NaN is converted to an integer, a trap is allowed. We can't
    // use this expansion here because it would eliminate that trap. Other
    // traps are also allowed and cannot be eliminated. See
    // IEEE 754-2008 sec 5.8.
8560 return false;
8561
8562 // Expand f32 -> i64 conversion
8563 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8564 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
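  // Roughly: decode the sign S, biased exponent E and mantissa M from the
  // bits, form the significand M | (1 << 23), shift it left or right by the
  // distance between the unbiased exponent and the mantissa width (23), and
  // finally apply the sign via (R ^ Sign) - Sign (conditional negation).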
8565 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8566 EVT IntVT = SrcVT.changeTypeToInteger();
8567 EVT IntShVT = getShiftAmountTy(LHSTy: IntVT, DL: DAG.getDataLayout());
8568
8569 SDValue ExponentMask = DAG.getConstant(Val: 0x7F800000, DL: dl, VT: IntVT);
8570 SDValue ExponentLoBit = DAG.getConstant(Val: 23, DL: dl, VT: IntVT);
8571 SDValue Bias = DAG.getConstant(Val: 127, DL: dl, VT: IntVT);
8572 SDValue SignMask = DAG.getConstant(Val: APInt::getSignMask(BitWidth: SrcEltBits), DL: dl, VT: IntVT);
8573 SDValue SignLowBit = DAG.getConstant(Val: SrcEltBits - 1, DL: dl, VT: IntVT);
8574 SDValue MantissaMask = DAG.getConstant(Val: 0x007FFFFF, DL: dl, VT: IntVT);
8575
8576 SDValue Bits = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: IntVT, Operand: Src);
8577
8578 SDValue ExponentBits = DAG.getNode(
8579 Opcode: ISD::SRL, DL: dl, VT: IntVT, N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: ExponentMask),
8580 N2: DAG.getZExtOrTrunc(Op: ExponentLoBit, DL: dl, VT: IntShVT));
8581 SDValue Exponent = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: ExponentBits, N2: Bias);
8582
8583 SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: IntVT,
8584 N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: SignMask),
8585 N2: DAG.getZExtOrTrunc(Op: SignLowBit, DL: dl, VT: IntShVT));
8586 Sign = DAG.getSExtOrTrunc(Op: Sign, DL: dl, VT: DstVT);
8587
8588 SDValue R = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: IntVT,
8589 N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: MantissaMask),
8590 N2: DAG.getConstant(Val: 0x00800000, DL: dl, VT: IntVT));
8591
8592 R = DAG.getZExtOrTrunc(Op: R, DL: dl, VT: DstVT);
8593
8594 R = DAG.getSelectCC(
8595 DL: dl, LHS: Exponent, RHS: ExponentLoBit,
8596 True: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: DstVT, N1: R,
8597 N2: DAG.getZExtOrTrunc(
8598 Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: Exponent, N2: ExponentLoBit),
8599 DL: dl, VT: IntShVT)),
8600 False: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: DstVT, N1: R,
8601 N2: DAG.getZExtOrTrunc(
8602 Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: ExponentLoBit, N2: Exponent),
8603 DL: dl, VT: IntShVT)),
8604 Cond: ISD::SETGT);
8605
8606 SDValue Ret = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: DstVT,
8607 N1: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: R, N2: Sign), N2: Sign);
8608
8609 Result = DAG.getSelectCC(DL: dl, LHS: Exponent, RHS: DAG.getConstant(Val: 0, DL: dl, VT: IntVT),
8610 True: DAG.getConstant(Val: 0, DL: dl, VT: DstVT), False: Ret, Cond: ISD::SETLT);
8611 return true;
8612}
8613
8614bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
8615 SDValue &Chain,
8616 SelectionDAG &DAG) const {
8617 SDLoc dl(SDValue(Node, 0));
8618 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8619 SDValue Src = Node->getOperand(Num: OpNo);
8620
8621 EVT SrcVT = Src.getValueType();
8622 EVT DstVT = Node->getValueType(ResNo: 0);
8623 EVT SetCCVT =
8624 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: SrcVT);
8625 EVT DstSetCCVT =
8626 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: DstVT);
8627
8628 // Only expand vector types if we have the appropriate vector bit operations.
8629 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
8630 ISD::FP_TO_SINT;
8631 if (DstVT.isVector() && (!isOperationLegalOrCustom(Op: SIntOpcode, VT: DstVT) ||
8632 !isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT: SrcVT)))
8633 return false;
8634
  // If the maximum float value is smaller than the signed integer range, the
  // destination signmask can't be represented by the float, so we can just use
  // FP_TO_SINT directly.
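  // For example, every finite f16 is far below 2^63, so an f16 -> i64
  // FP_TO_UINT can be lowered directly as FP_TO_SINT.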
8638 const fltSemantics &APFSem = SrcVT.getFltSemantics();
8639 APFloat APF(APFSem, APInt::getZero(numBits: SrcVT.getScalarSizeInBits()));
8640 APInt SignMask = APInt::getSignMask(BitWidth: DstVT.getScalarSizeInBits());
8641 if (APFloat::opOverflow &
8642 APF.convertFromAPInt(Input: SignMask, IsSigned: false, RM: APFloat::rmNearestTiesToEven)) {
8643 if (Node->isStrictFPOpcode()) {
8644 Result = DAG.getNode(Opcode: ISD::STRICT_FP_TO_SINT, DL: dl, ResultTys: { DstVT, MVT::Other },
8645 Ops: { Node->getOperand(Num: 0), Src });
8646 Chain = Result.getValue(R: 1);
8647 } else
8648 Result = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Src);
8649 return true;
8650 }
8651
  // Don't expand if there is no cheap FSUB instruction.
8653 if (!isOperationLegalOrCustom(
8654 Op: Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, VT: SrcVT))
8655 return false;
8656
8657 SDValue Cst = DAG.getConstantFP(Val: APF, DL: dl, VT: SrcVT);
8658 SDValue Sel;
8659
8660 if (Node->isStrictFPOpcode()) {
8661 Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT,
8662 Chain: Node->getOperand(Num: 0), /*IsSignaling*/ true);
8663 Chain = Sel.getValue(R: 1);
8664 } else {
8665 Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT);
8666 }
8667
8668 bool Strict = Node->isStrictFPOpcode() ||
8669 shouldUseStrictFP_TO_INT(FpVT: SrcVT, IntVT: DstVT, /*IsSigned*/ false);
8670
8671 if (Strict) {
    // Expand based on the maximum range of FP_TO_SINT: if the value exceeds
    // the signmask, subtract an offset first (the result of which should be
    // fully representable) and XOR the integer offset back in afterwards.
8674 // Sel = Src < 0x8000000000000000
8675 // FltOfs = select Sel, 0, 0x8000000000000000
8676 // IntOfs = select Sel, 0, 0x8000000000000000
8677 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8678
8679 // TODO: Should any fast-math-flags be set for the FSUB?
8680 SDValue FltOfs = DAG.getSelect(DL: dl, VT: SrcVT, Cond: Sel,
8681 LHS: DAG.getConstantFP(Val: 0.0, DL: dl, VT: SrcVT), RHS: Cst);
8682 Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT);
8683 SDValue IntOfs = DAG.getSelect(DL: dl, VT: DstVT, Cond: Sel,
8684 LHS: DAG.getConstant(Val: 0, DL: dl, VT: DstVT),
8685 RHS: DAG.getConstant(Val: SignMask, DL: dl, VT: DstVT));
8686 SDValue SInt;
8687 if (Node->isStrictFPOpcode()) {
8688 SDValue Val = DAG.getNode(Opcode: ISD::STRICT_FSUB, DL: dl, ResultTys: { SrcVT, MVT::Other },
8689 Ops: { Chain, Src, FltOfs });
8690 SInt = DAG.getNode(Opcode: ISD::STRICT_FP_TO_SINT, DL: dl, ResultTys: { DstVT, MVT::Other },
8691 Ops: { Val.getValue(R: 1), Val });
8692 Chain = SInt.getValue(R: 1);
8693 } else {
8694 SDValue Val = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: SrcVT, N1: Src, N2: FltOfs);
8695 SInt = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Val);
8696 }
8697 Result = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: SInt, N2: IntOfs);
8698 } else {
8699 // Expand based on maximum range of FP_TO_SINT:
8700 // True = fp_to_sint(Src)
8701 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
8702 // Result = select (Src < 0x8000000000000000), True, False
8703
8704 SDValue True = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Src);
8705 // TODO: Should any fast-math-flags be set for the FSUB?
8706 SDValue False = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT,
8707 Operand: DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: SrcVT, N1: Src, N2: Cst));
8708 False = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: False,
8709 N2: DAG.getConstant(Val: SignMask, DL: dl, VT: DstVT));
8710 Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT);
8711 Result = DAG.getSelect(DL: dl, VT: DstVT, Cond: Sel, LHS: True, RHS: False);
8712 }
8713 return true;
8714}
8715
8716bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
8717 SDValue &Chain, SelectionDAG &DAG) const {
  // This transform is not correct for converting 0 when the rounding mode is
  // set to round toward negative infinity, as it would produce -0.0. So
  // disable it under strictfp.
8721 if (Node->isStrictFPOpcode())
8722 return false;
8723
8724 SDValue Src = Node->getOperand(Num: 0);
8725 EVT SrcVT = Src.getValueType();
8726 EVT DstVT = Node->getValueType(ResNo: 0);
8727
8728 // If the input is known to be non-negative and SINT_TO_FP is legal then use
8729 // it.
8730 if (Node->getFlags().hasNonNeg() &&
8731 isOperationLegalOrCustom(Op: ISD::SINT_TO_FP, VT: SrcVT)) {
8732 Result =
8733 DAG.getNode(Opcode: ISD::SINT_TO_FP, DL: SDLoc(Node), VT: DstVT, Operand: Node->getOperand(Num: 0));
8734 return true;
8735 }
8736
8737 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
8738 return false;
8739
8740 // Only expand vector types if we have the appropriate vector bit
8741 // operations.
8742 if (SrcVT.isVector() && (!isOperationLegalOrCustom(Op: ISD::SRL, VT: SrcVT) ||
8743 !isOperationLegalOrCustom(Op: ISD::FADD, VT: DstVT) ||
8744 !isOperationLegalOrCustom(Op: ISD::FSUB, VT: DstVT) ||
8745 !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT: SrcVT) ||
8746 !isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT: SrcVT)))
8747 return false;
8748
8749 SDLoc dl(SDValue(Node, 0));
8750
8751 // Implementation of unsigned i64 to f64 following the algorithm in
8752 // __floatundidf in compiler_rt. This implementation performs rounding
8753 // correctly in all rounding modes with the exception of converting 0
8754 // when rounding toward negative infinity. In that case the fsub will
8755 // produce -0.0. This will be added to +0.0 and produce -0.0 which is
8756 // incorrect.
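  // The magic constants encode powers of two in the f64 format: 2^52
  // (0x4330...) and 2^84 (0x4530...). ORing the low 32 bits into the mantissa
  // of 2^52 gives the double 2^52 + Lo, and ORing the high 32 bits into the
  // mantissa of 2^84 gives 2^84 + Hi * 2^32; subtracting (2^84 + 2^52) and
  // adding the two halves yields Hi * 2^32 + Lo exactly.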
8757 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), DL: dl, VT: SrcVT);
8758 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
8759 Val: llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), DL: dl, VT: DstVT);
8760 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), DL: dl, VT: SrcVT);
8761 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), DL: dl, VT: SrcVT);
8762 SDValue HiShift = DAG.getShiftAmountConstant(Val: 32, VT: SrcVT, DL: dl);
8763
8764 SDValue Lo = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: SrcVT, N1: Src, N2: LoMask);
8765 SDValue Hi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: SrcVT, N1: Src, N2: HiShift);
8766 SDValue LoOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: SrcVT, N1: Lo, N2: TwoP52);
8767 SDValue HiOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: SrcVT, N1: Hi, N2: TwoP84);
8768 SDValue LoFlt = DAG.getBitcast(VT: DstVT, V: LoOr);
8769 SDValue HiFlt = DAG.getBitcast(VT: DstVT, V: HiOr);
8770 SDValue HiSub = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: DstVT, N1: HiFlt, N2: TwoP84PlusTwoP52);
8771 Result = DAG.getNode(Opcode: ISD::FADD, DL: dl, VT: DstVT, N1: LoFlt, N2: HiSub);
8772 return true;
8773}
8774
8775SDValue
8776TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
8777 SelectionDAG &DAG) const {
8778 unsigned Opcode = Node->getOpcode();
8779 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8780 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8781 "Wrong opcode");
8782
8783 if (Node->getFlags().hasNoNaNs()) {
8784 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8785 EVT VT = Node->getValueType(ResNo: 0);
8786 if ((!isCondCodeLegal(CC: Pred, VT: VT.getSimpleVT()) ||
8787 !isOperationLegalOrCustom(Op: ISD::VSELECT, VT)) &&
8788 VT.isVector())
8789 return SDValue();
8790 SDValue Op1 = Node->getOperand(Num: 0);
8791 SDValue Op2 = Node->getOperand(Num: 1);
8792 return DAG.getSelectCC(DL: SDLoc(Node), LHS: Op1, RHS: Op2, True: Op1, False: Op2, Cond: Pred,
8793 Flags: Node->getFlags());
8794 }
8795
8796 return SDValue();
8797}
8798
8799SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
8800 SelectionDAG &DAG) const {
8801 if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
8802 return Expanded;
8803
8804 EVT VT = Node->getValueType(ResNo: 0);
8805 if (VT.isScalableVector())
8806 report_fatal_error(
8807 reason: "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
8808
8809 SDLoc dl(Node);
8810 unsigned NewOp =
8811 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8812
8813 if (isOperationLegalOrCustom(Op: NewOp, VT)) {
8814 SDValue Quiet0 = Node->getOperand(Num: 0);
8815 SDValue Quiet1 = Node->getOperand(Num: 1);
8816
8817 if (!Node->getFlags().hasNoNaNs()) {
      // Insert canonicalizes if we may need to quiet the inputs to get
      // correct sNaN behavior.
8820 if (!DAG.isKnownNeverSNaN(Op: Quiet0)) {
8821 Quiet0 = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL: dl, VT, Operand: Quiet0,
8822 Flags: Node->getFlags());
8823 }
8824 if (!DAG.isKnownNeverSNaN(Op: Quiet1)) {
8825 Quiet1 = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL: dl, VT, Operand: Quiet1,
8826 Flags: Node->getFlags());
8827 }
8828 }
8829
8830 return DAG.getNode(Opcode: NewOp, DL: dl, VT, N1: Quiet0, N2: Quiet1, Flags: Node->getFlags());
8831 }
8832
  // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM, use that
  // instead if there are no NaNs.
8835 if (Node->getFlags().hasNoNaNs() ||
8836 (DAG.isKnownNeverNaN(Op: Node->getOperand(Num: 0)) &&
8837 DAG.isKnownNeverNaN(Op: Node->getOperand(Num: 1)))) {
8838 unsigned IEEE2018Op =
8839 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8840 if (isOperationLegalOrCustom(Op: IEEE2018Op, VT))
8841 return DAG.getNode(Opcode: IEEE2018Op, DL: dl, VT, N1: Node->getOperand(Num: 0),
8842 N2: Node->getOperand(Num: 1), Flags: Node->getFlags());
8843 }
8844
8845 if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
8846 return SelCC;
8847
8848 return SDValue();
8849}
8850
8851SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
8852 SelectionDAG &DAG) const {
8853 if (SDValue Expanded = expandVectorNaryOpBySplitting(Node: N, DAG))
8854 return Expanded;
8855
8856 SDLoc DL(N);
8857 SDValue LHS = N->getOperand(Num: 0);
8858 SDValue RHS = N->getOperand(Num: 1);
8859 unsigned Opc = N->getOpcode();
8860 EVT VT = N->getValueType(ResNo: 0);
8861 EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
8862 bool IsMax = Opc == ISD::FMAXIMUM;
8863 SDNodeFlags Flags = N->getFlags();
8864
  // First, implement the comparison without NaN propagation. If no native fmin
  // or fmax is available, use a plain select with setcc instead.
8867 SDValue MinMax;
8868 unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
8869 unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
8870
8871 // FIXME: We should probably define fminnum/fmaxnum variants with correct
8872 // signed zero behavior.
8873 bool MinMaxMustRespectOrderedZero = false;
8874
8875 if (isOperationLegalOrCustom(Op: CompOpcIeee, VT)) {
8876 MinMax = DAG.getNode(Opcode: CompOpcIeee, DL, VT, N1: LHS, N2: RHS, Flags);
8877 MinMaxMustRespectOrderedZero = true;
8878 } else if (isOperationLegalOrCustom(Op: CompOpc, VT)) {
8879 MinMax = DAG.getNode(Opcode: CompOpc, DL, VT, N1: LHS, N2: RHS, Flags);
8880 } else {
8881 if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
8882 return DAG.UnrollVectorOp(N);
8883
    // Any NaN will be propagated later, so orderedness doesn't matter here.
8885 SDValue Compare =
8886 DAG.getSetCC(DL, VT: CCVT, LHS, RHS, Cond: IsMax ? ISD::SETOGT : ISD::SETOLT);
8887 MinMax = DAG.getSelect(DL, VT, Cond: Compare, LHS, RHS, Flags);
8888 }
8889
  // Propagate a NaN if either operand is a NaN.
8891 if (!N->getFlags().hasNoNaNs() &&
8892 (!DAG.isKnownNeverNaN(Op: RHS) || !DAG.isKnownNeverNaN(Op: LHS))) {
8893 ConstantFP *FPNaN = ConstantFP::get(Context&: *DAG.getContext(),
8894 V: APFloat::getNaN(Sem: VT.getFltSemantics()));
8895 MinMax = DAG.getSelect(DL, VT, Cond: DAG.getSetCC(DL, VT: CCVT, LHS, RHS, Cond: ISD::SETUO),
8896 LHS: DAG.getConstantFP(V: *FPNaN, DL, VT), RHS: MinMax, Flags);
8897 }
8898
  // fminimum/fmaximum requires -0.0 to be treated as less than +0.0.
8900 if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
8901 !DAG.isKnownNeverZeroFloat(Op: RHS) && !DAG.isKnownNeverZeroFloat(Op: LHS)) {
8902 SDValue IsZero = DAG.getSetCC(DL, VT: CCVT, LHS: MinMax,
8903 RHS: DAG.getConstantFP(Val: 0.0, DL, VT), Cond: ISD::SETOEQ);
8904 SDValue TestZero =
8905 DAG.getTargetConstant(Val: IsMax ? fcPosZero : fcNegZero, DL, VT: MVT::i32);
8906 SDValue LCmp = DAG.getSelect(
8907 DL, VT, Cond: DAG.getNode(Opcode: ISD::IS_FPCLASS, DL, VT: CCVT, N1: LHS, N2: TestZero), LHS,
8908 RHS: MinMax, Flags);
8909 SDValue RCmp = DAG.getSelect(
8910 DL, VT, Cond: DAG.getNode(Opcode: ISD::IS_FPCLASS, DL, VT: CCVT, N1: RHS, N2: TestZero), LHS: RHS,
8911 RHS: LCmp, Flags);
8912 MinMax = DAG.getSelect(DL, VT, Cond: IsZero, LHS: RCmp, RHS: MinMax, Flags);
8913 }
8914
8915 return MinMax;
8916}
8917
8918SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
8919 SelectionDAG &DAG) const {
8920 SDLoc DL(Node);
8921 SDValue LHS = Node->getOperand(Num: 0);
8922 SDValue RHS = Node->getOperand(Num: 1);
8923 unsigned Opc = Node->getOpcode();
8924 EVT VT = Node->getValueType(ResNo: 0);
8925 EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
8926 bool IsMax = Opc == ISD::FMAXIMUMNUM;
8927 SDNodeFlags Flags = Node->getFlags();
8928
8929 unsigned NewOp =
8930 Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8931
8932 if (isOperationLegalOrCustom(Op: NewOp, VT)) {
8933 if (!Flags.hasNoNaNs()) {
      // Insert canonicalizes if we may need to quiet the inputs to get
      // correct sNaN behavior.
8936 if (!DAG.isKnownNeverSNaN(Op: LHS)) {
8937 LHS = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL, VT, Operand: LHS, Flags);
8938 }
8939 if (!DAG.isKnownNeverSNaN(Op: RHS)) {
8940 RHS = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL, VT, Operand: RHS, Flags);
8941 }
8942 }
8943
8944 return DAG.getNode(Opcode: NewOp, DL, VT, N1: LHS, N2: RHS, Flags);
8945 }
8946
  // We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has the same
  // behavior in all other cases, +0.0 vs -0.0 included.
8949 if (Flags.hasNoNaNs() ||
8950 (DAG.isKnownNeverNaN(Op: LHS) && DAG.isKnownNeverNaN(Op: RHS))) {
8951 unsigned IEEE2019Op =
8952 Opc == ISD::FMINIMUMNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8953 if (isOperationLegalOrCustom(Op: IEEE2019Op, VT))
8954 return DAG.getNode(Opcode: IEEE2019Op, DL, VT, N1: LHS, N2: RHS, Flags);
8955 }
8956
  // FMINNUM/FMAXNUM returns qNaN if either operand is sNaN, and it may return
  // either one for +0.0 vs -0.0.
8959 if ((Flags.hasNoNaNs() ||
8960 (DAG.isKnownNeverSNaN(Op: LHS) && DAG.isKnownNeverSNaN(Op: RHS))) &&
8961 (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(Op: LHS) ||
8962 DAG.isKnownNeverZeroFloat(Op: RHS))) {
8963 unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
8964 if (isOperationLegalOrCustom(Op: IEEE2008Op, VT))
8965 return DAG.getNode(Opcode: IEEE2008Op, DL, VT, N1: LHS, N2: RHS, Flags);
8966 }
8967
8968 if (VT.isVector() &&
8969 (isOperationLegalOrCustomOrPromote(Op: Opc, VT: VT.getVectorElementType()) ||
8970 !isOperationLegalOrCustom(Op: ISD::VSELECT, VT)))
8971 return DAG.UnrollVectorOp(N: Node);
8972
  // If only one operand is NaN, replace it with the other operand.
8974 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(Op: LHS)) {
8975 LHS = DAG.getSelectCC(DL, LHS, RHS: LHS, True: RHS, False: LHS, Cond: ISD::SETUO);
8976 }
8977 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(Op: RHS)) {
8978 RHS = DAG.getSelectCC(DL, LHS: RHS, RHS, True: LHS, False: RHS, Cond: ISD::SETUO);
8979 }
8980
8981 // Always prefer RHS if equal.
8982 SDValue MinMax =
8983 DAG.getSelectCC(DL, LHS, RHS, True: LHS, False: RHS, Cond: IsMax ? ISD::SETGT : ISD::SETLT);
8984
8985 // TODO: We need quiet sNaN if strictfp.
8986
8987 // Fixup signed zero behavior.
8988 if (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(Op: LHS) ||
8989 DAG.isKnownNeverZeroFloat(Op: RHS)) {
8990 return MinMax;
8991 }
8992 SDValue TestZero =
8993 DAG.getTargetConstant(Val: IsMax ? fcPosZero : fcNegZero, DL, VT: MVT::i32);
8994 SDValue IsZero = DAG.getSetCC(DL, VT: CCVT, LHS: MinMax,
8995 RHS: DAG.getConstantFP(Val: 0.0, DL, VT), Cond: ISD::SETEQ);
8996 EVT IntVT = VT.changeTypeToInteger();
8997 EVT FloatVT = VT.changeElementType(Context&: *DAG.getContext(), EltVT: MVT::f32);
8998 SDValue LHSTrunc = LHS;
8999 if (!isTypeLegal(VT: IntVT) && !isOperationLegalOrCustom(Op: ISD::IS_FPCLASS, VT)) {
9000 LHSTrunc = DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: FloatVT, N1: LHS,
9001 N2: DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true));
9002 }
  // It's OK to select from LHS and MinMax with only one ISD::IS_FPCLASS, as we
  // preferred RHS when generating MinMax if the operands are equal.
9005 SDValue RetZero = DAG.getSelect(
9006 DL, VT, Cond: DAG.getNode(Opcode: ISD::IS_FPCLASS, DL, VT: CCVT, N1: LHSTrunc, N2: TestZero), LHS,
9007 RHS: MinMax, Flags);
9008 return DAG.getSelect(DL, VT, Cond: IsZero, LHS: RetZero, RHS: MinMax, Flags);
9009}
9010
/// Returns a true value if this FPClassTest can be performed with an ordered
/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
/// std::nullopt if it cannot be performed as a compare with 0.
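/// For example, a test of fcZero becomes an ordered "x == 0.0" (assuming IEEE
/// denormal input handling), while fcZero | fcNan becomes an unordered
/// "x u== 0.0".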
9014static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
9015 const fltSemantics &Semantics,
9016 const MachineFunction &MF) {
9017 FPClassTest OrderedMask = Test & ~fcNan;
9018 FPClassTest NanTest = Test & fcNan;
9019 bool IsOrdered = NanTest == fcNone;
9020 bool IsUnordered = NanTest == fcNan;
9021
9022 // Skip cases that are testing for only a qnan or snan.
9023 if (!IsOrdered && !IsUnordered)
9024 return std::nullopt;
9025
9026 if (OrderedMask == fcZero &&
9027 MF.getDenormalMode(FPType: Semantics).Input == DenormalMode::IEEE)
9028 return IsOrdered;
9029 if (OrderedMask == (fcZero | fcSubnormal) &&
9030 MF.getDenormalMode(FPType: Semantics).inputsAreZero())
9031 return IsOrdered;
9032 return std::nullopt;
9033}
9034
9035SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
9036 const FPClassTest OrigTestMask,
9037 SDNodeFlags Flags, const SDLoc &DL,
9038 SelectionDAG &DAG) const {
9039 EVT OperandVT = Op.getValueType();
9040 assert(OperandVT.isFloatingPoint());
9041 FPClassTest Test = OrigTestMask;
9042
  // Degenerate cases.
9044 if (Test == fcNone)
9045 return DAG.getBoolConstant(V: false, DL, VT: ResultVT, OpVT: OperandVT);
9046 if (Test == fcAllFlags)
9047 return DAG.getBoolConstant(V: true, DL, VT: ResultVT, OpVT: OperandVT);
9048
  // PPC double-double is a pair of doubles, of which the higher part
  // determines the value class.
9051 if (OperandVT == MVT::ppcf128) {
9052 Op = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL, VT: MVT::f64, N1: Op,
9053 N2: DAG.getConstant(Val: 1, DL, VT: MVT::i32));
9054 OperandVT = MVT::f64;
9055 }
9056
9057 // Floating-point type properties.
9058 EVT ScalarFloatVT = OperandVT.getScalarType();
9059 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(Context&: *DAG.getContext());
9060 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
9061 bool IsF80 = (ScalarFloatVT == MVT::f80);
9062
9063 // Some checks can be implemented using float comparisons, if floating point
9064 // exceptions are ignored.
9065 if (Flags.hasNoFPExcept() &&
9066 isOperationLegalOrCustom(Op: ISD::SETCC, VT: OperandVT.getScalarType())) {
9067 FPClassTest FPTestMask = Test;
9068 bool IsInvertedFP = false;
9069
9070 if (FPClassTest InvertedFPCheck =
9071 invertFPClassTestIfSimpler(Test: FPTestMask, UseFCmp: true)) {
9072 FPTestMask = InvertedFPCheck;
9073 IsInvertedFP = true;
9074 }
9075
9076 ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
9077 ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
9078
    // See if we can fold an '| fcNan' into an unordered compare.
9080 FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
9081
9082 // Can't fold the ordered check if we're only testing for snan or qnan
9083 // individually.
9084 if ((FPTestMask & fcNan) != fcNan)
9085 OrderedFPTestMask = FPTestMask;
9086
9087 const bool IsOrdered = FPTestMask == OrderedFPTestMask;
9088
9089 if (std::optional<bool> IsCmp0 =
9090 isFCmpEqualZero(Test: FPTestMask, Semantics, MF: DAG.getMachineFunction());
9091 IsCmp0 && (isCondCodeLegalOrCustom(
9092 CC: *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
9093 VT: OperandVT.getScalarType().getSimpleVT()))) {
9094
9095 // If denormals could be implicitly treated as 0, this is not equivalent
9096 // to a compare with 0 since it will also be true for denormals.
9097 return DAG.getSetCC(DL, VT: ResultVT, LHS: Op,
9098 RHS: DAG.getConstantFP(Val: 0.0, DL, VT: OperandVT),
9099 Cond: *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
9100 }
9101
9102 if (FPTestMask == fcNan &&
9103 isCondCodeLegalOrCustom(CC: IsInvertedFP ? ISD::SETO : ISD::SETUO,
9104 VT: OperandVT.getScalarType().getSimpleVT()))
9105 return DAG.getSetCC(DL, VT: ResultVT, LHS: Op, RHS: Op,
9106 Cond: IsInvertedFP ? ISD::SETO : ISD::SETUO);
9107
9108 bool IsOrderedInf = FPTestMask == fcInf;
9109 if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
9110 isCondCodeLegalOrCustom(CC: IsOrderedInf ? OrderedCmpOpcode
9111 : UnorderedCmpOpcode,
9112 VT: OperandVT.getScalarType().getSimpleVT()) &&
9113 isOperationLegalOrCustom(Op: ISD::FABS, VT: OperandVT.getScalarType()) &&
9114 (isOperationLegal(Op: ISD::ConstantFP, VT: OperandVT.getScalarType()) ||
9115 (OperandVT.isVector() &&
9116 isOperationLegalOrCustom(Op: ISD::BUILD_VECTOR, VT: OperandVT)))) {
9117 // isinf(x) --> fabs(x) == inf
9118 SDValue Abs = DAG.getNode(Opcode: ISD::FABS, DL, VT: OperandVT, Operand: Op);
9119 SDValue Inf =
9120 DAG.getConstantFP(Val: APFloat::getInf(Sem: Semantics), DL, VT: OperandVT);
9121 return DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: Inf,
9122 Cond: IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
9123 }
9124
9125 if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
9126 isCondCodeLegalOrCustom(CC: IsOrdered ? OrderedCmpOpcode
9127 : UnorderedCmpOpcode,
9128 VT: OperandVT.getSimpleVT())) {
9129 // isposinf(x) --> x == inf
9130 // isneginf(x) --> x == -inf
9131 // isposinf(x) || nan --> x u== inf
9132 // isneginf(x) || nan --> x u== -inf
9133
9134 SDValue Inf = DAG.getConstantFP(
9135 Val: APFloat::getInf(Sem: Semantics, Negative: OrderedFPTestMask == fcNegInf), DL,
9136 VT: OperandVT);
9137 return DAG.getSetCC(DL, VT: ResultVT, LHS: Op, RHS: Inf,
9138 Cond: IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
9139 }
9140
9141 if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
9142 // TODO: Could handle ordered case, but it produces worse code for
9143 // x86. Maybe handle ordered if fabs is free?
9144
9145 ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9146 ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
9147
9148 if (isCondCodeLegalOrCustom(CC: IsOrdered ? OrderedOp : UnorderedOp,
9149 VT: OperandVT.getScalarType().getSimpleVT())) {
9150 // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal
9151
9152 // TODO: Maybe only makes sense if fabs is free. Integer test of
9153 // exponent bits seems better for x86.
9154 SDValue Abs = DAG.getNode(Opcode: ISD::FABS, DL, VT: OperandVT, Operand: Op);
9155 SDValue SmallestNormal = DAG.getConstantFP(
9156 Val: APFloat::getSmallestNormalized(Sem: Semantics), DL, VT: OperandVT);
9157 return DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: SmallestNormal,
9158 Cond: IsOrdered ? OrderedOp : UnorderedOp);
9159 }
9160 }
9161
9162 if (FPTestMask == fcNormal) {
9163 // TODO: Handle unordered
9164 ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9165 ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;
9166
9167 if (isCondCodeLegalOrCustom(CC: IsFiniteOp,
9168 VT: OperandVT.getScalarType().getSimpleVT()) &&
9169 isCondCodeLegalOrCustom(CC: IsNormalOp,
9170 VT: OperandVT.getScalarType().getSimpleVT()) &&
9171 isFAbsFree(VT: OperandVT)) {
9172 // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
9173 SDValue Inf =
9174 DAG.getConstantFP(Val: APFloat::getInf(Sem: Semantics), DL, VT: OperandVT);
9175 SDValue SmallestNormal = DAG.getConstantFP(
9176 Val: APFloat::getSmallestNormalized(Sem: Semantics), DL, VT: OperandVT);
9177
9178 SDValue Abs = DAG.getNode(Opcode: ISD::FABS, DL, VT: OperandVT, Operand: Op);
9179 SDValue IsFinite = DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: Inf, Cond: IsFiniteOp);
9180 SDValue IsNormal =
9181 DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: SmallestNormal, Cond: IsNormalOp);
9182 unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
9183 return DAG.getNode(Opcode: LogicOp, DL, VT: ResultVT, N1: IsFinite, N2: IsNormal);
9184 }
9185 }
9186 }
9187
  // Some checks may be represented as the inversion of a simpler check, for
  // example "inf|normal|subnormal|zero" => !"nan".
9190 bool IsInverted = false;
9191
9192 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, UseFCmp: false)) {
9193 Test = InvertedCheck;
9194 IsInverted = true;
9195 }
9196
9197 // In the general case use integer operations.
9198 unsigned BitSize = OperandVT.getScalarSizeInBits();
9199 EVT IntVT = OperandVT.changeElementType(
9200 Context&: *DAG.getContext(), EltVT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: BitSize));
9201 SDValue OpAsInt = DAG.getBitcast(VT: IntVT, V: Op);
9202
9203 // Various masks.
9204 APInt SignBit = APInt::getSignMask(BitWidth: BitSize);
9205 APInt ValueMask = APInt::getSignedMaxValue(numBits: BitSize); // All bits but sign.
9206 APInt Inf = APFloat::getInf(Sem: Semantics).bitcastToAPInt(); // Exp and int bit.
9207 const unsigned ExplicitIntBitInF80 = 63;
9208 APInt ExpMask = Inf;
9209 if (IsF80)
9210 ExpMask.clearBit(BitPosition: ExplicitIntBitInF80);
9211 APInt AllOneMantissa = APFloat::getLargest(Sem: Semantics).bitcastToAPInt() & ~Inf;
9212 APInt QNaNBitMask =
9213 APInt::getOneBitSet(numBits: BitSize, BitNo: AllOneMantissa.getActiveBits() - 1);
9214 APInt InversionMask = APInt::getAllOnes(numBits: ResultVT.getScalarSizeInBits());
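  // For f32 these masks are: SignBit = 0x80000000, ValueMask = 0x7FFFFFFF,
  // Inf = ExpMask = 0x7F800000, AllOneMantissa = 0x007FFFFF and
  // QNaNBitMask = 0x00400000.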
9215
9216 SDValue ValueMaskV = DAG.getConstant(Val: ValueMask, DL, VT: IntVT);
9217 SDValue SignBitV = DAG.getConstant(Val: SignBit, DL, VT: IntVT);
9218 SDValue ExpMaskV = DAG.getConstant(Val: ExpMask, DL, VT: IntVT);
9219 SDValue ZeroV = DAG.getConstant(Val: 0, DL, VT: IntVT);
9220 SDValue InfV = DAG.getConstant(Val: Inf, DL, VT: IntVT);
9221 SDValue ResultInversionMask = DAG.getConstant(Val: InversionMask, DL, VT: ResultVT);
9222
9223 SDValue Res;
9224 const auto appendResult = [&](SDValue PartialRes) {
9225 if (PartialRes) {
9226 if (Res)
9227 Res = DAG.getNode(Opcode: ISD::OR, DL, VT: ResultVT, N1: Res, N2: PartialRes);
9228 else
9229 Res = PartialRes;
9230 }
9231 };
9232
9233 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
9234 const auto getIntBitIsSet = [&]() -> SDValue {
9235 if (!IntBitIsSetV) {
9236 APInt IntBitMask(BitSize, 0);
9237 IntBitMask.setBit(ExplicitIntBitInF80);
9238 SDValue IntBitMaskV = DAG.getConstant(Val: IntBitMask, DL, VT: IntVT);
9239 SDValue IntBitV = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: IntBitMaskV);
9240 IntBitIsSetV = DAG.getSetCC(DL, VT: ResultVT, LHS: IntBitV, RHS: ZeroV, Cond: ISD::SETNE);
9241 }
9242 return IntBitIsSetV;
9243 };
9244
9245 // Split the value into sign bit and absolute value.
9246 SDValue AbsV = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: ValueMaskV);
9247 SDValue SignV = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt,
9248 RHS: DAG.getConstant(Val: 0, DL, VT: IntVT), Cond: ISD::SETLT);
9249
9250 // Tests that involve more than one class should be processed first.
9251 SDValue PartialRes;
9252
9253 if (IsF80)
9254 ; // Detect finite numbers of f80 by checking individual classes because
9255 // they have different settings of the explicit integer bit.
9256 else if ((Test & fcFinite) == fcFinite) {
9257 // finite(V) ==> abs(V) < exp_mask
9258 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ExpMaskV, Cond: ISD::SETLT);
9259 Test &= ~fcFinite;
9260 } else if ((Test & fcFinite) == fcPosFinite) {
9261 // finite(V) && V > 0 ==> V < exp_mask
9262 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: ExpMaskV, Cond: ISD::SETULT);
9263 Test &= ~fcPosFinite;
9264 } else if ((Test & fcFinite) == fcNegFinite) {
9265 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
9266 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ExpMaskV, Cond: ISD::SETLT);
9267 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
9268 Test &= ~fcNegFinite;
9269 }
9270 appendResult(PartialRes);
9271
9272 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
9273 // fcZero | fcSubnormal => test all exponent bits are 0
9274 // TODO: Handle sign bit specific cases
9275 if (PartialCheck == (fcZero | fcSubnormal)) {
9276 SDValue ExpBits = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: ExpMaskV);
9277 SDValue ExpIsZero =
9278 DAG.getSetCC(DL, VT: ResultVT, LHS: ExpBits, RHS: ZeroV, Cond: ISD::SETEQ);
9279 appendResult(ExpIsZero);
9280 Test &= ~PartialCheck & fcAllFlags;
9281 }
9282 }
9283
9284 // Check for individual classes.
9285
9286 if (unsigned PartialCheck = Test & fcZero) {
9287 if (PartialCheck == fcPosZero)
9288 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: ZeroV, Cond: ISD::SETEQ);
9289 else if (PartialCheck == fcZero)
9290 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ZeroV, Cond: ISD::SETEQ);
9291 else // ISD::fcNegZero
9292 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: SignBitV, Cond: ISD::SETEQ);
9293 appendResult(PartialRes);
9294 }
9295
9296 if (unsigned PartialCheck = Test & fcSubnormal) {
9297 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
9298 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
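    // For f32 this is unsigned(abs(V) - 1) < 0x007FFFFF: the subtraction wraps
    // abs(V) == 0 to the maximum value, so zero correctly fails the test.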
9299 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
9300 SDValue MantissaV = DAG.getConstant(Val: AllOneMantissa, DL, VT: IntVT);
9301 SDValue VMinusOneV =
9302 DAG.getNode(Opcode: ISD::SUB, DL, VT: IntVT, N1: V, N2: DAG.getConstant(Val: 1, DL, VT: IntVT));
9303 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: VMinusOneV, RHS: MantissaV, Cond: ISD::SETULT);
9304 if (PartialCheck == fcNegSubnormal)
9305 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
9306 appendResult(PartialRes);
9307 }
9308
9309 if (unsigned PartialCheck = Test & fcInf) {
9310 if (PartialCheck == fcPosInf)
9311 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: InfV, Cond: ISD::SETEQ);
9312 else if (PartialCheck == fcInf)
9313 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETEQ);
9314 else { // ISD::fcNegInf
9315 APInt NegInf = APFloat::getInf(Sem: Semantics, Negative: true).bitcastToAPInt();
9316 SDValue NegInfV = DAG.getConstant(Val: NegInf, DL, VT: IntVT);
9317 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: NegInfV, Cond: ISD::SETEQ);
9318 }
9319 appendResult(PartialRes);
9320 }
9321
9322 if (unsigned PartialCheck = Test & fcNan) {
9323 APInt InfWithQnanBit = Inf | QNaNBitMask;
9324 SDValue InfWithQnanBitV = DAG.getConstant(Val: InfWithQnanBit, DL, VT: IntVT);
9325 if (PartialCheck == fcNan) {
9326 // isnan(V) ==> abs(V) > int(inf)
9327 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETGT);
9328 if (IsF80) {
        // Recognize unsupported values as NaNs for compatibility with glibc.
        // For such values, (exp(V) == 0) == int_bit.
9331 SDValue ExpBits = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: AbsV, N2: ExpMaskV);
9332 SDValue ExpIsZero =
9333 DAG.getSetCC(DL, VT: ResultVT, LHS: ExpBits, RHS: ZeroV, Cond: ISD::SETEQ);
9334 SDValue IsPseudo =
9335 DAG.getSetCC(DL, VT: ResultVT, LHS: getIntBitIsSet(), RHS: ExpIsZero, Cond: ISD::SETEQ);
9336 PartialRes = DAG.getNode(Opcode: ISD::OR, DL, VT: ResultVT, N1: PartialRes, N2: IsPseudo);
9337 }
9338 } else if (PartialCheck == fcQNan) {
9339 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
9340 PartialRes =
9341 DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfWithQnanBitV, Cond: ISD::SETGE);
9342 } else { // ISD::fcSNan
9343 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
9344 // abs(V) < (unsigned(Inf) | quiet_bit)
9345 SDValue IsNan = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETGT);
9346 SDValue IsNotQnan =
9347 DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfWithQnanBitV, Cond: ISD::SETLT);
9348 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: IsNan, N2: IsNotQnan);
9349 }
9350 appendResult(PartialRes);
9351 }
9352
9353 if (unsigned PartialCheck = Test & fcNormal) {
9354 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
9355 APInt ExpLSB = ExpMask & ~(ExpMask.shl(shiftAmt: 1));
9356 SDValue ExpLSBV = DAG.getConstant(Val: ExpLSB, DL, VT: IntVT);
9357 SDValue ExpMinus1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: IntVT, N1: AbsV, N2: ExpLSBV);
9358 APInt ExpLimit = ExpMask - ExpLSB;
9359 SDValue ExpLimitV = DAG.getConstant(Val: ExpLimit, DL, VT: IntVT);
9360 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: ExpMinus1, RHS: ExpLimitV, Cond: ISD::SETULT);
9361 if (PartialCheck == fcNegNormal)
9362 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
9363 else if (PartialCheck == fcPosNormal) {
9364 SDValue PosSignV =
9365 DAG.getNode(Opcode: ISD::XOR, DL, VT: ResultVT, N1: SignV, N2: ResultInversionMask);
9366 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: PosSignV);
9367 }
9368 if (IsF80)
9369 PartialRes =
9370 DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: getIntBitIsSet());
9371 appendResult(PartialRes);
9372 }
9373
9374 if (!Res)
9375 return DAG.getConstant(Val: IsInverted, DL, VT: ResultVT);
9376 if (IsInverted)
9377 Res = DAG.getNode(Opcode: ISD::XOR, DL, VT: ResultVT, N1: Res, N2: ResultInversionMask);
9378 return Res;
9379}
9380
9381// Only expand vector types if we have the appropriate vector bit operations.
9382static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
9383 assert(VT.isVector() && "Expected vector type");
9384 unsigned Len = VT.getScalarSizeInBits();
9385 return TLI.isOperationLegalOrCustom(Op: ISD::ADD, VT) &&
9386 TLI.isOperationLegalOrCustom(Op: ISD::SUB, VT) &&
9387 TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
9388 (Len == 8 || TLI.isOperationLegalOrCustom(Op: ISD::MUL, VT)) &&
9389 TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT);
9390}
9391
9392SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
9393 SDLoc dl(Node);
9394 EVT VT = Node->getValueType(ResNo: 0);
9395 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9396 SDValue Op = Node->getOperand(Num: 0);
9397 unsigned Len = VT.getScalarSizeInBits();
9398 assert(VT.isInteger() && "CTPOP not implemented for this type.");
9399
9400 // TODO: Add support for irregular type lengths.
9401 if (!(Len <= 128 && Len % 8 == 0))
9402 return SDValue();
9403
9404 // Only expand vector types if we have the appropriate vector bit operations.
9405 if (VT.isVector() && !canExpandVectorCTPOP(TLI: *this, VT))
9406 return SDValue();
9407
9408 // This is the "best" algorithm from
9409 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9410 SDValue Mask55 =
9411 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x55)), DL: dl, VT);
9412 SDValue Mask33 =
9413 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x33)), DL: dl, VT);
9414 SDValue Mask0F =
9415 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x0F)), DL: dl, VT);
9416
9417 // v = v - ((v >> 1) & 0x55555555...)
9418 Op = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Op,
9419 N2: DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
9420 N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
9421 N2: DAG.getConstant(Val: 1, DL: dl, VT: ShVT)),
9422 N2: Mask55));
9423 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9424 Op = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op, N2: Mask33),
9425 N2: DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
9426 N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
9427 N2: DAG.getConstant(Val: 2, DL: dl, VT: ShVT)),
9428 N2: Mask33));
9429 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9430 Op = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
9431 N1: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Op,
9432 N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
9433 N2: DAG.getConstant(Val: 4, DL: dl, VT: ShVT))),
9434 N2: Mask0F);
9435
9436 if (Len <= 8)
9437 return Op;
9438
9439 // Avoid the multiply if we only have 2 bytes to add.
9440 // TODO: Only doing this for scalars because vectors weren't as obviously
9441 // improved.
9442 if (Len == 16 && !VT.isVector()) {
9443 // v = (v + (v >> 8)) & 0x00FF;
9444 return DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
9445 N1: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Op,
9446 N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
9447 N2: DAG.getConstant(Val: 8, DL: dl, VT: ShVT))),
9448 N2: DAG.getConstant(Val: 0xFF, DL: dl, VT));
9449 }
9450
9451 // v = (v * 0x01010101...) >> (Len - 8)
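  // Multiplying by 0x0101... accumulates the per-byte counts into the top
  // byte: e.g. for i32, (v * 0x01010101) >> 24 sums all four bytes. The sums
  // cannot overflow a byte since the total popcount is at most Len <= 128.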
9452 SDValue V;
9453 if (isOperationLegalOrCustomOrPromote(
9454 Op: ISD::MUL, VT: getTypeToTransformTo(Context&: *DAG.getContext(), VT))) {
9455 SDValue Mask01 =
9456 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x01)), DL: dl, VT);
9457 V = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Op, N2: Mask01);
9458 } else {
    V = Op;
    for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
      SDValue ShiftC = DAG.getShiftAmountConstant(Val: Shift, VT, DL: dl);
      V = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: V,
                      N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: V, N2: ShiftC));
    }
  }
  return DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: V, N2: DAG.getConstant(Val: Len - 8, DL: dl, VT: ShVT));
}

SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(ResNo: 0);
  EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  SDValue Op = Node->getOperand(Num: 0);
  SDValue Mask = Node->getOperand(Num: 1);
  SDValue VL = Node->getOperand(Num: 2);
  unsigned Len = VT.getScalarSizeInBits();
  assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");

  // TODO: Add support for irregular type lengths.
  if (!(Len <= 128 && Len % 8 == 0))
    return SDValue();

  // This is the same algorithm as in expandCTPOP, from
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  SDValue Mask55 =
      DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x55)), DL: dl, VT);
  SDValue Mask33 =
      DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x33)), DL: dl, VT);
  SDValue Mask0F =
      DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x0F)), DL: dl, VT);

  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;

  // v = v - ((v >> 1) & 0x55555555...)
  Tmp1 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT,
                     N1: DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op,
                                    N2: DAG.getConstant(Val: 1, DL: dl, VT: ShVT), N3: Mask, N4: VL),
                     N2: Mask55, N3: Mask, N4: VL);
  Op = DAG.getNode(Opcode: ISD::VP_SUB, DL: dl, VT, N1: Op, N2: Tmp1, N3: Mask, N4: VL);

  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
  Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op, N2: Mask33, N3: Mask, N4: VL);
  Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT,
                     N1: DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op,
                                    N2: DAG.getConstant(Val: 2, DL: dl, VT: ShVT), N3: Mask, N4: VL),
                     N2: Mask33, N3: Mask, N4: VL);
  Op = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: VL);

  // v = (v + (v >> 4)) & 0x0F0F0F0F...
  Tmp4 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 4, DL: dl, VT: ShVT),
                     N3: Mask, N4: VL);
  Tmp5 = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: Op, N2: Tmp4, N3: Mask, N4: VL);
  Op = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp5, N2: Mask0F, N3: Mask, N4: VL);

  if (Len <= 8)
    return Op;

  // v = (v * 0x01010101...) >> (Len - 8)
  SDValue V;
  if (isOperationLegalOrCustomOrPromote(
          Op: ISD::VP_MUL, VT: getTypeToTransformTo(Context&: *DAG.getContext(), VT))) {
    SDValue Mask01 =
        DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x01)), DL: dl, VT);
    V = DAG.getNode(Opcode: ISD::VP_MUL, DL: dl, VT, N1: Op, N2: Mask01, N3: Mask, N4: VL);
  } else {
    V = Op;
    for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
      SDValue ShiftC = DAG.getShiftAmountConstant(Val: Shift, VT, DL: dl);
      V = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: V,
                      N2: DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: V, N2: ShiftC, N3: Mask, N4: VL),
                      N3: Mask, N4: VL);
    }
  }
  return DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: V, N2: DAG.getConstant(Val: Len - 8, DL: dl, VT: ShVT),
                     N3: Mask, N4: VL);
}

SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(ResNo: 0);
  EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  SDValue Op = Node->getOperand(Num: 0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(Op: ISD::CTLZ, VT))
    return DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Op);

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  if (isOperationLegalOrCustom(Op: ISD::CTLZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
    SDValue CTLZ = DAG.getNode(Opcode: ISD::CTLZ_ZERO_UNDEF, DL: dl, VT, Operand: Op);
    SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
    return DAG.getSelect(DL: dl, VT, Cond: SrcIsZero,
                         LHS: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT), RHS: CTLZ);
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // This includes the operations needed to expand CTPOP if it isn't supported.
  if (VT.isVector() && (!isPowerOf2_32(Value: NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(Op: ISD::CTPOP, VT) &&
                         !canExpandVectorCTPOP(TLI: *this, VT)) ||
                        !isOperationLegalOrCustom(Op: ISD::SRL, VT) ||
                        !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)))
    return SDValue();

  // For now, we do this:
  // x = x | (x >> 1);
  // x = x | (x >> 2);
  // ...
  // x = x | (x >> 16);
  // x = x | (x >> 32); // for 64-bit input
  // return popcount(~x);
  //
  // Ref: "Hacker's Delight" by Henry Warren
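  //
  // For illustration, with an i8 value x = 0b00010000 the or/shift steps
  // smear the leading one rightwards, giving x = 0b00011111; then
  // popcount(~x) = popcount(0b11100000) = 3 = ctlz(x).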
  for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
    SDValue Tmp = DAG.getConstant(Val: 1ULL << i, DL: dl, VT: ShVT);
    Op = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Op,
                     N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: Tmp));
  }
  Op = DAG.getNOT(DL: dl, Val: Op, VT);
  return DAG.getNode(Opcode: ISD::CTPOP, DL: dl, VT, Operand: Op);
}

SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(ResNo: 0);
  EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  SDValue Op = Node->getOperand(Num: 0);
  SDValue Mask = Node->getOperand(Num: 1);
  SDValue VL = Node->getOperand(Num: 2);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // Do the same smearing as in expandCTLZ:
  // x = x | (x >> 1);
  // x = x | (x >> 2);
  // ...
  // x = x | (x >> 16);
  // x = x | (x >> 32); // for 64-bit input
  // return popcount(~x);
  for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
    SDValue Tmp = DAG.getConstant(Val: 1ULL << i, DL: dl, VT: ShVT);
    Op = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Op,
                     N2: DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: Tmp, N3: Mask, N4: VL),
                     N3: Mask, N4: VL);
  }
  Op = DAG.getNode(Opcode: ISD::VP_XOR, DL: dl, VT, N1: Op, N2: DAG.getAllOnesConstant(DL: dl, VT),
                   N3: Mask, N4: VL);
  return DAG.getNode(Opcode: ISD::VP_CTPOP, DL: dl, VT, N1: Op, N2: Mask, N3: VL);
}

SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
                                        const SDLoc &DL, EVT VT, SDValue Op,
                                        unsigned BitWidth) const {
  if (BitWidth != 32 && BitWidth != 64)
    return SDValue();
  APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
                                  : APInt(64, 0x0218A392CD3D5DBFULL);
  const DataLayout &TD = DAG.getDataLayout();
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getConstantPool(MF&: DAG.getMachineFunction());
  unsigned ShiftAmt = BitWidth - Log2_32(Value: BitWidth);
  SDValue Neg = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT), N2: Op);
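  // Op & Neg (i.e. x & -x) isolates the lowest set bit as a power of two
  // 1 << i, so the multiply below is just a left shift of DeBruijn by i, and
  // the top Log2_32(BitWidth) bits form a window into the de Bruijn sequence
  // that is unique for every i. The table maps that window back to i.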
  SDValue Lookup = DAG.getNode(
      Opcode: ISD::SRL, DL, VT,
      N1: DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op, N2: Neg),
                     N2: DAG.getConstant(Val: DeBruijn, DL, VT)),
      N2: DAG.getShiftAmountConstant(Val: ShiftAmt, VT, DL));
  Lookup = DAG.getSExtOrTrunc(Op: Lookup, DL, VT: getPointerTy(DL: TD));

  SmallVector<uint8_t> Table(BitWidth, 0);
  for (unsigned i = 0; i < BitWidth; i++) {
    APInt Shl = DeBruijn.shl(shiftAmt: i);
    APInt Lshr = Shl.lshr(shiftAmt: ShiftAmt);
    Table[Lshr.getZExtValue()] = i;
  }

  // Create a ConstantDataArray in the constant pool.
  auto *CA = ConstantDataArray::get(Context&: *DAG.getContext(), Elts&: Table);
  SDValue CPIdx = DAG.getConstantPool(C: CA, VT: getPointerTy(DL: TD),
                                      Align: TD.getPrefTypeAlign(Ty: CA->getType()));
  SDValue ExtLoad = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl: DL, VT, Chain: DAG.getEntryNode(),
                                   Ptr: DAG.getMemBasePlusOffset(Base: CPIdx, Offset: Lookup, DL),
                                   PtrInfo, MemVT: MVT::i8);
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
    return ExtLoad;

  EVT SetCCVT =
      getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
  SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
  SDValue SrcIsZero = DAG.getSetCC(DL, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
  return DAG.getSelect(DL, VT, Cond: SrcIsZero,
                       LHS: DAG.getConstant(Val: BitWidth, DL, VT), RHS: ExtLoad);
}

SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(ResNo: 0);
  SDValue Op = Node->getOperand(Num: 0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(Op: ISD::CTTZ, VT))
    return DAG.getNode(Opcode: ISD::CTTZ, DL: dl, VT, Operand: Op);

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  if (isOperationLegalOrCustom(Op: ISD::CTTZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
    SDValue CTTZ = DAG.getNode(Opcode: ISD::CTTZ_ZERO_UNDEF, DL: dl, VT, Operand: Op);
    SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
    return DAG.getSelect(DL: dl, VT, Cond: SrcIsZero,
                         LHS: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT), RHS: CTTZ);
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // This includes the operations needed to expand CTPOP if it isn't supported.
  if (VT.isVector() && (!isPowerOf2_32(Value: NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(Op: ISD::CTPOP, VT) &&
                         !isOperationLegalOrCustom(Op: ISD::CTLZ, VT) &&
                         !canExpandVectorCTPOP(TLI: *this, VT)) ||
                        !isOperationLegalOrCustom(Op: ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) ||
                        !isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT)))
    return SDValue();

  // Emit a table lookup if the ISD::CTPOP used in the fallback path below
  // would be expanded or converted to a libcall.
  if (!VT.isVector() && !isOperationLegalOrCustomOrPromote(Op: ISD::CTPOP, VT) &&
      !isOperationLegal(Op: ISD::CTLZ, VT))
    if (SDValue V = CTTZTableLookup(Node, DAG, DL: dl, VT, Op, BitWidth: NumBitsPerElt))
      return V;

  // For now, we use: { return popcount(~x & (x - 1)); }
  // unless the target has ctlz but not ctpop, in which case we use:
  // { return bitwidth - nlz(~x & (x - 1)); }
  // Ref: "Hacker's Delight" by Henry Warren
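  //
  // For illustration, with an i8 value x = 0b00101000:
  //   x - 1 = 0b00100111, ~x = 0b11010111,
  //   ~x & (x - 1) = 0b00000111, whose popcount is 3 = cttz(x).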
  SDValue Tmp = DAG.getNode(
      Opcode: ISD::AND, DL: dl, VT, N1: DAG.getNOT(DL: dl, Val: Op, VT),
      N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 1, DL: dl, VT)));

  // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
  if (isOperationLegal(Op: ISD::CTLZ, VT) && !isOperationLegal(Op: ISD::CTPOP, VT)) {
    return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT),
                       N2: DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Tmp));
  }

  return DAG.getNode(Opcode: ISD::CTPOP, DL: dl, VT, Operand: Tmp);
}

SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
  SDValue Op = Node->getOperand(Num: 0);
  SDValue Mask = Node->getOperand(Num: 1);
  SDValue VL = Node->getOperand(Num: 2);
  SDLoc dl(Node);
  EVT VT = Node->getValueType(ResNo: 0);

  // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
  SDValue Not = DAG.getNode(Opcode: ISD::VP_XOR, DL: dl, VT, N1: Op,
                            N2: DAG.getAllOnesConstant(DL: dl, VT), N3: Mask, N4: VL);
  SDValue MinusOne = DAG.getNode(Opcode: ISD::VP_SUB, DL: dl, VT, N1: Op,
                                 N2: DAG.getConstant(Val: 1, DL: dl, VT), N3: Mask, N4: VL);
  SDValue Tmp = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Not, N2: MinusOne, N3: Mask, N4: VL);
  return DAG.getNode(Opcode: ISD::VP_CTPOP, DL: dl, VT, N1: Tmp, N2: Mask, N3: VL);
}

SDValue TargetLowering::expandVPCTTZElements(SDNode *N,
                                             SelectionDAG &DAG) const {
  // %cond = to_bool_vec %source
  // %splat = splat /*val=*/VL
  // %tz = step_vector
  // %v = vp.select %cond, /*true=*/%tz, /*false=*/%splat
  // %r = vp.reduce.umin %v
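  //
  // For illustration, with %source = <0,0,1,0> and VL = 4: %cond is
  // <0,0,1,0>, %v = <4,4,2,4>, and the umin reduction returns 2, the index
  // of the first active lane; an all-zero source returns VL itself.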
  SDLoc DL(N);
  SDValue Source = N->getOperand(Num: 0);
  SDValue Mask = N->getOperand(Num: 1);
  SDValue EVL = N->getOperand(Num: 2);
  EVT SrcVT = Source.getValueType();
  EVT ResVT = N->getValueType(ResNo: 0);
  EVT ResVecVT =
      EVT::getVectorVT(Context&: *DAG.getContext(), VT: ResVT, EC: SrcVT.getVectorElementCount());

  // Convert to boolean vector.
  if (SrcVT.getScalarType() != MVT::i1) {
    SDValue AllZero = DAG.getConstant(Val: 0, DL, VT: SrcVT);
    SrcVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i1,
                             EC: SrcVT.getVectorElementCount());
    Source = DAG.getNode(Opcode: ISD::VP_SETCC, DL, VT: SrcVT, N1: Source, N2: AllZero,
                         N3: DAG.getCondCode(Cond: ISD::SETNE), N4: Mask, N5: EVL);
  }

  SDValue ExtEVL = DAG.getZExtOrTrunc(Op: EVL, DL, VT: ResVT);
  SDValue Splat = DAG.getSplat(VT: ResVecVT, DL, Op: ExtEVL);
  SDValue StepVec = DAG.getStepVector(DL, ResVT: ResVecVT);
  SDValue Select =
      DAG.getNode(Opcode: ISD::VP_SELECT, DL, VT: ResVecVT, N1: Source, N2: StepVec, N3: Splat, N4: EVL);
  return DAG.getNode(Opcode: ISD::VP_REDUCE_UMIN, DL, VT: ResVT, N1: ExtEVL, N2: Select, N3: Mask, N4: EVL);
}

SDValue TargetLowering::expandVectorFindLastActive(SDNode *N,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(N);
  SDValue Mask = N->getOperand(Num: 0);
  EVT MaskVT = Mask.getValueType();
  EVT BoolVT = MaskVT.getScalarType();

  // Find a suitable type for a stepvector.
  ConstantRange VScaleRange(1, /*isFullSet=*/true); // Fixed length default.
  if (MaskVT.isScalableVector())
    VScaleRange = getVScaleRange(F: &DAG.getMachineFunction().getFunction(), BitWidth: 64);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  uint64_t EltWidth = TLI.getBitWidthForCttzElements(
      RetTy: BoolVT.getTypeForEVT(Context&: *DAG.getContext()), EC: MaskVT.getVectorElementCount(),
      /*ZeroIsPoison=*/true, VScaleRange: &VScaleRange);
  // If the step vector element type would be smaller than the mask element
  // type, clamp it up to the mask element width to avoid widening issues.
  EltWidth = std::max(a: EltWidth, b: BoolVT.getFixedSizeInBits());
  EVT StepVT = MVT::getIntegerVT(BitWidth: EltWidth);
  EVT StepVecVT = MaskVT.changeVectorElementType(Context&: *DAG.getContext(), EltVT: StepVT);

  // If promotion or widening is required to make the type legal, do it here.
  // Integer promotion within LegalizeVectorOps looks for a type of the same
  // total size but with fewer, larger elements, not the usual larger total
  // size with the same number of elements.
  TargetLowering::LegalizeTypeAction TypeAction =
      TLI.getTypeAction(VT: StepVecVT.getSimpleVT());
  SDValue StepVec;
  if (TypeAction == TargetLowering::TypePromoteInteger) {
    StepVecVT = TLI.getTypeToTransformTo(Context&: *DAG.getContext(), VT: StepVecVT);
    StepVT = StepVecVT.getVectorElementType();
    StepVec = DAG.getStepVector(DL, ResVT: StepVecVT);
  } else if (TypeAction == TargetLowering::TypeWidenVector) {
    // For widening, the element count changes. Create a step vector with only
    // the original elements valid, padded with poison, and widen the mask by
    // padding it with zeros.
    EVT WideVecVT = TLI.getTypeToTransformTo(Context&: *DAG.getContext(), VT: StepVecVT);
    unsigned WideNumElts = WideVecVT.getVectorNumElements();

    // Build widened step vector: <0, 1, ..., OrigNumElts-1, poison, poison, ..>
    SDValue OrigStepVec = DAG.getStepVector(DL, ResVT: StepVecVT);
    SDValue PoisonPad = DAG.getPOISON(VT: WideVecVT);
    StepVec = DAG.getInsertSubvector(DL, Vec: PoisonPad, SubVec: OrigStepVec, Idx: 0);

    // Widen mask: pad with zeros.
    EVT WideMaskVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: BoolVT, NumElements: WideNumElts);
    SDValue ZeroMask = DAG.getConstant(Val: 0, DL, VT: WideMaskVT);
    Mask = DAG.getInsertSubvector(DL, Vec: ZeroMask, SubVec: Mask, Idx: 0);

    StepVecVT = WideVecVT;
    StepVT = WideVecVT.getVectorElementType();
  } else {
    StepVec = DAG.getStepVector(DL, ResVT: StepVecVT);
  }

  // Zero out lanes with inactive elements, then find the highest remaining
  // value from the stepvector.
  SDValue Zeroes = DAG.getConstant(Val: 0, DL, VT: StepVecVT);
  SDValue ActiveElts = DAG.getSelect(DL, VT: StepVecVT, Cond: Mask, LHS: StepVec, RHS: Zeroes);
  SDValue HighestIdx = DAG.getNode(Opcode: ISD::VECREDUCE_UMAX, DL, VT: StepVT, Operand: ActiveElts);
  return DAG.getZExtOrTrunc(Op: HighestIdx, DL, VT: N->getValueType(ResNo: 0));
}

SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
                                  bool IsNegative) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue Op = N->getOperand(Num: 0);

  // abs(x) -> smax(x,sub(0,x))
  if (!IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
      isOperationLegal(Op: ISD::SMAX, VT)) {
    SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
    Op = DAG.getFreeze(V: Op);
    return DAG.getNode(Opcode: ISD::SMAX, DL: dl, VT, N1: Op,
                       N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
  }

  // abs(x) -> umin(x,sub(0,x))
  if (!IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
      isOperationLegal(Op: ISD::UMIN, VT)) {
    SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
    Op = DAG.getFreeze(V: Op);
    return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT, N1: Op,
                       N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
  }

  // 0 - abs(x) -> smin(x, sub(0,x))
  if (IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
      isOperationLegal(Op: ISD::SMIN, VT)) {
    SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
    Op = DAG.getFreeze(V: Op);
    return DAG.getNode(Opcode: ISD::SMIN, DL: dl, VT, N1: Op,
                       N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
  }

  // Only expand vector types if we have the appropriate vector operations.
  if (VT.isVector() &&
      (!isOperationLegalOrCustom(Op: ISD::SRA, VT) ||
       (!IsNegative && !isOperationLegalOrCustom(Op: ISD::ADD, VT)) ||
       (IsNegative && !isOperationLegalOrCustom(Op: ISD::SUB, VT)) ||
       !isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT)))
    return SDValue();

  Op = DAG.getFreeze(V: Op);
  SDValue Shift = DAG.getNode(
      Opcode: ISD::SRA, DL: dl, VT, N1: Op,
      N2: DAG.getShiftAmountConstant(Val: VT.getScalarSizeInBits() - 1, VT, DL: dl));
  SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Op, N2: Shift);

  // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
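  // For illustration, with an i8 value X = -5 (0xFB): Y = 0xFF, X ^ Y = 4,
  // and (X ^ Y) - Y = 4 - (-1) = 5; for non-negative X, Y = 0 and X passes
  // through unchanged.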
  if (!IsNegative)
    return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Xor, N2: Shift);

  // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
  return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Shift, N2: Xor);
}

SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue LHS = N->getOperand(Num: 0);
  SDValue RHS = N->getOperand(Num: 1);
  bool IsSigned = N->getOpcode() == ISD::ABDS;

  // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
  // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
  unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
  unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
  if (isOperationLegal(Op: MaxOpc, VT) && isOperationLegal(Op: MinOpc, VT)) {
    LHS = DAG.getFreeze(V: LHS);
    RHS = DAG.getFreeze(V: RHS);
    SDValue Max = DAG.getNode(Opcode: MaxOpc, DL: dl, VT, N1: LHS, N2: RHS);
    SDValue Min = DAG.getNode(Opcode: MinOpc, DL: dl, VT, N1: LHS, N2: RHS);
    return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Max, N2: Min);
  }

  // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
  if (!IsSigned && isOperationLegal(Op: ISD::USUBSAT, VT)) {
    LHS = DAG.getFreeze(V: LHS);
    RHS = DAG.getFreeze(V: RHS);
    return DAG.getNode(Opcode: ISD::OR, DL: dl, VT,
                       N1: DAG.getNode(Opcode: ISD::USUBSAT, DL: dl, VT, N1: LHS, N2: RHS),
                       N2: DAG.getNode(Opcode: ISD::USUBSAT, DL: dl, VT, N1: RHS, N2: LHS));
  }

  // If the subtract doesn't overflow then just use abs(sub())
  bool IsNonNegative = DAG.SignBitIsZero(Op: LHS) && DAG.SignBitIsZero(Op: RHS);

  if (DAG.willNotOverflowSub(IsSigned: IsSigned || IsNonNegative, N0: LHS, N1: RHS))
    return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT,
                       Operand: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS));

  if (DAG.willNotOverflowSub(IsSigned: IsSigned || IsNonNegative, N0: RHS, N1: LHS))
    return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT,
                       Operand: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: RHS, N2: LHS));

  EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
  ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
  LHS = DAG.getFreeze(V: LHS);
  RHS = DAG.getFreeze(V: RHS);
  SDValue Cmp = DAG.getSetCC(DL: dl, VT: CCVT, LHS, RHS, Cond: CC);

  // Branchless expansion iff cmp result is allbits:
  // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
  // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
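  //
  // For illustration (unsigned i8): lhs = 3, rhs = 7 gives Cmp = 0,
  // Diff = 0xFC, Xor = 0xFC, and 0 - 0xFC = 4; swapping the operands gives
  // Cmp = 0xFF, Diff = 4, Xor = 0xFB, and 0xFF - 0xFB = 4 as well.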
  if (CCVT == VT && getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
    SDValue Diff = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS);
    SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Diff, N2: Cmp);
    return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Cmp, N2: Xor);
  }

  // Similar to the branchless expansion, if we don't prefer selects, use the
  // (sign-extended) usubo overflow flag when the (scalar) type is illegal, as
  // this is more likely to legalize cleanly:
  // abdu(lhs, rhs) -> sub(xor(sub(lhs, rhs), uof(lhs, rhs)), uof(lhs, rhs))
  if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT) &&
      !preferSelectsOverBooleanArithmetic(VT)) {
    SDValue USubO =
        DAG.getNode(Opcode: ISD::USUBO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::i1), Ops: {LHS, RHS});
    SDValue Cmp = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT, Operand: USubO.getValue(R: 1));
    SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: USubO.getValue(R: 0), N2: Cmp);
    return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Xor, N2: Cmp);
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(N);

  // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
  // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
  return DAG.getSelect(DL: dl, VT, Cond: Cmp, LHS: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS),
                       RHS: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: RHS, N2: LHS));
}

SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue LHS = N->getOperand(Num: 0);
  SDValue RHS = N->getOperand(Num: 1);

  unsigned Opc = N->getOpcode();
  bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
  bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
  unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
  unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
  unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
  unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
  assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS ||
          Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
         "Unknown AVG node");

  // If the operands are already extended, we can add+shift.
  bool IsExt =
      (IsSigned && DAG.ComputeNumSignBits(Op: LHS) >= 2 &&
       DAG.ComputeNumSignBits(Op: RHS) >= 2) ||
      (!IsSigned && DAG.computeKnownBits(Op: LHS).countMinLeadingZeros() >= 1 &&
       DAG.computeKnownBits(Op: RHS).countMinLeadingZeros() >= 1);
  if (IsExt) {
    SDValue Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: LHS, N2: RHS);
    if (!IsFloor)
      Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Sum, N2: DAG.getConstant(Val: 1, DL: dl, VT));
    return DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: Sum,
                       N2: DAG.getShiftAmountConstant(Val: 1, VT, DL: dl));
  }

  // For scalars, see if we can efficiently extend/truncate to use add+shift.
  if (VT.isScalarInteger()) {
    unsigned BW = VT.getScalarSizeInBits();
    EVT ExtVT = VT.getIntegerVT(Context&: *DAG.getContext(), BitWidth: 2 * BW);
    if (isTypeLegal(VT: ExtVT) && isTruncateFree(FromVT: ExtVT, ToVT: VT)) {
      LHS = DAG.getNode(Opcode: ExtOpc, DL: dl, VT: ExtVT, Operand: LHS);
      RHS = DAG.getNode(Opcode: ExtOpc, DL: dl, VT: ExtVT, Operand: RHS);
      SDValue Avg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ExtVT, N1: LHS, N2: RHS);
      if (!IsFloor)
        Avg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ExtVT, N1: Avg,
                          N2: DAG.getConstant(Val: 1, DL: dl, VT: ExtVT));
      // Just use SRL as we will be truncating away the extended sign bits.
      Avg = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ExtVT, N1: Avg,
                        N2: DAG.getShiftAmountConstant(Val: 1, VT: ExtVT, DL: dl));
      return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Avg);
    }
  }

  // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
  if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT)) {
    SDValue UAddWithOverflow =
        DAG.getNode(Opcode: ISD::UADDO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::i1), Ops: {RHS, LHS});

    SDValue Sum = UAddWithOverflow.getValue(R: 0);
    SDValue Overflow = UAddWithOverflow.getValue(R: 1);

    // Right shift the sum by 1.
    SDValue LShrVal = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Sum,
                                  N2: DAG.getShiftAmountConstant(Val: 1, VT, DL: dl));

    // ANY_EXTEND is sufficient here: the shift below moves the overflow bit
    // to the top and discards everything above it.
    SDValue ExtOverflow = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: Overflow);
    SDValue OverflowShl = DAG.getNode(
        Opcode: ISD::SHL, DL: dl, VT, N1: ExtOverflow,
        N2: DAG.getShiftAmountConstant(Val: VT.getScalarSizeInBits() - 1, VT, DL: dl));

    return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: LShrVal, N2: OverflowShl);
  }

  // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
  // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
  // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
  // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
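  //
  // For illustration, avgfloors(5, 9): and = 1, xor = 12, ashr = 6, and
  // 1 + 6 = 7 = floor((5 + 9) / 2). The and/or term supplies the bits both
  // operands share, which the halved xor term cannot represent.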
  LHS = DAG.getFreeze(V: LHS);
  RHS = DAG.getFreeze(V: RHS);
  SDValue Sign = DAG.getNode(Opcode: SignOpc, DL: dl, VT, N1: LHS, N2: RHS);
  SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: LHS, N2: RHS);
  SDValue Shift =
      DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: Xor, N2: DAG.getShiftAmountConstant(Val: 1, VT, DL: dl));
  return DAG.getNode(Opcode: SumOpc, DL: dl, VT, N1: Sign, N2: Shift);
}

SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue Op = N->getOperand(Num: 0);

  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    // Use a rotate by 8. This can be further expanded if necessary.
    return DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
  case MVT::i32:
    // This is meant specifically for targets such as ARM that have ROTR but
    // no ROTL.
    if (isOperationLegalOrCustom(Op: ISD::ROTR, VT)) {
      SDValue Mask = DAG.getConstant(Val: 0x00FF00FF, DL: dl, VT);
      // ((x & 0x00FF00FF) rotr 8) | ((x rotl 8) & 0x00FF00FF)
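      // (For a 32-bit value, rotr 24 is the same as rotl 8, so ROTR alone
      // suffices for the second term below.)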
      SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op, N2: Mask);
      SDValue Rotr =
          DAG.getNode(Opcode: ISD::ROTR, DL: dl, VT, N1: And, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
      SDValue Rotl =
          DAG.getNode(Opcode: ISD::ROTR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
      SDValue And2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Rotl, N2: Mask);
      return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Rotr, N2: And2);
    }
    Tmp4 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
    Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
                       N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
    Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
    Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT));
    Tmp1 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
    Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp3);
    Tmp2 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp1);
    return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp2);
  case MVT::i64:
    Tmp8 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT));
    Tmp7 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
                       N2: DAG.getConstant(Val: 255ULL << 8, DL: dl, VT));
    Tmp7 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp7, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT));
    Tmp6 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
                       N2: DAG.getConstant(Val: 255ULL << 16, DL: dl, VT));
    Tmp6 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp6, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
    Tmp5 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
                       N2: DAG.getConstant(Val: 255ULL << 24, DL: dl, VT));
    Tmp5 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp5, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
    Tmp4 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
    Tmp4 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp4,
                       N2: DAG.getConstant(Val: 255ULL << 24, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
    Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp3,
                       N2: DAG.getConstant(Val: 255ULL << 16, DL: dl, VT));
    Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT));
    Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2,
                       N2: DAG.getConstant(Val: 255ULL << 8, DL: dl, VT));
    Tmp1 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT));
    Tmp8 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp7);
    Tmp6 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp6, N2: Tmp5);
    Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp3);
    Tmp2 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp1);
    Tmp8 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp6);
    Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp2);
    return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp4);
  }
}

SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue Op = N->getOperand(Num: 0);
  SDValue Mask = N->getOperand(Num: 1);
  SDValue EVL = N->getOperand(Num: 2);

  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    Tmp1 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp1, N2: Tmp2, N3: Mask, N4: EVL);
  case MVT::i32:
    Tmp4 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT),
                       N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
                       N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp1 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp3, N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp1, N3: Mask, N4: EVL);
    return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp2, N3: Mask, N4: EVL);
  case MVT::i64:
    Tmp8 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp7 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
                       N2: DAG.getConstant(Val: 255ULL << 8, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp7 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp7, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp6 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
                       N2: DAG.getConstant(Val: 255ULL << 16, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp6 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp6, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp5 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
                       N2: DAG.getConstant(Val: 255ULL << 24, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp5 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp5, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp4 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp4 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp4,
                       N2: DAG.getConstant(Val: 255ULL << 24, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp3,
                       N2: DAG.getConstant(Val: 255ULL << 16, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
                       N2: DAG.getConstant(Val: 255ULL << 8, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp1 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp8 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp7, N3: Mask, N4: EVL);
    Tmp6 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp6, N2: Tmp5, N3: Mask, N4: EVL);
    Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp3, N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp1, N3: Mask, N4: EVL);
    Tmp8 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp6, N3: Mask, N4: EVL);
    Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp2, N3: Mask, N4: EVL);
    return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp4, N3: Mask, N4: EVL);
  }
}

SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue Op = N->getOperand(Num: 0);
  EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  unsigned Sz = VT.getScalarSizeInBits();

  SDValue Tmp, Tmp2, Tmp3;

  // If we can, perform BSWAP first, then mask+swap the i4 pairs, then the i2
  // pairs, and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
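  //
  // For illustration, an i8 value 0b10110001 (too narrow to need the BSWAP)
  // becomes 0b00011011 after the i4 swap, 0b01001110 after the i2 swap, and
  // 0b10001101 after the i1 swap, which is the original value bit-reversed.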
  if (Sz >= 8 && isPowerOf2_32(Value: Sz)) {
    // Create the masks - repeating the pattern every byte.
    APInt Mask4 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x0F));
    APInt Mask2 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x33));
    APInt Mask1 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x55));

    // BSWAP if the type is wider than a single byte.
    Tmp = (Sz > 8 ? DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT, Operand: Op) : Op);

    // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
    Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT));
    Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask4, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask4, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT));
    Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);

    // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
    Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT));
    Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask2, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask2, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT));
    Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);

    // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
    Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT));
    Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask1, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask1, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT));
    Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
    return Tmp;
  }

  Tmp = DAG.getConstant(Val: 0, DL: dl, VT);
  for (unsigned I = 0, J = Sz - 1; I < Sz; ++I, --J) {
    if (I < J)
      Tmp2 =
          DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: J - I, DL: dl, VT: SHVT));
    else
      Tmp2 =
          DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: I - J, DL: dl, VT: SHVT));

    APInt Shift = APInt::getOneBitSet(numBits: Sz, BitNo: J);
    Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Shift, DL: dl, VT));
    Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp, N2: Tmp2);
  }

  return Tmp;
}

SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
  assert(N->getOpcode() == ISD::VP_BITREVERSE);

  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue Op = N->getOperand(Num: 0);
  SDValue Mask = N->getOperand(Num: 1);
  SDValue EVL = N->getOperand(Num: 2);
  EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  unsigned Sz = VT.getScalarSizeInBits();

  SDValue Tmp, Tmp2, Tmp3;

  // If we can, perform BSWAP first, then mask+swap the i4 pairs, then the i2
  // pairs, and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
  if (Sz >= 8 && isPowerOf2_32(Value: Sz)) {
    // Create the masks - repeating the pattern every byte.
    APInt Mask4 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x0F));
    APInt Mask2 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x33));
    APInt Mask1 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x55));

    // BSWAP if the type is wider than a single byte.
    Tmp = (Sz > 8 ? DAG.getNode(Opcode: ISD::VP_BSWAP, DL: dl, VT, N1: Op, N2: Mask, N3: EVL) : Op);

    // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
    Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
                       N2: DAG.getConstant(Val: Mask4, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask4, DL: dl, VT),
                       N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);

    // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
    Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
                       N2: DAG.getConstant(Val: Mask2, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask2, DL: dl, VT),
                       N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);

    // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
    Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
                       N2: DAG.getConstant(Val: Mask1, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask1, DL: dl, VT),
                       N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);
    return Tmp;
  }
  return SDValue();
}

std::pair<SDValue, SDValue>
TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
                                    SelectionDAG &DAG) const {
  SDLoc SL(LD);
  SDValue Chain = LD->getChain();
  SDValue BasePTR = LD->getBasePtr();
  EVT SrcVT = LD->getMemoryVT();
  EVT DstVT = LD->getValueType(ResNo: 0);
  ISD::LoadExtType ExtType = LD->getExtensionType();

  if (SrcVT.isScalableVector())
    report_fatal_error(reason: "Cannot scalarize scalable vector loads");

  unsigned NumElem = SrcVT.getVectorNumElements();

  EVT SrcEltVT = SrcVT.getScalarType();
  EVT DstEltVT = DstVT.getScalarType();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depends on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector whose elements are not
  // byte-sized must therefore be stored as an integer built out of the
  // extracted vector elements.
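  //
  // For example, a <4 x i4> vector is loaded as a single i16; element Idx is
  // then recovered by shifting right by Idx * 4 (reversed on big-endian) and
  // masking to 4 bits, as done below.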
  if (!SrcEltVT.isByteSized()) {
    unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
    EVT LoadVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumLoadBits);

    unsigned NumSrcBits = SrcVT.getSizeInBits();
    EVT SrcIntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumSrcBits);

    unsigned SrcEltBits = SrcEltVT.getSizeInBits();
    SDValue SrcEltBitMask = DAG.getConstant(
        Val: APInt::getLowBitsSet(numBits: NumLoadBits, loBitsSet: SrcEltBits), DL: SL, VT: LoadVT);

    // Load the whole vector and avoid masking off the top bits as it makes
    // the codegen worse.
    SDValue Load =
        DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl: SL, VT: LoadVT, Chain, Ptr: BasePTR,
                       PtrInfo: LD->getPointerInfo(), MemVT: SrcIntVT, Alignment: LD->getBaseAlign(),
                       MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());

    SmallVector<SDValue, 8> Vals;
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount = DAG.getShiftAmountConstant(
          Val: ShiftIntoIdx * SrcEltVT.getSizeInBits(), VT: LoadVT, DL: SL);
      SDValue ShiftedElt = DAG.getNode(Opcode: ISD::SRL, DL: SL, VT: LoadVT, N1: Load, N2: ShiftAmount);
      SDValue Elt =
          DAG.getNode(Opcode: ISD::AND, DL: SL, VT: LoadVT, N1: ShiftedElt, N2: SrcEltBitMask);
      SDValue Scalar = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SL, VT: SrcEltVT, Operand: Elt);

      if (ExtType != ISD::NON_EXTLOAD) {
        unsigned ExtendOp = ISD::getExtForLoadExtType(IsFP: false, ExtType);
        Scalar = DAG.getNode(Opcode: ExtendOp, DL: SL, VT: DstEltVT, Operand: Scalar);
      }

      Vals.push_back(Elt: Scalar);
    }

    SDValue Value = DAG.getBuildVector(VT: DstVT, DL: SL, Ops: Vals);
    return std::make_pair(x&: Value, y: Load.getValue(R: 1));
  }

  unsigned Stride = SrcEltVT.getSizeInBits() / 8;
  assert(SrcEltVT.isByteSized());

  SmallVector<SDValue, 8> Vals;
  SmallVector<SDValue, 8> LoadChains;

  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue ScalarLoad = DAG.getExtLoad(
        ExtType, dl: SL, VT: DstEltVT, Chain, Ptr: BasePTR,
        PtrInfo: LD->getPointerInfo().getWithOffset(O: Idx * Stride), MemVT: SrcEltVT,
        Alignment: LD->getBaseAlign(), MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());

    BasePTR = DAG.getObjectPtrOffset(SL, Ptr: BasePTR, Offset: TypeSize::getFixed(ExactSize: Stride));

    Vals.push_back(Elt: ScalarLoad.getValue(R: 0));
    LoadChains.push_back(Elt: ScalarLoad.getValue(R: 1));
  }

  SDValue NewChain = DAG.getNode(Opcode: ISD::TokenFactor, DL: SL, VT: MVT::Other, Ops: LoadChains);
  SDValue Value = DAG.getBuildVector(VT: DstVT, DL: SL, Ops: Vals);

  return std::make_pair(x&: Value, y&: NewChain);
}

SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  SDLoc SL(ST);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  EVT StVT = ST->getMemoryVT();

  if (StVT.isScalableVector())
    report_fatal_error(reason: "Cannot scalarize scalable vector stores");

  // The type of the data we want to save.
  EVT RegVT = Value.getValueType();
  EVT RegSclVT = RegVT.getScalarType();

  // The type of the data as saved in memory.
  EVT MemSclVT = StVT.getScalarType();

  unsigned NumElem = StVT.getVectorNumElements();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depends on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector whose elements are not
  // byte-sized must therefore be stored as an integer built out of the
  // extracted vector elements.
  if (!MemSclVT.isByteSized()) {
    unsigned NumBits = StVT.getSizeInBits();
    EVT IntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumBits);

    SDValue CurrVal = DAG.getConstant(Val: 0, DL: SL, VT: IntVT);

    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      SDValue Elt = DAG.getExtractVectorElt(DL: SL, VT: RegSclVT, Vec: Value, Idx);
      SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SL, VT: MemSclVT, Operand: Elt);
      SDValue ExtElt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SL, VT: IntVT, Operand: Trunc);
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getConstant(Val: ShiftIntoIdx * MemSclVT.getSizeInBits(), DL: SL, VT: IntVT);
      SDValue ShiftedElt =
          DAG.getNode(Opcode: ISD::SHL, DL: SL, VT: IntVT, N1: ExtElt, N2: ShiftAmount);
      CurrVal = DAG.getNode(Opcode: ISD::OR, DL: SL, VT: IntVT, N1: CurrVal, N2: ShiftedElt);
    }

    return DAG.getStore(Chain, dl: SL, Val: CurrVal, Ptr: BasePtr, PtrInfo: ST->getPointerInfo(),
                        Alignment: ST->getBaseAlign(), MMOFlags: ST->getMemOperand()->getFlags(),
                        AAInfo: ST->getAAInfo());
  }

  // Store stride in bytes.
  unsigned Stride = MemSclVT.getSizeInBits() / 8;
  assert(Stride && "Zero stride!");
  // Extract each of the elements from the original vector and save them into
  // memory individually.
  SmallVector<SDValue, 8> Stores;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue Elt = DAG.getExtractVectorElt(DL: SL, VT: RegSclVT, Vec: Value, Idx);

    SDValue Ptr =
        DAG.getObjectPtrOffset(SL, Ptr: BasePtr, Offset: TypeSize::getFixed(ExactSize: Idx * Stride));

    // This scalar TruncStore may be illegal, but we legalize it later.
    SDValue Store = DAG.getTruncStore(
        Chain, dl: SL, Val: Elt, Ptr, PtrInfo: ST->getPointerInfo().getWithOffset(O: Idx * Stride),
        SVT: MemSclVT, Alignment: ST->getBaseAlign(), MMOFlags: ST->getMemOperand()->getFlags(),
        AAInfo: ST->getAAInfo());

    Stores.push_back(Elt: Store);
  }

  return DAG.getNode(Opcode: ISD::TokenFactor, DL: SL, VT: MVT::Other, Ops: Stores);
}

std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed loads not implemented!");
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  EVT VT = LD->getValueType(ResNo: 0);
  EVT LoadedVT = LD->getMemoryVT();
  SDLoc dl(LD);
  auto &MF = DAG.getMachineFunction();

  if (VT.isFloatingPoint() || VT.isVector()) {
    EVT intVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: LoadedVT.getSizeInBits());
    if (isTypeLegal(VT: intVT) && isTypeLegal(VT: LoadedVT)) {
      if (!isOperationLegalOrCustom(Op: ISD::LOAD, VT: intVT) &&
          LoadedVT.isVector()) {
        // Scalarize the load and let the individual components be handled.
        return scalarizeVectorLoad(LD, DAG);
      }

      // Expand to a (misaligned) integer load of the same size,
      // then bitconvert to floating point or vector.
      SDValue newLoad = DAG.getLoad(VT: intVT, dl, Chain, Ptr,
                                    MMO: LD->getMemOperand());
      SDValue Result = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: LoadedVT, Operand: newLoad);
      if (LoadedVT != VT)
        Result = DAG.getNode(Opcode: VT.isFloatingPoint() ? ISD::FP_EXTEND :
                                                      ISD::ANY_EXTEND, DL: dl, VT, Operand: Result);

      return std::make_pair(x&: Result, y: newLoad.getValue(R: 1));
    }

    // Copy the value to an (aligned) stack slot using (unaligned) integer
    // loads and stores, then do an (aligned) load from the stack slot.
    MVT RegVT = getRegisterType(Context&: *DAG.getContext(), VT: intVT);
    unsigned LoadedBytes = LoadedVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
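    // For example, a 10-byte value copied via 4-byte registers gives
    // NumRegs = 3: two full-width copies below, then one 2-byte extending
    // load for the remainder.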

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackBase = DAG.CreateStackTemporary(VT1: LoadedVT, VT2: RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(Val: StackBase.getNode())->getIndex();
    SmallVector<SDValue, 8> Stores;
    SDValue StackPtr = StackBase;
    unsigned Offset = 0;

    EVT PtrVT = Ptr.getValueType();
    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: StackPtrVT);

    // Do all but one of the copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the original location.
      SDValue Load = DAG.getLoad(
          VT: RegVT, dl, Chain, Ptr, PtrInfo: LD->getPointerInfo().getWithOffset(O: Offset),
          Alignment: LD->getBaseAlign(), MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
      // Follow the load with a store to the stack slot. Remember the store.
      Stores.push_back(Elt: DAG.getStore(
          Chain: Load.getValue(R: 1), dl, Val: Load, Ptr: StackPtr,
          PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset)));
      // Increment the pointers.
      Offset += RegBytes;

      Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: PtrIncrement);
      StackPtr = DAG.getObjectPtrOffset(SL: dl, Ptr: StackPtr, Offset: StackPtrIncrement);
    }

    // The last copy may be partial. Do an extending load.
    EVT MemVT = EVT::getIntegerVT(Context&: *DAG.getContext(),
                                  BitWidth: 8 * (LoadedBytes - Offset));
    SDValue Load = DAG.getExtLoad(
        ExtType: ISD::EXTLOAD, dl, VT: RegVT, Chain, Ptr,
        PtrInfo: LD->getPointerInfo().getWithOffset(O: Offset), MemVT, Alignment: LD->getBaseAlign(),
        MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
    // Follow the load with a store to the stack slot. Remember the store.
    // On big-endian machines this requires a truncating store to ensure
    // that the bits end up in the right place.
    Stores.push_back(Elt: DAG.getTruncStore(
        Chain: Load.getValue(R: 1), dl, Val: Load, Ptr: StackPtr,
        PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset), SVT: MemVT));

    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: Stores);

    // Finally, perform the original load only redirected to the stack slot.
    Load = DAG.getExtLoad(ExtType: LD->getExtensionType(), dl, VT, Chain: TF, Ptr: StackBase,
                          PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset: 0),
                          MemVT: LoadedVT);

    // Callers expect a MERGE_VALUES node.
    return std::make_pair(x&: Load, y&: TF);
  }

  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
         "Unaligned load of unsupported type.");

  // Compute the new VT that is half the size of the old one. This is an
  // integer MVT.
  unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT;
  NewLoadedVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumBits / 2);
  NumBits >>= 1;

  Align Alignment = LD->getBaseAlign();
  unsigned IncrementSize = NumBits / 8;
  ISD::LoadExtType HiExtType = LD->getExtensionType();

  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
  if (HiExtType == ISD::NON_EXTLOAD)
    HiExtType = ISD::ZEXTLOAD;

  // Load the value in two parts
  SDValue Lo, Hi;
  if (DAG.getDataLayout().isLittleEndian()) {
    Lo = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT, Chain, Ptr, PtrInfo: LD->getPointerInfo(),
                        MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
                        AAInfo: LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
    Hi = DAG.getExtLoad(ExtType: HiExtType, dl, VT, Chain, Ptr,
                        PtrInfo: LD->getPointerInfo().getWithOffset(O: IncrementSize),
                        MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
                        AAInfo: LD->getAAInfo());
  } else {
    Hi = DAG.getExtLoad(ExtType: HiExtType, dl, VT, Chain, Ptr, PtrInfo: LD->getPointerInfo(),
                        MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
                        AAInfo: LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
    Lo = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        PtrInfo: LD->getPointerInfo().getWithOffset(O: IncrementSize),
                        MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
                        AAInfo: LD->getAAInfo());
  }

// Aggregate the two parts into the final value.
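// Illustrative example (little-endian i32 built from two i16 halves): with
// NumBits = 16, Lo = 0x5678 and Hi = 0x1234 combine as
// (0x1234 << 16) | 0x5678 = 0x12345678.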
10613 SDValue ShiftAmount = DAG.getShiftAmountConstant(Val: NumBits, VT, DL: dl);
10614 SDValue Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Hi, N2: ShiftAmount);
10615 Result = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Result, N2: Lo);
10616
10617 SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Lo.getValue(R: 1),
10618 N2: Hi.getValue(R: 1));
10619
10620 return std::make_pair(x&: Result, y&: TF);
10621}
10622
10623SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
10624 SelectionDAG &DAG) const {
10625 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
10626 "unaligned indexed stores not implemented!");
10627 SDValue Chain = ST->getChain();
10628 SDValue Ptr = ST->getBasePtr();
10629 SDValue Val = ST->getValue();
10630 EVT VT = Val.getValueType();
10631 Align Alignment = ST->getBaseAlign();
10632 auto &MF = DAG.getMachineFunction();
10633 EVT StoreMemVT = ST->getMemoryVT();
10634
10635 SDLoc dl(ST);
10636 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
10637 EVT intVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: VT.getSizeInBits());
10638 if (isTypeLegal(VT: intVT)) {
10639 if (!isOperationLegalOrCustom(Op: ISD::STORE, VT: intVT) &&
10640 StoreMemVT.isVector()) {
10641 // Scalarize the store and let the individual components be handled.
10642 SDValue Result = scalarizeVectorStore(ST, DAG);
10643 return Result;
10644 }
10645 // Expand to a bitconvert of the value to the integer type of the
10646 // same size, then a (misaligned) int store.
10647 // FIXME: Does not handle truncating floating point stores!
10648 SDValue Result = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: intVT, Operand: Val);
10649 Result = DAG.getStore(Chain, dl, Val: Result, Ptr, PtrInfo: ST->getPointerInfo(),
10650 Alignment, MMOFlags: ST->getMemOperand()->getFlags());
10651 return Result;
10652 }
// Do an (aligned) store to a stack slot, then copy from the stack slot
// to the final destination using (unaligned) integer loads and stores.
10655 MVT RegVT = getRegisterType(
10656 Context&: *DAG.getContext(),
10657 VT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: StoreMemVT.getSizeInBits()));
10658 EVT PtrVT = Ptr.getValueType();
10659 unsigned StoredBytes = StoreMemVT.getStoreSize();
10660 unsigned RegBytes = RegVT.getSizeInBits() / 8;
10661 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
10662
10663 // Make sure the stack slot is also aligned for the register type.
10664 SDValue StackPtr = DAG.CreateStackTemporary(VT1: StoreMemVT, VT2: RegVT);
10665 auto FrameIndex = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
10666
10667 // Perform the original store, only redirected to the stack slot.
10668 SDValue Store = DAG.getTruncStore(
10669 Chain, dl, Val, Ptr: StackPtr,
10670 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset: 0), SVT: StoreMemVT);
10671
10672 EVT StackPtrVT = StackPtr.getValueType();
10673
10674 SDValue PtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: PtrVT);
10675 SDValue StackPtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: StackPtrVT);
10676 SmallVector<SDValue, 8> Stores;
10677 unsigned Offset = 0;
10678
// Do all but one of the copies using the full register width.
10680 for (unsigned i = 1; i < NumRegs; i++) {
10681 // Load one integer register's worth from the stack slot.
10682 SDValue Load = DAG.getLoad(
10683 VT: RegVT, dl, Chain: Store, Ptr: StackPtr,
10684 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset));
10685 // Store it to the final location. Remember the store.
10686 Stores.push_back(Elt: DAG.getStore(Chain: Load.getValue(R: 1), dl, Val: Load, Ptr,
10687 PtrInfo: ST->getPointerInfo().getWithOffset(O: Offset),
10688 Alignment: ST->getBaseAlign(),
10689 MMOFlags: ST->getMemOperand()->getFlags()));
10690 // Increment the pointers.
10691 Offset += RegBytes;
10692 StackPtr = DAG.getObjectPtrOffset(SL: dl, Ptr: StackPtr, Offset: StackPtrIncrement);
10693 Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: PtrIncrement);
10694 }
10695
10696 // The last store may be partial. Do a truncating store. On big-endian
10697 // machines this requires an extending load from the stack slot to ensure
10698 // that the bits are in the right place.
10699 EVT LoadMemVT =
10700 EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: 8 * (StoredBytes - Offset));
10701
10702 // Load from the stack slot.
10703 SDValue Load = DAG.getExtLoad(
10704 ExtType: ISD::EXTLOAD, dl, VT: RegVT, Chain: Store, Ptr: StackPtr,
10705 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset), MemVT: LoadMemVT);
10706
10707 Stores.push_back(Elt: DAG.getTruncStore(
10708 Chain: Load.getValue(R: 1), dl, Val: Load, Ptr,
10709 PtrInfo: ST->getPointerInfo().getWithOffset(O: Offset), SVT: LoadMemVT,
10710 Alignment: ST->getBaseAlign(), MMOFlags: ST->getMemOperand()->getFlags(), AAInfo: ST->getAAInfo()));
10711 // The order of the stores doesn't matter - say it with a TokenFactor.
10712 SDValue Result = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: Stores);
10713 return Result;
10714 }
10715
10716 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
10717 "Unaligned store of unknown type.");
10718 // Get the half-size VT
10719 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(Context&: *DAG.getContext());
10720 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
10721 unsigned IncrementSize = NumBits / 8;
10722
// Divide the stored value into two parts.
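// Illustrative example (i32 split into i16 halves, NumBits = 16): storing
// 0x12345678 produces Lo = 0x5678 and Hi = 0x1234; little-endian targets
// store Lo at Ptr and Hi at Ptr + 2, big-endian targets the reverse.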
10724 SDValue ShiftAmount =
10725 DAG.getShiftAmountConstant(Val: NumBits, VT: Val.getValueType(), DL: dl);
10726 SDValue Lo = Val;
10727 // If Val is a constant, replace the upper bits with 0. The SRL will constant
10728 // fold and not use the upper bits. A smaller constant may be easier to
10729 // materialize.
10730 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Lo); C && !C->isOpaque())
10731 Lo = DAG.getNode(
10732 Opcode: ISD::AND, DL: dl, VT, N1: Lo,
10733 N2: DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VT.getSizeInBits(), loBitsSet: NumBits), DL: dl,
10734 VT));
10735 SDValue Hi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Val, N2: ShiftAmount);
10736
10737 // Store the two parts
10738 SDValue Store1, Store2;
10739 Store1 = DAG.getTruncStore(Chain, dl,
10740 Val: DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
10741 Ptr, PtrInfo: ST->getPointerInfo(), SVT: NewStoredVT, Alignment,
10742 MMOFlags: ST->getMemOperand()->getFlags());
10743
10744 Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
10745 Store2 = DAG.getTruncStore(
10746 Chain, dl, Val: DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
10747 PtrInfo: ST->getPointerInfo().getWithOffset(O: IncrementSize), SVT: NewStoredVT, Alignment,
10748 MMOFlags: ST->getMemOperand()->getFlags(), AAInfo: ST->getAAInfo());
10749
10750 SDValue Result =
10751 DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Store1, N2: Store2);
10752 return Result;
10753}
10754
10755SDValue
10756TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
10757 const SDLoc &DL, EVT DataVT,
10758 SelectionDAG &DAG,
10759 bool IsCompressedMemory) const {
10760 SDValue Increment;
10761 EVT AddrVT = Addr.getValueType();
10762 EVT MaskVT = Mask.getValueType();
10763 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
10764 "Incompatible types of Data and Mask");
10765 if (IsCompressedMemory) {
// Increment the pointer according to the number of '1' bits in the mask.
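// Illustrative example: a compressing store of v8i16 with four mask bits
// set advances the pointer by CTPOP(mask) * 2 = 8 bytes.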
10767 if (DataVT.isScalableVector()) {
10768 EVT MaskExtVT = MaskVT.changeElementType(Context&: *DAG.getContext(), EltVT: MVT::i32);
10769 SDValue MaskExt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MaskExtVT, Operand: Mask);
10770 Increment = DAG.getNode(Opcode: ISD::VECREDUCE_ADD, DL, VT: MVT::i32, Operand: MaskExt);
10771 } else {
10772 EVT MaskIntVT =
10773 EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: MaskVT.getSizeInBits());
10774 SDValue MaskInIntReg = DAG.getBitcast(VT: MaskIntVT, V: Mask);
10775 if (MaskIntVT.getSizeInBits() < 32) {
10776 MaskInIntReg =
10777 DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i32, Operand: MaskInIntReg);
10778 MaskIntVT = MVT::i32;
10779 }
10780 Increment = DAG.getNode(Opcode: ISD::CTPOP, DL, VT: MaskIntVT, Operand: MaskInIntReg);
10781 }
10782 // Scale is an element size in bytes.
10783 SDValue Scale = DAG.getConstant(Val: DataVT.getScalarSizeInBits() / 8, DL,
10784 VT: AddrVT);
10785 Increment = DAG.getZExtOrTrunc(Op: Increment, DL, VT: AddrVT);
10786 Increment = DAG.getNode(Opcode: ISD::MUL, DL, VT: AddrVT, N1: Increment, N2: Scale);
10787 } else
10788 Increment = DAG.getTypeSize(DL, VT: AddrVT, TS: DataVT.getStoreSize());
10789
10790 return DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: Addr, N2: Increment);
10791}
10792
10793static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
10794 EVT VecVT, const SDLoc &dl,
10795 ElementCount SubEC) {
10796 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
10797 "Cannot index a scalable vector within a fixed-width vector");
10798
10799 unsigned NElts = VecVT.getVectorMinNumElements();
10800 unsigned NumSubElts = SubEC.getKnownMinValue();
10801 EVT IdxVT = Idx.getValueType();
10802
10803 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
// If this is a constant index and we know that the value plus the number
// of elements in the subvector minus one is less than the minimum number
// of elements, then it's safe to return Idx.
10807 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Val&: Idx))
10808 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
10809 return Idx;
10810 SDValue VS =
10811 DAG.getVScale(DL: dl, VT: IdxVT, MulImm: APInt(IdxVT.getFixedSizeInBits(), NElts));
10812 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
10813 SDValue Sub = DAG.getNode(Opcode: SubOpcode, DL: dl, VT: IdxVT, N1: VS,
10814 N2: DAG.getConstant(Val: NumSubElts, DL: dl, VT: IdxVT));
10815 return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT: IdxVT, N1: Idx, N2: Sub);
10816 }
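// Illustrative example: indexing a fixed v2i32 subvector within an nxv4i32
// vector clamps Idx to vscale * 4 - 2 via the UMIN above, keeping the
// subvector in bounds for any runtime vscale.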
10817 if (isPowerOf2_32(Value: NElts) && NumSubElts == 1) {
10818 APInt Imm = APInt::getLowBitsSet(numBits: IdxVT.getSizeInBits(), loBitsSet: Log2_32(Value: NElts));
10819 return DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IdxVT, N1: Idx,
10820 N2: DAG.getConstant(Val: Imm, DL: dl, VT: IdxVT));
10821 }
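// Illustrative example: extracting a single element from a v8i16 vector
// clamps the index with Idx & 7 above, which is cheaper than the generic
// UMIN clamp below.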
10822 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
10823 return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT: IdxVT, N1: Idx,
10824 N2: DAG.getConstant(Val: MaxIndex, DL: dl, VT: IdxVT));
10825}
10826
10827SDValue
10828TargetLowering::getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr,
10829 EVT VecVT, SDValue Index,
10830 const SDNodeFlags PtrArithFlags) const {
10831 return getVectorSubVecPointer(
10832 DAG, VecPtr, VecVT,
10833 SubVecVT: EVT::getVectorVT(Context&: *DAG.getContext(), VT: VecVT.getVectorElementType(), NumElements: 1),
10834 Index, PtrArithFlags);
10835}
10836
10837SDValue
10838TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr,
10839 EVT VecVT, EVT SubVecVT, SDValue Index,
10840 const SDNodeFlags PtrArithFlags) const {
10841 SDLoc dl(Index);
10842 // Make sure the index type is big enough to compute in.
10843 Index = DAG.getZExtOrTrunc(Op: Index, DL: dl, VT: VecPtr.getValueType());
10844
10845 EVT EltVT = VecVT.getVectorElementType();
10846
10847 // Calculate the element offset and add it to the pointer.
10848 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
10849 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
10850 "Converting bits to bytes lost precision");
10851 assert(SubVecVT.getVectorElementType() == EltVT &&
10852 "Sub-vector must be a vector with matching element type");
10853 Index = clampDynamicVectorIndex(DAG, Idx: Index, VecVT, dl,
10854 SubEC: SubVecVT.getVectorElementCount());
10855
10856 EVT IdxVT = Index.getValueType();
10857 if (SubVecVT.isScalableVector())
10858 Index =
10859 DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IdxVT, N1: Index,
10860 N2: DAG.getVScale(DL: dl, VT: IdxVT, MulImm: APInt(IdxVT.getSizeInBits(), 1)));
10861
10862 Index = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IdxVT, N1: Index,
10863 N2: DAG.getConstant(Val: EltSize, DL: dl, VT: IdxVT));
10864 return DAG.getMemBasePlusOffset(Base: VecPtr, Offset: Index, DL: dl, Flags: PtrArithFlags);
10865}
10866
10867//===----------------------------------------------------------------------===//
10868// Implementation of Emulated TLS Model
10869//===----------------------------------------------------------------------===//
10870
10871SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
10872 SelectionDAG &DAG) const {
// Access to the address of TLS variable xyz is lowered to a function call:
10874 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
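// Illustrative example: for `@x = thread_local global i32`, the access is
// lowered to `__emutls_get_address(&__emutls_v.x)`, which returns the
// address of the current thread's copy of x.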
10875 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
10876 PointerType *VoidPtrType = PointerType::get(C&: *DAG.getContext(), AddressSpace: 0);
10877 SDLoc dl(GA);
10878
10879 ArgListTy Args;
10880 const GlobalValue *GV =
10881 cast<GlobalValue>(Val: GA->getGlobal()->stripPointerCastsAndAliases());
10882 SmallString<32> NameString("__emutls_v.");
10883 NameString += GV->getName();
10884 StringRef EmuTlsVarName(NameString);
10885 const GlobalVariable *EmuTlsVar =
10886 GV->getParent()->getNamedGlobal(Name: EmuTlsVarName);
assert(EmuTlsVar && "Cannot find EmuTlsVar");
10888 Args.emplace_back(args: DAG.getGlobalAddress(GV: EmuTlsVar, DL: dl, VT: PtrVT), args&: VoidPtrType);
10889
10890 SDValue EmuTlsGetAddr = DAG.getExternalSymbol(Sym: "__emutls_get_address", VT: PtrVT);
10891
10892 TargetLowering::CallLoweringInfo CLI(DAG);
10893 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
10894 CLI.setLibCallee(CC: CallingConv::C, ResultType: VoidPtrType, Target: EmuTlsGetAddr, ArgsList: std::move(Args));
10895 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
10896
// TLSADDR will be codegen'ed as a call. Inform MFI that the function has
// calls. At least for X86 targets; maybe good for other targets too?
10899 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setAdjustsStack(true); // Is this only needed for the X86 target?
10901 MFI.setHasCalls(true);
10902
10903 assert((GA->getOffset() == 0) &&
10904 "Emulated TLS must have zero offset in GlobalAddressSDNode");
10905 return CallResult.first;
10906}
10907
10908SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
10909 SelectionDAG &DAG) const {
10910 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
10911 if (!isCtlzFast())
10912 return SDValue();
10913 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get();
10914 SDLoc dl(Op);
10915 if (isNullConstant(V: Op.getOperand(i: 1)) && CC == ISD::SETEQ) {
10916 EVT VT = Op.getOperand(i: 0).getValueType();
10917 SDValue Zext = Op.getOperand(i: 0);
10918 if (VT.bitsLT(VT: MVT::i32)) {
10919 VT = MVT::i32;
10920 Zext = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Op.getOperand(i: 0));
10921 }
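// Illustrative example (i32): ctlz(x) is 32 (0b100000) only when x == 0,
// so ctlz(x) >> 5 yields 1 for x == 0 and 0 for any nonzero x.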
10922 unsigned Log2b = Log2_32(Value: VT.getSizeInBits());
10923 SDValue Clz = DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Zext);
10924 SDValue Scc = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Clz,
10925 N2: DAG.getConstant(Val: Log2b, DL: dl, VT: MVT::i32));
10926 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32, Operand: Scc);
10927 }
10928 return SDValue();
10929}
10930
10931SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
10932 SDValue Op0 = Node->getOperand(Num: 0);
10933 SDValue Op1 = Node->getOperand(Num: 1);
10934 EVT VT = Op0.getValueType();
10935 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10936 unsigned Opcode = Node->getOpcode();
10937 SDLoc DL(Node);
10938
10939 // If both sign bits are zero, flip UMIN/UMAX <-> SMIN/SMAX if legal.
10940 unsigned AltOpcode = ISD::getOppositeSignednessMinMaxOpcode(MinMaxOpc: Opcode);
10941 if (isOperationLegal(Op: AltOpcode, VT) && DAG.SignBitIsZero(Op: Op0) &&
10942 DAG.SignBitIsZero(Op: Op1))
10943 return DAG.getNode(Opcode: AltOpcode, DL, VT, N1: Op0, N2: Op1);
10944
10945 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
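// Illustrative example (i8): for x == 0 the compare produces all-ones (-1),
// so x - (-1) = 1; for x >= 1 it produces 0 and x is unchanged -- exactly
// umax(x, 1).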
10946 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(V: Op1, AllowUndefs: true) && BoolVT == VT &&
10947 getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
10948 Op0 = DAG.getFreeze(V: Op0);
10949 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
10950 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0,
10951 N2: DAG.getSetCC(DL, VT, LHS: Op0, RHS: Zero, Cond: ISD::SETEQ));
10952 }
10953
10954 // umin(x,y) -> sub(x,usubsat(x,y))
10955 // TODO: Missing freeze(Op0)?
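// Illustrative check: usubsat(x, y) is x - y clamped at 0, so
// x - usubsat(x, y) yields y when y < x and x otherwise, i.e. umin(x, y).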
10956 if (Opcode == ISD::UMIN && isOperationLegal(Op: ISD::SUB, VT) &&
10957 isOperationLegal(Op: ISD::USUBSAT, VT)) {
10958 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0,
10959 N2: DAG.getNode(Opcode: ISD::USUBSAT, DL, VT, N1: Op0, N2: Op1));
10960 }
10961
10962 // umax(x,y) -> add(x,usubsat(y,x))
10963 // TODO: Missing freeze(Op0)?
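// Illustrative check: x + usubsat(y, x) yields x + (y - x) = y when y > x,
// and x + 0 = x otherwise, i.e. umax(x, y).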
10964 if (Opcode == ISD::UMAX && isOperationLegal(Op: ISD::ADD, VT) &&
10965 isOperationLegal(Op: ISD::USUBSAT, VT)) {
10966 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0,
10967 N2: DAG.getNode(Opcode: ISD::USUBSAT, DL, VT, N1: Op1, N2: Op0));
10968 }
10969
10970 // FIXME: Should really try to split the vector in case it's legal on a
10971 // subvector.
10972 if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
10973 return DAG.UnrollVectorOp(N: Node);
10974
10975 // Attempt to find an existing SETCC node that we can reuse.
10976 // TODO: Do we need a generic doesSETCCNodeExist?
10977 // TODO: Missing freeze(Op0)/freeze(Op1)?
10978 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
10979 ISD::CondCode PrefCommuteCC,
10980 ISD::CondCode AltCommuteCC) {
10981 SDVTList BoolVTList = DAG.getVTList(VT: BoolVT);
10982 for (ISD::CondCode CC : {PrefCC, AltCC}) {
10983 if (DAG.doesNodeExist(Opcode: ISD::SETCC, VTList: BoolVTList,
10984 Ops: {Op0, Op1, DAG.getCondCode(Cond: CC)})) {
10985 SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: CC);
10986 return DAG.getSelect(DL, VT, Cond, LHS: Op0, RHS: Op1);
10987 }
10988 }
10989 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
10990 if (DAG.doesNodeExist(Opcode: ISD::SETCC, VTList: BoolVTList,
10991 Ops: {Op0, Op1, DAG.getCondCode(Cond: CC)})) {
10992 SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: CC);
10993 return DAG.getSelect(DL, VT, Cond, LHS: Op1, RHS: Op0);
10994 }
10995 }
10996 SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: PrefCC);
10997 return DAG.getSelect(DL, VT, Cond, LHS: Op0, RHS: Op1);
10998 };
10999
11000 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
11001 // -> Y = (A < B) ? B : A
11002 // -> Y = (A >= B) ? A : B
11003 // -> Y = (A <= B) ? B : A
11004 switch (Opcode) {
11005 case ISD::SMAX:
11006 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
11007 case ISD::SMIN:
11008 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
11009 case ISD::UMAX:
11010 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
11011 case ISD::UMIN:
11012 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
11013 }
11014
11015 llvm_unreachable("How did we get here?");
11016}
11017
11018SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
11019 unsigned Opcode = Node->getOpcode();
11020 SDValue LHS = Node->getOperand(Num: 0);
11021 SDValue RHS = Node->getOperand(Num: 1);
11022 EVT VT = LHS.getValueType();
11023 SDLoc dl(Node);
11024
11025 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
11026 assert(VT.isInteger() && "Expected operands to be integers");
11027
11028 // usub.sat(a, b) -> umax(a, b) - b
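// Illustrative example (i8): usub.sat(5, 7) -> umax(5, 7) - 7 = 0, while
// usub.sat(7, 5) -> umax(7, 5) - 5 = 2.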
11029 if (Opcode == ISD::USUBSAT && isOperationLegal(Op: ISD::UMAX, VT)) {
11030 SDValue Max = DAG.getNode(Opcode: ISD::UMAX, DL: dl, VT, N1: LHS, N2: RHS);
11031 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Max, N2: RHS);
11032 }
11033
11034 // usub.sat(a, 1) -> sub(a, zext(a != 0))
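// Illustrative check: a == 0 gives a - 0 = 0, and any a > 0 gives a - 1,
// which is exactly usub.sat(a, 1).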
11035 if (Opcode == ISD::USUBSAT && isOneOrOneSplat(V: RHS)) {
11036 LHS = DAG.getFreeze(V: LHS);
11037 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
11038 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
11039 SDValue IsNonZero = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: Zero, Cond: ISD::SETNE);
11040 SDValue Subtrahend = DAG.getBoolExtOrTrunc(Op: IsNonZero, SL: dl, VT, OpVT: BoolVT);
11041 Subtrahend =
11042 DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Subtrahend, N2: DAG.getConstant(Val: 1, DL: dl, VT));
11043 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: Subtrahend);
11044 }
11045
11046 // uadd.sat(a, b) -> umin(a, ~b) + b
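// Illustrative example (i8): uadd.sat(250, 10) -> umin(250, ~10) + 10 =
// umin(250, 245) + 10 = 255 (saturated), while uadd.sat(1, 10) ->
// umin(1, 245) + 10 = 11.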
11047 if (Opcode == ISD::UADDSAT && isOperationLegal(Op: ISD::UMIN, VT)) {
11048 SDValue InvRHS = DAG.getNOT(DL: dl, Val: RHS, VT);
11049 SDValue Min = DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT, N1: LHS, N2: InvRHS);
11050 return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Min, N2: RHS);
11051 }
11052
11053 unsigned OverflowOp;
11054 switch (Opcode) {
11055 case ISD::SADDSAT:
11056 OverflowOp = ISD::SADDO;
11057 break;
11058 case ISD::UADDSAT:
11059 OverflowOp = ISD::UADDO;
11060 break;
11061 case ISD::SSUBSAT:
11062 OverflowOp = ISD::SSUBO;
11063 break;
11064 case ISD::USUBSAT:
11065 OverflowOp = ISD::USUBO;
11066 break;
11067 default:
11068 llvm_unreachable("Expected method to receive signed or unsigned saturation "
11069 "addition or subtraction node.");
11070 }
11071
11072 // FIXME: Should really try to split the vector in case it's legal on a
11073 // subvector.
11074 if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
11075 return DAG.UnrollVectorOp(N: Node);
11076
11077 unsigned BitWidth = LHS.getScalarValueSizeInBits();
11078 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
11079 SDValue Result = DAG.getNode(Opcode: OverflowOp, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
11080 SDValue SumDiff = Result.getValue(R: 0);
11081 SDValue Overflow = Result.getValue(R: 1);
11082 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
11083 SDValue AllOnes = DAG.getAllOnesConstant(DL: dl, VT);
11084
11085 if (Opcode == ISD::UADDSAT) {
11086 if (getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
11087 // (LHS + RHS) | OverflowMask
11088 SDValue OverflowMask = DAG.getSExtOrTrunc(Op: Overflow, DL: dl, VT);
11089 return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: SumDiff, N2: OverflowMask);
11090 }
11091 // Overflow ? 0xffff.... : (LHS + RHS)
11092 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: AllOnes, RHS: SumDiff);
11093 }
11094
11095 if (Opcode == ISD::USUBSAT) {
11096 if (getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
11097 // (LHS - RHS) & ~OverflowMask
11098 SDValue OverflowMask = DAG.getSExtOrTrunc(Op: Overflow, DL: dl, VT);
11099 SDValue Not = DAG.getNOT(DL: dl, Val: OverflowMask, VT);
11100 return DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: SumDiff, N2: Not);
11101 }
11102 // Overflow ? 0 : (LHS - RHS)
11103 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Zero, RHS: SumDiff);
11104 }
11105
11106 if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
11107 APInt MinVal = APInt::getSignedMinValue(numBits: BitWidth);
11108 APInt MaxVal = APInt::getSignedMaxValue(numBits: BitWidth);
11109
11110 KnownBits KnownLHS = DAG.computeKnownBits(Op: LHS);
11111 KnownBits KnownRHS = DAG.computeKnownBits(Op: RHS);
11112
11113 // If either of the operand signs are known, then they are guaranteed to
11114 // only saturate in one direction. If non-negative they will saturate
11115 // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
11116 //
11117 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
11118 // sign of 'y' has to be flipped.
11119
11120 bool LHSIsNonNegative = KnownLHS.isNonNegative();
11121 bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
11122 : KnownRHS.isNegative();
11123 if (LHSIsNonNegative || RHSIsNonNegative) {
11124 SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
11125 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMax, RHS: SumDiff);
11126 }
11127
11128 bool LHSIsNegative = KnownLHS.isNegative();
11129 bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
11130 : KnownRHS.isNonNegative();
11131 if (LHSIsNegative || RHSIsNegative) {
11132 SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
11133 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMin, RHS: SumDiff);
11134 }
11135 }
11136
11137 // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
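// Illustrative example (i8): saddsat(100, 100) wraps to SumDiff = -56;
// -56 >> 7 = -1 (all ones) and -1 ^ 0x80 = 0x7f = 127, the correct
// saturation towards SIGNED_MAX.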
11138 APInt MinVal = APInt::getSignedMinValue(numBits: BitWidth);
11139 SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
11140 SDValue Shift = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: SumDiff,
11141 N2: DAG.getConstant(Val: BitWidth - 1, DL: dl, VT));
11142 Result = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Shift, N2: SatMin);
11143 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Result, RHS: SumDiff);
11144}
11145
11146SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const {
11147 unsigned Opcode = Node->getOpcode();
11148 SDValue LHS = Node->getOperand(Num: 0);
11149 SDValue RHS = Node->getOperand(Num: 1);
11150 EVT VT = LHS.getValueType();
11151 EVT ResVT = Node->getValueType(ResNo: 0);
11152 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
11153 SDLoc dl(Node);
11154
11155 auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
11156 auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
11157 SDValue IsLT = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS, Cond: LTPredicate);
11158 SDValue IsGT = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS, Cond: GTPredicate);
11159
11160 // We can't perform arithmetic on i1 values. Extending them would
11161 // probably result in worse codegen, so let's just use two selects instead.
11162 // Some targets are also just better off using selects rather than subtraction
11163 // because one of the conditions can be merged with one of the selects.
// And finally, if we don't know the contents of the high bits of a boolean
// value, we can't perform any arithmetic either.
11166 if (preferSelectsOverBooleanArithmetic(VT) ||
11167 BoolVT.getScalarSizeInBits() == 1 ||
11168 getBooleanContents(Type: BoolVT) == UndefinedBooleanContent) {
11169 SDValue SelectZeroOrOne =
11170 DAG.getSelect(DL: dl, VT: ResVT, Cond: IsGT, LHS: DAG.getConstant(Val: 1, DL: dl, VT: ResVT),
11171 RHS: DAG.getConstant(Val: 0, DL: dl, VT: ResVT));
11172 return DAG.getSelect(DL: dl, VT: ResVT, Cond: IsLT, LHS: DAG.getAllOnesConstant(DL: dl, VT: ResVT),
11173 RHS: SelectZeroOrOne);
11174 }
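// Illustrative check: with zero-or-one booleans, IsGT - IsLT directly
// yields 1, 0 or -1. With zero-or-all-ones booleans, IsGT and IsLT are
// swapped below, so the subtraction computes IsLT - IsGT where each operand
// is 0 or -1, producing the same three values.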
11175
11176 if (getBooleanContents(Type: BoolVT) == ZeroOrNegativeOneBooleanContent)
11177 std::swap(a&: IsGT, b&: IsLT);
11178 return DAG.getSExtOrTrunc(Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: BoolVT, N1: IsGT, N2: IsLT), DL: dl,
11179 VT: ResVT);
11180}
11181
11182SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
11183 unsigned Opcode = Node->getOpcode();
11184 bool IsSigned = Opcode == ISD::SSHLSAT;
11185 SDValue LHS = Node->getOperand(Num: 0);
11186 SDValue RHS = Node->getOperand(Num: 1);
11187 EVT VT = LHS.getValueType();
11188 SDLoc dl(Node);
11189
11190 assert((Node->getOpcode() == ISD::SSHLSAT ||
11191 Node->getOpcode() == ISD::USHLSAT) &&
11192 "Expected a SHLSAT opcode");
11193 assert(VT.isInteger() && "Expected operands to be integers");
11194
11195 if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
11196 return DAG.UnrollVectorOp(N: Node);
11197
11198 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
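// Illustrative example (unsigned i8): 0x48 << 2 wraps to 0x20, and
// 0x20 >> 2 = 0x08 != 0x48, so the shift overflowed and the result
// saturates to 0xff.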
11199
11200 unsigned BW = VT.getScalarSizeInBits();
11201 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
11202 SDValue Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS, N2: RHS);
11203 SDValue Orig =
11204 DAG.getNode(Opcode: IsSigned ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: Result, N2: RHS);
11205
11206 SDValue SatVal;
11207 if (IsSigned) {
11208 SDValue SatMin = DAG.getConstant(Val: APInt::getSignedMinValue(numBits: BW), DL: dl, VT);
11209 SDValue SatMax = DAG.getConstant(Val: APInt::getSignedMaxValue(numBits: BW), DL: dl, VT);
11210 SDValue Cond =
11211 DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: DAG.getConstant(Val: 0, DL: dl, VT), Cond: ISD::SETLT);
11212 SatVal = DAG.getSelect(DL: dl, VT, Cond, LHS: SatMin, RHS: SatMax);
11213 } else {
11214 SatVal = DAG.getConstant(Val: APInt::getMaxValue(numBits: BW), DL: dl, VT);
11215 }
11216 SDValue Cond = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: Orig, Cond: ISD::SETNE);
11217 return DAG.getSelect(DL: dl, VT, Cond, LHS: SatVal, RHS: Result);
11218}
11219
11220void TargetLowering::forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl,
11221 bool Signed, SDValue &Lo, SDValue &Hi,
11222 SDValue LHS, SDValue RHS,
11223 SDValue HiLHS, SDValue HiRHS) const {
11224 EVT VT = LHS.getValueType();
11225 assert(RHS.getValueType() == VT && "Mismatching operand types");
11226
11227 assert((HiLHS && HiRHS) || (!HiLHS && !HiRHS));
11228 assert((!Signed || !HiLHS) &&
11229 "Signed flag should only be set when HiLHS and RiRHS are null");
11230
11231 // We'll expand the multiplication by brute force because we have no other
11232 // options. This is a trivially-generalized version of the code from
11233 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
11234 // 4.3.1). If Signed is set, we can use arithmetic right shifts to propagate
11235 // sign bits while calculating the Hi half.
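// Sketch of the decomposition (illustrative): with H = HalfBits, write
// LHS = LH * 2^H + LL and RHS = RH * 2^H + RL. Then
//   LHS * RHS = LL*RL + (LH*RL + LL*RH) * 2^H + LH*RH * 2^(2H),
// and T, U and V below accumulate these partial products along with their
// carries into Lo and Hi.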
11236 unsigned Bits = VT.getSizeInBits();
11237 unsigned HalfBits = Bits / 2;
11238 SDValue Mask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: Bits, loBitsSet: HalfBits), DL: dl, VT);
11239 SDValue LL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: LHS, N2: Mask);
11240 SDValue RL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: RHS, N2: Mask);
11241
11242 SDValue T = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LL, N2: RL);
11243 SDValue TL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: T, N2: Mask);
11244
11245 SDValue Shift = DAG.getShiftAmountConstant(Val: HalfBits, VT, DL: dl);
11246 // This is always an unsigned shift.
11247 SDValue TH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: T, N2: Shift);
11248
11249 unsigned ShiftOpc = Signed ? ISD::SRA : ISD::SRL;
11250 SDValue LH = DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: LHS, N2: Shift);
11251 SDValue RH = DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: RHS, N2: Shift);
11252
11253 SDValue U =
11254 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LH, N2: RL), N2: TH);
11255 SDValue UL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: U, N2: Mask);
11256 SDValue UH = DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: U, N2: Shift);
11257
11258 SDValue V =
11259 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LL, N2: RH), N2: UL);
11260 SDValue VH = DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: V, N2: Shift);
11261
11262 Lo = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: TL,
11263 N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: V, N2: Shift));
11264
11265 Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LH, N2: RH),
11266 N2: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: UH, N2: VH));
11267
11268 // If HiLHS and HiRHS are set, multiply them by the opposite low part and add
11269 // the products to Hi.
11270 if (HiLHS) {
11271 Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Hi,
11272 N2: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT,
11273 N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: HiRHS, N2: LHS),
11274 N2: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: RHS, N2: HiLHS)));
11275 }
11276}
11277
11278void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
11279 bool Signed, const SDValue LHS,
11280 const SDValue RHS, SDValue &Lo,
11281 SDValue &Hi) const {
11282 EVT VT = LHS.getValueType();
11283 assert(RHS.getValueType() == VT && "Mismatching operand types");
11284 EVT WideVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: VT.getSizeInBits() * 2);
11285 // We can fall back to a libcall with an illegal type for the MUL if we
11286 // have a libcall big enough.
11287 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
11288 if (WideVT == MVT::i16)
11289 LC = RTLIB::MUL_I16;
11290 else if (WideVT == MVT::i32)
11291 LC = RTLIB::MUL_I32;
11292 else if (WideVT == MVT::i64)
11293 LC = RTLIB::MUL_I64;
11294 else if (WideVT == MVT::i128)
11295 LC = RTLIB::MUL_I128;
11296
11297 RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(Call: LC);
11298 if (LibcallImpl == RTLIB::Unsupported) {
11299 forceExpandMultiply(DAG, dl, Signed, Lo, Hi, LHS, RHS);
11300 return;
11301 }
11302
11303 SDValue HiLHS, HiRHS;
11304 if (Signed) {
// The high part is obtained by SRA'ing all but one of the bits of the low
// part.
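// Illustrative example (i32 halves of an i64 libcall): HiLHS = LHS >> 31 is
// 0 or -1, so the pair {HiLHS, LHS} is the 64-bit sign extension of LHS.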
11307 unsigned LoSize = VT.getFixedSizeInBits();
11308 SDValue Shift = DAG.getShiftAmountConstant(Val: LoSize - 1, VT, DL: dl);
11309 HiLHS = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: LHS, N2: Shift);
11310 HiRHS = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: RHS, N2: Shift);
11311 } else {
11312 HiLHS = DAG.getConstant(Val: 0, DL: dl, VT);
11313 HiRHS = DAG.getConstant(Val: 0, DL: dl, VT);
11314 }
11315
11316 // Attempt a libcall.
11317 SDValue Ret;
11318 TargetLowering::MakeLibCallOptions CallOptions;
11319 CallOptions.setIsSigned(Signed);
11320 CallOptions.setIsPostTypeLegalization(true);
11321 if (shouldSplitFunctionArgumentsAsLittleEndian(DL: DAG.getDataLayout())) {
11322 // Halves of WideVT are packed into registers in different order
11323 // depending on platform endianness. This is usually handled by
11324 // the C calling convention, but we can't defer to it in
11325 // the legalizer.
11326 SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
11327 Ret = makeLibCall(DAG, LC, RetVT: WideVT, Ops: Args, CallOptions, dl).first;
11328 } else {
11329 SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
11330 Ret = makeLibCall(DAG, LC, RetVT: WideVT, Ops: Args, CallOptions, dl).first;
11331 }
11332 assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
11333 "Ret value is a collection of constituent nodes holding result.");
11334 if (DAG.getDataLayout().isLittleEndian()) {
11335 // Same as above.
11336 Lo = Ret.getOperand(i: 0);
11337 Hi = Ret.getOperand(i: 1);
11338 } else {
11339 Lo = Ret.getOperand(i: 1);
11340 Hi = Ret.getOperand(i: 0);
11341 }
11342}
11343
11344SDValue
11345TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
11346 assert((Node->getOpcode() == ISD::SMULFIX ||
11347 Node->getOpcode() == ISD::UMULFIX ||
11348 Node->getOpcode() == ISD::SMULFIXSAT ||
11349 Node->getOpcode() == ISD::UMULFIXSAT) &&
11350 "Expected a fixed point multiplication opcode");
11351
11352 SDLoc dl(Node);
11353 SDValue LHS = Node->getOperand(Num: 0);
11354 SDValue RHS = Node->getOperand(Num: 1);
11355 EVT VT = LHS.getValueType();
11356 unsigned Scale = Node->getConstantOperandVal(Num: 2);
11357 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
11358 Node->getOpcode() == ISD::UMULFIXSAT);
11359 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
11360 Node->getOpcode() == ISD::SMULFIXSAT);
11361 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
11362 unsigned VTSize = VT.getScalarSizeInBits();
11363
11364 if (!Scale) {
11365 // [us]mul.fix(a, b, 0) -> mul(a, b)
11366 if (!Saturating) {
11367 if (isOperationLegalOrCustom(Op: ISD::MUL, VT))
11368 return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
11369 } else if (Signed && isOperationLegalOrCustom(Op: ISD::SMULO, VT)) {
11370 SDValue Result =
11371 DAG.getNode(Opcode: ISD::SMULO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
11372 SDValue Product = Result.getValue(R: 0);
11373 SDValue Overflow = Result.getValue(R: 1);
11374 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
11375
11376 APInt MinVal = APInt::getSignedMinValue(numBits: VTSize);
11377 APInt MaxVal = APInt::getSignedMaxValue(numBits: VTSize);
11378 SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
11379 SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
// XOR the inputs; if the resulting sign bit is 0 the product will be
// positive, otherwise negative.
11382 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: LHS, N2: RHS);
11383 SDValue ProdNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Xor, RHS: Zero, Cond: ISD::SETLT);
11384 Result = DAG.getSelect(DL: dl, VT, Cond: ProdNeg, LHS: SatMin, RHS: SatMax);
11385 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Result, RHS: Product);
11386 } else if (!Signed && isOperationLegalOrCustom(Op: ISD::UMULO, VT)) {
11387 SDValue Result =
11388 DAG.getNode(Opcode: ISD::UMULO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
11389 SDValue Product = Result.getValue(R: 0);
11390 SDValue Overflow = Result.getValue(R: 1);
11391
11392 APInt MaxVal = APInt::getMaxValue(numBits: VTSize);
11393 SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
11394 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMax, RHS: Product);
11395 }
11396 }
11397
11398 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
11399 "Expected scale to be less than the number of bits if signed or at "
11400 "most the number of bits if unsigned.");
11401 assert(LHS.getValueType() == RHS.getValueType() &&
11402 "Expected both operands to be the same type");
11403
11404 // Get the upper and lower bits of the result.
11405 SDValue Lo, Hi;
11406 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
11407 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
11408 EVT WideVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: VTSize * 2);
11409 if (VT.isVector())
11410 WideVT =
11411 EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT, EC: VT.getVectorElementCount());
11412 if (isOperationLegalOrCustom(Op: LoHiOp, VT)) {
11413 SDValue Result = DAG.getNode(Opcode: LoHiOp, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: LHS, N2: RHS);
11414 Lo = Result.getValue(R: 0);
11415 Hi = Result.getValue(R: 1);
11416 } else if (isOperationLegalOrCustom(Op: HiOp, VT)) {
11417 Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
11418 Hi = DAG.getNode(Opcode: HiOp, DL: dl, VT, N1: LHS, N2: RHS);
11419 } else if (isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT)) {
11420 // Try for a multiplication using a wider type.
11421 unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
11422 SDValue LHSExt = DAG.getNode(Opcode: Ext, DL: dl, VT: WideVT, Operand: LHS);
11423 SDValue RHSExt = DAG.getNode(Opcode: Ext, DL: dl, VT: WideVT, Operand: RHS);
11424 SDValue Res = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: LHSExt, N2: RHSExt);
11425 Lo = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Res);
11426 SDValue Shifted =
11427 DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: WideVT, N1: Res,
11428 N2: DAG.getShiftAmountConstant(Val: VTSize, VT: WideVT, DL: dl));
11429 Hi = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Shifted);
11430 } else if (VT.isVector()) {
11431 return SDValue();
11432 } else {
11433 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
11434 }
11435
11436 if (Scale == VTSize)
11437 // Result is just the top half since we'd be shifting by the width of the
// operand. Overflow is impossible, so this works for both UMULFIX and
// UMULFIXSAT.
11440 return Hi;
11441
11442 // The result will need to be shifted right by the scale since both operands
11443 // are scaled. The result is given to us in 2 halves, so we only want part of
11444 // both in the result.
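// Illustrative example (umul.fix on i8 with Scale = 4): 0x18 (1.5) times
// 0x28 (2.5) gives the 16-bit product 0x03c0; FSHR by 4 on the Hi:Lo pair
// keeps bits [11:4] = 0x3c, i.e. 3.75 in Q4.4.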
11445 SDValue Result = DAG.getNode(Opcode: ISD::FSHR, DL: dl, VT, N1: Hi, N2: Lo,
11446 N3: DAG.getShiftAmountConstant(Val: Scale, VT, DL: dl));
11447 if (!Saturating)
11448 return Result;
11449
11450 if (!Signed) {
11451 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
11452 // widened multiplication) aren't all zeroes.
11453
11454 // Saturate to max if ((Hi >> Scale) != 0),
11455 // which is the same as if (Hi > ((1 << Scale) - 1))
11456 APInt MaxVal = APInt::getMaxValue(numBits: VTSize);
11457 SDValue LowMask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VTSize, loBitsSet: Scale),
11458 DL: dl, VT);
11459 Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: LowMask,
11460 True: DAG.getConstant(Val: MaxVal, DL: dl, VT), False: Result,
11461 Cond: ISD::SETUGT);
11462
11463 return Result;
11464 }
11465
11466 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
11467 // widened multiplication) aren't all ones or all zeroes.
11468
11469 SDValue SatMin = DAG.getConstant(Val: APInt::getSignedMinValue(numBits: VTSize), DL: dl, VT);
11470 SDValue SatMax = DAG.getConstant(Val: APInt::getSignedMaxValue(numBits: VTSize), DL: dl, VT);
11471
11472 if (Scale == 0) {
11473 SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Lo,
11474 N2: DAG.getShiftAmountConstant(Val: VTSize - 1, VT, DL: dl));
11475 SDValue Overflow = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Hi, RHS: Sign, Cond: ISD::SETNE);
// Saturate to SatMin if the wide product is negative, and to SatMax if the
// wide product is positive ...
11478 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
11479 SDValue ResultIfOverflow = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: Zero, True: SatMin, False: SatMax,
11480 Cond: ISD::SETLT);
11481 // ... but only if we overflowed.
11482 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: ResultIfOverflow, RHS: Result);
11483 }
11484
// We handled Scale == 0 above, so all the bits to examine are in Hi.
11486
11487 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
11488 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
11489 SDValue LowMask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VTSize, loBitsSet: Scale - 1),
11490 DL: dl, VT);
11491 Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: LowMask, True: SatMax, False: Result, Cond: ISD::SETGT);
// Saturate to min if ((Hi >> (Scale - 1)) < -1),
// which is the same as if (Hi < (-1 << (Scale - 1))).
11494 SDValue HighMask =
11495 DAG.getConstant(Val: APInt::getHighBitsSet(numBits: VTSize, hiBitsSet: VTSize - Scale + 1),
11496 DL: dl, VT);
11497 Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: HighMask, True: SatMin, False: Result, Cond: ISD::SETLT);
11498 return Result;
11499}
11500
11501SDValue
11502TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
11503 SDValue LHS, SDValue RHS,
11504 unsigned Scale, SelectionDAG &DAG) const {
11505 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
11506 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
11507 "Expected a fixed point division opcode");
11508
11509 EVT VT = LHS.getValueType();
11510 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
11511 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
11512 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
11513
11514 // If there is enough room in the type to upscale the LHS or downscale the
11515 // RHS before the division, we can perform it in this type without having to
11516 // resize. For signed operations, the LHS headroom is the number of
11517 // redundant sign bits, and for unsigned ones it is the number of zeroes.
11518 // The headroom for the RHS is the number of trailing zeroes.
11519 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(Op: LHS) - 1
11520 : DAG.computeKnownBits(Op: LHS).countMinLeadingZeros();
11521 unsigned RHSTrail = DAG.computeKnownBits(Op: RHS).countMinTrailingZeros();
11522
11523 // For signed saturating operations, we need to be able to detect true integer
11524 // division overflow; that is, when you have MIN / -EPS. However, this
11525 // is undefined behavior and if we emit divisions that could take such
11526 // values it may cause undesired behavior (arithmetic exceptions on x86, for
11527 // example).
11528 // Avoid this by requiring an extra bit so that we never get this case.
11529 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
11530 // signed saturating division, we need to emit a whopping 32-bit division.
11531 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
11532 return SDValue();
11533
11534 unsigned LHSShift = std::min(a: LHSLead, b: Scale);
11535 unsigned RHSShift = Scale - LHSShift;
11536
11537 // At this point, we know that if we shift the LHS up by LHSShift and the
11538 // RHS down by RHSShift, we can emit a regular division with a final scaling
11539 // factor of Scale.
11540
11541 if (LHSShift)
11542 LHS = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS,
11543 N2: DAG.getShiftAmountConstant(Val: LHSShift, VT, DL: dl));
11544 if (RHSShift)
11545 RHS = DAG.getNode(Opcode: Signed ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: RHS,
11546 N2: DAG.getShiftAmountConstant(Val: RHSShift, VT, DL: dl));
11547
11548 SDValue Quot;
11549 if (Signed) {
11550 // For signed operations, if the resulting quotient is negative and the
11551 // remainder is nonzero, subtract 1 from the quotient to round towards
11552 // negative infinity.
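// Illustrative example: -7 sdiv 2 truncates to -3 with remainder -1; the
// remainder is nonzero and the quotient is negative, so the select below
// produces -4, i.e. floor(-3.5).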
11553 SDValue Rem;
11554 // FIXME: Ideally we would always produce an SDIVREM here, but if the
11555 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
11556 // we couldn't just form a libcall, but the type legalizer doesn't do it.
11557 if (isTypeLegal(VT) &&
11558 isOperationLegalOrCustom(Op: ISD::SDIVREM, VT)) {
Quot = DAG.getNode(Opcode: ISD::SDIVREM, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT),
                   N1: LHS, N2: RHS);
11562 Rem = Quot.getValue(R: 1);
11563 Quot = Quot.getValue(R: 0);
11564 } else {
Quot = DAG.getNode(Opcode: ISD::SDIV, DL: dl, VT, N1: LHS, N2: RHS);
Rem = DAG.getNode(Opcode: ISD::SREM, DL: dl, VT, N1: LHS, N2: RHS);
11569 }
11570 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
11571 SDValue RemNonZero = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Rem, RHS: Zero, Cond: ISD::SETNE);
11572 SDValue LHSNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: Zero, Cond: ISD::SETLT);
11573 SDValue RHSNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: RHS, RHS: Zero, Cond: ISD::SETLT);
11574 SDValue QuotNeg = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: BoolVT, N1: LHSNeg, N2: RHSNeg);
11575 SDValue Sub1 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Quot,
11576 N2: DAG.getConstant(Val: 1, DL: dl, VT));
11577 Quot = DAG.getSelect(DL: dl, VT,
11578 Cond: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: BoolVT, N1: RemNonZero, N2: QuotNeg),
11579 LHS: Sub1, RHS: Quot);
11580 } else
Quot = DAG.getNode(Opcode: ISD::UDIV, DL: dl, VT, N1: LHS, N2: RHS);
11583
11584 return Quot;
11585}
11586
11587void TargetLowering::expandUADDSUBO(
11588 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11589 SDLoc dl(Node);
11590 SDValue LHS = Node->getOperand(Num: 0);
11591 SDValue RHS = Node->getOperand(Num: 1);
11592 bool IsAdd = Node->getOpcode() == ISD::UADDO;
11593
// If UADDO_CARRY/USUBO_CARRY is legal, use that instead.
11595 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
11596 if (isOperationLegalOrCustom(Op: OpcCarry, VT: Node->getValueType(ResNo: 0))) {
11597 SDValue CarryIn = DAG.getConstant(Val: 0, DL: dl, VT: Node->getValueType(ResNo: 1));
11598 SDValue NodeCarry = DAG.getNode(Opcode: OpcCarry, DL: dl, VTList: Node->getVTList(),
11599 Ops: { LHS, RHS, CarryIn });
11600 Result = SDValue(NodeCarry.getNode(), 0);
11601 Overflow = SDValue(NodeCarry.getNode(), 1);
11602 return;
11603 }
11604
11605 Result = DAG.getNode(Opcode: IsAdd ? ISD::ADD : ISD::SUB, DL: dl,
11606 VT: LHS.getValueType(), N1: LHS, N2: RHS);
11607
11608 EVT ResultType = Node->getValueType(ResNo: 1);
11609 EVT SetCCType = getSetCCResultType(
11610 DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: 0));
11611 SDValue SetCC;
11612 if (IsAdd && isOneConstant(V: RHS)) {
// Special case: uaddo X, 1 overflows if X+1 is 0. This potentially reduces
// the live range of X. We assume comparing with 0 is cheap.
11615 // The general case (X + C) < C is not necessarily beneficial. Although we
11616 // reduce the live range of X, we may introduce the materialization of
11617 // constant C.
11618 SetCC =
11619 DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Result,
11620 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Node->getValueType(ResNo: 0)), Cond: ISD::SETEQ);
11621 } else if (IsAdd && isAllOnesConstant(V: RHS)) {
11622 // Special case: uaddo X, -1 overflows if X != 0.
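// (Illustrative: X + (2^BW - 1) carries out for every X except 0, since
// X - 1 wraps only when X == 0.)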
11623 SetCC =
11624 DAG.getSetCC(DL: dl, VT: SetCCType, LHS,
11625 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Node->getValueType(ResNo: 0)), Cond: ISD::SETNE);
11626 } else {
11627 ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
11628 SetCC = DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Result, RHS: LHS, Cond: CC);
11629 }
11630 Overflow = DAG.getBoolExtOrTrunc(Op: SetCC, SL: dl, VT: ResultType, OpVT: ResultType);
11631}
11632
11633void TargetLowering::expandSADDSUBO(
11634 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11635 SDLoc dl(Node);
11636 SDValue LHS = Node->getOperand(Num: 0);
11637 SDValue RHS = Node->getOperand(Num: 1);
11638 bool IsAdd = Node->getOpcode() == ISD::SADDO;
11639
11640 Result = DAG.getNode(Opcode: IsAdd ? ISD::ADD : ISD::SUB, DL: dl,
11641 VT: LHS.getValueType(), N1: LHS, N2: RHS);
11642
11643 EVT ResultType = Node->getValueType(ResNo: 1);
11644 EVT OType = getSetCCResultType(
11645 DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: 0));
11646
11647 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
11648 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
11649 if (isOperationLegal(Op: OpcSat, VT: LHS.getValueType())) {
11650 SDValue Sat = DAG.getNode(Opcode: OpcSat, DL: dl, VT: LHS.getValueType(), N1: LHS, N2: RHS);
11651 SDValue SetCC = DAG.getSetCC(DL: dl, VT: OType, LHS: Result, RHS: Sat, Cond: ISD::SETNE);
11652 Overflow = DAG.getBoolExtOrTrunc(Op: SetCC, SL: dl, VT: ResultType, OpVT: ResultType);
11653 return;
11654 }
11655
11656 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: LHS.getValueType());
11657
11658 // For an addition, the result should be less than one of the operands (LHS)
11659 // if and only if the other operand (RHS) is negative, otherwise there will
11660 // be overflow.
11661 // For a subtraction, the result should be less than one of the operands
11662 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11663 // otherwise there will be overflow.
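// Illustrative example (i8 add): 100 + 50 wraps to -106 < 100 while RHS is
// positive, so the XOR below is true (overflow); 100 + (-50) = 50 < 100
// with RHS negative makes both conditions true and the XOR false.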
11664 SDValue ResultLowerThanLHS = DAG.getSetCC(DL: dl, VT: OType, LHS: Result, RHS: LHS, Cond: ISD::SETLT);
11665 SDValue ConditionRHS =
11666 DAG.getSetCC(DL: dl, VT: OType, LHS: RHS, RHS: Zero, Cond: IsAdd ? ISD::SETLT : ISD::SETGT);
11667
11668 Overflow = DAG.getBoolExtOrTrunc(
11669 Op: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OType, N1: ConditionRHS, N2: ResultLowerThanLHS), SL: dl,
11670 VT: ResultType, OpVT: ResultType);
11671}
11672
11673bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
11674 SDValue &Overflow, SelectionDAG &DAG) const {
11675 SDLoc dl(Node);
11676 EVT VT = Node->getValueType(ResNo: 0);
11677 EVT SetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
11678 SDValue LHS = Node->getOperand(Num: 0);
11679 SDValue RHS = Node->getOperand(Num: 1);
11680 bool isSigned = Node->getOpcode() == ISD::SMULO;
11681
11682 // For power-of-two multiplications we can use a simpler shift expansion.
11683 if (ConstantSDNode *RHSC = isConstOrConstSplat(N: RHS)) {
11684 const APInt &C = RHSC->getAPIntValue();
11685 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
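// Illustrative example (unsigned i8): umulo(0x30, 4) computes
// 0x30 << 2 = 0xc0, and 0xc0 >> 2 = 0x30 matches, so no overflow;
// 0x50 << 2 wraps to 0x40, and 0x40 >> 2 = 0x10 != 0x50 flags overflow.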
11686 if (C.isPowerOf2()) {
// smulo(x, signed_min) is the same as umulo(x, signed_min).
11688 bool UseArithShift = isSigned && !C.isMinSignedValue();
11689 SDValue ShiftAmt = DAG.getShiftAmountConstant(Val: C.logBase2(), VT, DL: dl);
11690 Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS, N2: ShiftAmt);
11691 Overflow = DAG.getSetCC(DL: dl, VT: SetCCVT,
11692 LHS: DAG.getNode(Opcode: UseArithShift ? ISD::SRA : ISD::SRL,
11693 DL: dl, VT, N1: Result, N2: ShiftAmt),
11694 RHS: LHS, Cond: ISD::SETNE);
11695 return true;
11696 }
11697 }
11698
11699 EVT WideVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: VT.getScalarSizeInBits() * 2);
11700 if (VT.isVector())
11701 WideVT =
11702 EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT, EC: VT.getVectorElementCount());
11703
11704 SDValue BottomHalf;
11705 SDValue TopHalf;
11706 static const unsigned Ops[2][3] =
11707 { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
11708 { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
11709 if (isOperationLegalOrCustom(Op: Ops[isSigned][0], VT)) {
11710 BottomHalf = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
11711 TopHalf = DAG.getNode(Opcode: Ops[isSigned][0], DL: dl, VT, N1: LHS, N2: RHS);
11712 } else if (isOperationLegalOrCustom(Op: Ops[isSigned][1], VT)) {
11713 BottomHalf = DAG.getNode(Opcode: Ops[isSigned][1], DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: LHS,
11714 N2: RHS);
11715 TopHalf = BottomHalf.getValue(R: 1);
11716 } else if (isTypeLegal(VT: WideVT)) {
11717 LHS = DAG.getNode(Opcode: Ops[isSigned][2], DL: dl, VT: WideVT, Operand: LHS);
11718 RHS = DAG.getNode(Opcode: Ops[isSigned][2], DL: dl, VT: WideVT, Operand: RHS);
11719 SDValue Mul = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: LHS, N2: RHS);
11720 BottomHalf = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Mul);
11721 SDValue ShiftAmt =
11722 DAG.getShiftAmountConstant(Val: VT.getScalarSizeInBits(), VT: WideVT, DL: dl);
11723 TopHalf = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT,
11724 Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideVT, N1: Mul, N2: ShiftAmt));
11725 } else {
11726 if (VT.isVector())
11727 return false;
11728
11729 forceExpandWideMUL(DAG, dl, Signed: isSigned, LHS, RHS, Lo&: BottomHalf, Hi&: TopHalf);
11730 }
11731
11732 Result = BottomHalf;
  if (isSigned) {
    SDValue ShiftAmt = DAG.getShiftAmountConstant(
        VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
  } else {
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                            DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(1);
  if (RType.bitsLT(Overflow.getValueType()))
    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}

SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
  SDValue Op = Node->getOperand(0);
  EVT VT = Op.getValueType();

  // Try to use a shuffle reduction for power-of-two vectors.
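  // E.g. a VECREDUCE_ADD of v8i32 becomes an ADD of two v4i32 halves, then
  // of two v2i32 halves, and so on, halving the width while the elementwise
  // operation on the half-width type remains legal.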
  if (VT.isPow2VectorType()) {
    while (VT.getVectorElementCount().isKnownMultipleOf(2)) {
      EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
      if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
        break;

      SDValue Lo, Hi;
      std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
      Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
      VT = HalfVT;

      // Stop if splitting is enough to make the reduction legal.
      if (isOperationLegalOrCustom(Node->getOpcode(), HalfVT))
        return DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Op,
                           Node->getFlags());
    }
  }

  if (VT.isScalableVector())
    reportFatalInternalError(
        "Expanding reductions for scalable vectors is undefined.");

  EVT EltVT = VT.getVectorElementType();
  unsigned NumElts = VT.getVectorNumElements();

  SmallVector<SDValue, 8> Ops;
  DAG.ExtractVectorElements(Op, Ops, 0, NumElts);

  SDValue Res = Ops[0];
  for (unsigned i = 1; i < NumElts; i++)
    Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());

  // Result type may be wider than element type.
  if (EltVT != Node->getValueType(0))
    Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
  return Res;
}

SDValue TargetLowering::expandVecReduceSeq(SDNode *Node,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Node);
  SDValue AccOp = Node->getOperand(0);
  SDValue VecOp = Node->getOperand(1);
  SDNodeFlags Flags = Node->getFlags();

  EVT VT = VecOp.getValueType();
  EVT EltVT = VT.getVectorElementType();

  if (VT.isScalableVector())
    report_fatal_error(
        "Expanding reductions for scalable vectors is undefined.");

  unsigned NumElts = VT.getVectorNumElements();

  SmallVector<SDValue, 8> Ops;
  DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);

  unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());

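  // A sequential (_SEQ) reduction must evaluate strictly left to right,
  // ((Acc op e0) op e1) ..., which matters for non-reassociable FP math, so
  // unlike expandVecReduce we chain the scalar ops instead of building a tree.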
  SDValue Res = AccOp;
  for (unsigned i = 0; i < NumElts; i++)
    Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);

  return Res;
}

bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
                               SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(0);
  SDLoc dl(Node);
  bool isSigned = Node->getOpcode() == ISD::SREM;
  unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
  SDValue Dividend = Node->getOperand(0);
  SDValue Divisor = Node->getOperand(1);
  if (isOperationLegalOrCustom(DivRemOpc, VT)) {
    SDVTList VTs = DAG.getVTList(VT, VT);
    Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
    return true;
  }
  if (isOperationLegalOrCustom(DivOpc, VT)) {
    // X % Y -> X - (X / Y) * Y
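    // E.g. 7 % 3: 7 / 3 == 2, 2 * 3 == 6, 7 - 6 == 1. Truncating division
    // makes the identity hold for SREM too: -7 / 3 == -2, -2 * 3 == -6,
    // -7 - (-6) == -1.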
    SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
    Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
    return true;
  }
  return false;
}

SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
                                            SelectionDAG &DAG) const {
  bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
  SDLoc dl(SDValue(Node, 0));
  SDValue Src = Node->getOperand(0);

  // DstVT is the result type, while SatVT is the size to which we saturate.
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
  unsigned SatWidth = SatVT.getScalarSizeInBits();
  unsigned DstWidth = DstVT.getScalarSizeInBits();
  assert(SatWidth <= DstWidth &&
         "Expected saturation width no wider than result width");

  // Determine minimum and maximum integer values and their corresponding
  // floating-point values.
  APInt MinInt, MaxInt;
  if (IsSigned) {
    MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
    MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
  } else {
    MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
    MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
  }
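  // E.g. for FP_TO_SINT_SAT saturating to i8, this yields -128 and 127,
  // sign-extended to the (possibly wider) DstWidth.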

  // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
  // libcall emission cannot handle this. Large result types will fail.
  if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
    SrcVT = Src.getValueType();
  }

  const fltSemantics &Sem = SrcVT.getFltSemantics();
  APFloat MinFloat(Sem);
  APFloat MaxFloat(Sem);

  APFloat::opStatus MinStatus =
      MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
  APFloat::opStatus MaxStatus =
      MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
                             !(MaxStatus & APFloat::opStatus::opInexact);
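  // E.g. saturating f32 to u32: the upper bound 4294967295 is not exactly
  // representable in f32 (rounding toward zero yields 4294967040.0), so
  // AreExactFloatBounds is false and the compare+select path below is used.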

  SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
  SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);

  // If the integer bounds are exactly representable as floats and min/max are
  // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
  // of comparisons and selects.
  bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
                     isOperationLegal(ISD::FMAXNUM, SrcVT);
  if (AreExactFloatBounds && MinMaxLegal) {
    SDValue Clamped = Src;

    // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
    Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
    // Clamp by MaxFloat from above. NaN cannot occur.
    Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
    // Convert clamped value to integer.
    SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
                                  dl, DstVT, Clamped);

    // In the unsigned case we're done, because we mapped NaN to MinFloat,
    // which will cast to zero.
    if (!IsSigned)
      return FpToInt;

    // Otherwise, select 0 if Src is NaN.
    SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
    SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
    return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
  }

  SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
  SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);

  // Result of direct conversion. The assumption here is that the operation is
  // non-trapping and it's fine to apply it to an out-of-range value if we
  // select it away later.
  SDValue FpToInt =
      DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);

  SDValue Select = FpToInt;

  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);

  // If Src ULT MinFloat, select MinInt. In particular, this also selects
  // MinInt if Src is NaN.
  SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
  Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
  // If Src OGT MaxFloat, select MaxInt.
  SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
  Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);

  // In the unsigned case we are done, because we mapped NaN to MinInt, which
  // is already zero.
  if (!IsSigned)
    return Select;

  // Otherwise, select 0 if Src is NaN.
  SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
  SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
  return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
}

SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue Op,
                                                const SDLoc &dl,
                                                SelectionDAG &DAG) const {
  EVT OperandVT = Op.getValueType();
  if (OperandVT.getScalarType() == ResultVT.getScalarType())
    return Op;
  EVT ResultIntVT = ResultVT.changeTypeToInteger();
  // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
  // can induce double-rounding which may alter the results. We can
  // correct for this using a trick explained in: Boldo, Sylvie, and
  // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
  // World Congress. 2005.
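  // The trick ("round to odd", also known as sticky rounding): round to the
  // intermediate format, and if that rounding was inexact force the result's
  // LSB to 1. Because the intermediate format carries enough extra precision,
  // the final round-to-nearest then matches a single direct rounding.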
  SDValue Narrow = DAG.getFPExtendOrRound(Op, dl, ResultVT);
  SDValue NarrowAsWide = DAG.getFPExtendOrRound(Narrow, dl, OperandVT);

  // We can keep the narrow value as-is if narrowing was exact (no
  // rounding error), the wide value was NaN (the narrow value is also
  // NaN and should be preserved) or if we rounded to the odd value.
  SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, Narrow);
  SDValue One = DAG.getConstant(1, dl, ResultIntVT);
  SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
  SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
  EVT ResultIntVTCCVT = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
  SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
  // The result is already odd so we don't need to do anything.
  SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);

  EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                       Op.getValueType());
  // We keep results which are exact, odd or NaN.
  SDValue KeepNarrow =
      DAG.getSetCC(dl, WideSetCCVT, Op, NarrowAsWide, ISD::SETUEQ);
  KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
  // We morally performed a round-down if AbsNarrow is smaller than
  // AbsWide.
  SDValue AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
  SDValue AbsNarrowAsWide = DAG.getNode(ISD::FABS, dl, OperandVT, NarrowAsWide);
  SDValue NarrowIsRd =
      DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
  // If the narrow value is odd or exact, pick it.
  // Otherwise, narrow is even and corresponds to either the rounded-up
  // or rounded-down value. If narrow is the rounded-down value, we want
  // the rounded-up value as it will be odd.
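  // Incrementing or decrementing the raw bits of a binary FP value steps its
  // magnitude to the adjacent representable value (the nextafter bit trick),
  // so adding Adjust turns the even rounded-down result into its odd
  // neighbor.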
  SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
  SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
  Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
  return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
}

SDValue TargetLowering::expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const {
  assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
  SDValue Op = Node->getOperand(0);
  EVT VT = Node->getValueType(0);
  SDLoc dl(Node);
  if (VT.getScalarType() == MVT::bf16) {
    if (Node->getConstantOperandVal(1) == 1) {
      return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
    }
    EVT OperandVT = Op.getValueType();
    SDValue IsNaN = DAG.getSetCC(
        dl,
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
        Op, Op, ISD::SETUO);

    // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
    // can induce double-rounding which may alter the results. We can
    // correct for this using a trick explained in: Boldo, Sylvie, and
    // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
    // World Congress. 2005.
    EVT F32 = VT.changeElementType(*DAG.getContext(), MVT::f32);
    EVT I32 = F32.changeTypeToInteger();
    Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
    Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);

    // Conversions should set NaN's quiet bit. This also prevents NaNs from
    // turning into infinities.
    SDValue NaN =
        DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));

    // Factor in the contribution of the low 16 bits.
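    // Adding 0x7fff plus the kept LSB implements round-to-nearest-even on
    // the 16 bits about to be truncated: a tail above 0x8000 always carries
    // into bit 16, while a tail of exactly 0x8000 carries only when bit 16
    // is already set, i.e. ties go to the even neighbor.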
    SDValue One = DAG.getConstant(1, dl, I32);
    SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
                              DAG.getShiftAmountConstant(16, I32, dl));
    Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
    SDValue RoundingBias =
        DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
    SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);

    // Don't round if we had a NaN; we don't want to turn 0x7fffffff into
    // 0x80000000.
    Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);

    // Now that we have rounded, shift the bits into position.
    Op = DAG.getNode(ISD::SRL, dl, I32, Op,
                     DAG.getShiftAmountConstant(16, I32, dl));
    Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
    EVT I16 = I32.changeElementType(*DAG.getContext(), MVT::i16);
    Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
    return DAG.getNode(ISD::BITCAST, dl, VT, Op);
  }
  return SDValue();
}

SDValue TargetLowering::expandVectorSplice(SDNode *Node,
                                           SelectionDAG &DAG) const {
  assert((Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT ||
          Node->getOpcode() == ISD::VECTOR_SPLICE_RIGHT) &&
         "Unexpected opcode!");
  assert((Node->getValueType(0).isScalableVector() ||
          !isa<ConstantSDNode>(Node->getOperand(2))) &&
         "Fixed length vector types with constant offsets expected to use "
         "SHUFFLE_VECTOR!");

  EVT VT = Node->getValueType(0);
  SDValue V1 = Node->getOperand(0);
  SDValue V2 = Node->getOperand(1);
  SDValue Offset = Node->getOperand(2);
  SDLoc DL(Node);

  // Expand through memory thusly:
  //  Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
  //  Store V1, Ptr
  //  Store V2, Ptr + sizeof(V1)
  //  if (VECTOR_SPLICE_LEFT)
  //    Ptr = Ptr + (Offset * sizeof(VT.Elt))
  //  else
  //    Ptr = Ptr + sizeof(V1) - (Offset * sizeof(VT.Elt))
  //  Res = Load Ptr
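  // E.g. a left splice of <4 x i32> V1 = [a,b,c,d], V2 = [e,f,g,h] with
  // Offset 1: memory holds a b c d e f g h, and loading four elements from
  // Ptr + 4 bytes yields [b,c,d,e].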

  Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);

  EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
                               VT.getVectorElementCount() * 2);
  SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
  EVT PtrVT = StackPtr.getValueType();
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

  // Store the lo part of CONCAT_VECTORS(V1, V2)
  SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
  // Store the hi part of CONCAT_VECTORS(V1, V2)
  SDValue VTBytes = DAG.getTypeSize(DL, PtrVT, VT.getStoreSize());
  SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, VTBytes);
  SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);

  // NOTE: TrailingBytes must be clamped so as not to read outside of V1:V2.
  SDValue EltByteSize =
      DAG.getTypeSize(DL, PtrVT, VT.getVectorElementType().getStoreSize());
  Offset = DAG.getZExtOrTrunc(Offset, DL, PtrVT);
  SDValue TrailingBytes = DAG.getNode(ISD::MUL, DL, PtrVT, Offset, EltByteSize);

  TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VTBytes);

  if (Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT)
    StackPtr = DAG.getMemBasePlusOffset(StackPtr, TrailingBytes, DL);
  else
    StackPtr = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);

  // Load the spliced result.
  return DAG.getLoad(VT, DL, StoreV2, StackPtr,
                     MachinePointerInfo::getUnknownStack(MF));
}

SDValue TargetLowering::expandVECTOR_COMPRESS(SDNode *Node,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Node);
  SDValue Vec = Node->getOperand(0);
  SDValue Mask = Node->getOperand(1);
  SDValue Passthru = Node->getOperand(2);

  EVT VecVT = Vec.getValueType();
  EVT ScalarVT = VecVT.getScalarType();
  EVT MaskVT = Mask.getValueType();
  EVT MaskScalarVT = MaskVT.getScalarType();

  // Needs to be handled by targets that have scalable vector types.
  if (VecVT.isScalableVector())
    report_fatal_error("Cannot expand masked_compress for scalable vectors.");

  SDValue StackPtr = DAG.CreateStackTemporary(
      VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
  int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);

  MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
  SDValue Chain = DAG.getEntryNode();
  SDValue OutPos = DAG.getConstant(0, DL, PositionVT);

  bool HasPassthru = !Passthru.isUndef();

  // If we have a passthru vector, store it on the stack, overwrite the
  // matching positions and then re-write the last element that was
  // potentially overwritten even though mask[i] = false.
  if (HasPassthru)
    Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);

  SDValue LastWriteVal;
  APInt PassthruSplatVal;
  bool IsSplatPassthru =
      ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);

  if (IsSplatPassthru) {
    // As we do not know which position we wrote to last, we cannot simply
    // access that index from the passthru vector. So we first check if
    // passthru is a splat vector, to use any element ...
    LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
  } else if (HasPassthru) {
    // ... if it is not a splat vector, we need to get the passthru value at
    // position = popcount(mask) and re-load it from the stack before it is
    // overwritten in the loop below.
    EVT PopcountVT = ScalarVT.changeTypeToInteger();
    SDValue Popcount = DAG.getNode(
        ISD::TRUNCATE, DL,
        MaskVT.changeVectorElementType(*DAG.getContext(), MVT::i1), Mask);
    Popcount = DAG.getNode(
        ISD::ZERO_EXTEND, DL,
        MaskVT.changeVectorElementType(*DAG.getContext(), PopcountVT),
        Popcount);
    Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, PopcountVT, Popcount);
    SDValue LastElmtPtr =
        getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
    LastWriteVal = DAG.getLoad(
        ScalarVT, DL, Chain, LastElmtPtr,
        MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
    Chain = LastWriteVal.getValue(1);
  }

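  // Walk all lanes, unconditionally storing each element at the current
  // output slot but advancing the slot only for selected lanes. E.g. for
  // Vec = [a,b,c,d], Mask = [1,0,1,0], Passthru = [p0,p1,p2,p3] the stores
  // produce [a,c,d,p3]; the fixup below then restores p2 over the stray d,
  // giving [a,c,p2,p3].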
  unsigned NumElms = VecVT.getVectorNumElements();
  for (unsigned I = 0; I < NumElms; I++) {
    SDValue ValI = DAG.getExtractVectorElt(DL, ScalarVT, Vec, I);
    SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
    Chain = DAG.getStore(
        Chain, DL, ValI, OutPtr,
        MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));

    // Get the mask value and add it to the current output position. This
    // either increments by 1 if MaskI is true or adds 0 otherwise.
    // Freeze in case we have poison/undef mask entries.
    SDValue MaskI = DAG.getExtractVectorElt(DL, MaskScalarVT, Mask, I);
    MaskI = DAG.getFreeze(MaskI);
    MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
    MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
    OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);

    if (HasPassthru && I == NumElms - 1) {
      SDValue EndOfVector =
          DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
      SDValue AllLanesSelected =
          DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
      OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
      OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);

      // Re-write the last ValI if all lanes were selected. Otherwise,
      // overwrite the last write with the passthru value.
      LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
                                   LastWriteVal, SDNodeFlags::Unpredictable);
      Chain = DAG.getStore(
          Chain, DL, LastWriteVal, OutPtr,
          MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
    }
  }

  return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
}

SDValue TargetLowering::expandPartialReduceMLA(SDNode *N,
                                               SelectionDAG &DAG) const {
  SDLoc DL(N);
  SDValue Acc = N->getOperand(0);
  SDValue MulLHS = N->getOperand(1);
  SDValue MulRHS = N->getOperand(2);
  EVT AccVT = Acc.getValueType();
  EVT MulOpVT = MulLHS.getValueType();

  EVT ExtMulOpVT =
      EVT::getVectorVT(*DAG.getContext(), AccVT.getVectorElementType(),
                       MulOpVT.getVectorElementCount());

  unsigned ExtOpcLHS, ExtOpcRHS;
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::PARTIAL_REDUCE_UMLA:
    ExtOpcLHS = ExtOpcRHS = ISD::ZERO_EXTEND;
    break;
  case ISD::PARTIAL_REDUCE_SMLA:
    ExtOpcLHS = ExtOpcRHS = ISD::SIGN_EXTEND;
    break;
  case ISD::PARTIAL_REDUCE_FMLA:
    ExtOpcLHS = ExtOpcRHS = ISD::FP_EXTEND;
    break;
  }

  if (ExtMulOpVT != MulOpVT) {
    MulLHS = DAG.getNode(ExtOpcLHS, DL, ExtMulOpVT, MulLHS);
    MulRHS = DAG.getNode(ExtOpcRHS, DL, ExtMulOpVT, MulRHS);
  }
  SDValue Input = MulLHS;
  if (N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA) {
    if (!llvm::isOneOrOneSplatFP(MulRHS))
      Input = DAG.getNode(ISD::FMUL, DL, ExtMulOpVT, MulLHS, MulRHS);
  } else if (!llvm::isOneOrOneSplat(MulRHS)) {
    Input = DAG.getNode(ISD::MUL, DL, ExtMulOpVT, MulLHS, MulRHS);
  }

  unsigned Stride = AccVT.getVectorMinNumElements();
  unsigned ScaleFactor = MulOpVT.getVectorMinNumElements() / Stride;
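  // E.g. a v16i8 by v16i8 partial reduction into a v4i32 accumulator:
  // Stride == 4 and ScaleFactor == 4, so the extended 16-element product is
  // split into four v4i32 subvectors that all get added into Acc.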

  // Collect all of the subvectors
  std::deque<SDValue> Subvectors = {Acc};
  for (unsigned I = 0; I < ScaleFactor; I++)
    Subvectors.push_back(DAG.getExtractSubvector(DL, AccVT, Input, I * Stride));

  unsigned FlatNode =
      N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA ? ISD::FADD : ISD::ADD;

  // Flatten the subvector tree
  while (Subvectors.size() > 1) {
    Subvectors.push_back(
        DAG.getNode(FlatNode, DL, AccVT, {Subvectors[0], Subvectors[1]}));
    Subvectors.pop_front();
    Subvectors.pop_front();
  }

  assert(Subvectors.size() == 1 &&
         "There should only be one subvector after tree flattening");

  return Subvectors[0];
}

/// Given a store node \p StoreNode, return true if it is safe to fold that
/// node into \p FPNode, which expands to a library call with output pointers.
static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode,
                                                  SDNode *FPNode) {
  SmallVector<const SDNode *, 8> Worklist;
  SmallVector<const SDNode *, 8> DeferredNodes;
  SmallPtrSet<const SDNode *, 16> Visited;

  // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode).
  for (SDValue Op : StoreNode->ops())
    if (Op.getNode() != FPNode)
      Worklist.push_back(Op.getNode());

  unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps();
  while (!Worklist.empty()) {
    const SDNode *Node = Worklist.pop_back_val();
    auto [_, Inserted] = Visited.insert(Node);
    if (!Inserted)
      continue;

    if (MaxSteps > 0 && Visited.size() >= MaxSteps)
      return false;

    // Reached the FPNode (would result in a cycle) or reached CALLSEQ_START
    // (would result in nested call sequences).
    if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START)
      return false;

    if (Node->getOpcode() == ISD::CALLSEQ_END) {
      // Defer looking into call sequences (so we can check we're outside one).
      // We still need to look through these for the predecessor check.
      DeferredNodes.push_back(Node);
      continue;
    }

    for (SDValue Op : Node->ops())
      Worklist.push_back(Op.getNode());
  }

  // True if we're outside a call sequence and don't have the FPNode as a
  // predecessor. No cycles or nested call sequences possible.
  return !SDNode::hasPredecessorHelper(FPNode, Visited, DeferredNodes,
                                       MaxSteps);
}

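// Expands a node with multiple results into a library call that returns the
// extra results through output pointers, e.g. an FSINCOS node can become a
// sincos(x, &sin, &cos) call. Users' stores are folded into those output
// pointers where it is safe, avoiding stack temporaries.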
bool TargetLowering::expandMultipleResultFPLibCall(
    SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node,
    SmallVectorImpl<SDValue> &Results,
    std::optional<unsigned> CallRetResNo) const {
  if (LC == RTLIB::UNKNOWN_LIBCALL)
    return false;

  RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC);
  if (LibcallImpl == RTLIB::Unsupported)
    return false;

  LLVMContext &Ctx = *DAG.getContext();
  EVT VT = Node->getValueType(0);
  unsigned NumResults = Node->getNumValues();

  // Find users of the node that store the results (and share input chains).
  // The destination pointers can be used instead of creating stack
  // allocations.
  SDValue StoresInChain;
  SmallVector<StoreSDNode *, 2> ResultStores(NumResults);
  for (SDNode *User : Node->users()) {
    if (!ISD::isNormalStore(User))
      continue;
    auto *ST = cast<StoreSDNode>(User);
    SDValue StoreValue = ST->getValue();
    unsigned ResNo = StoreValue.getResNo();
    // Ensure the store corresponds to an output pointer.
    if (CallRetResNo == ResNo)
      continue;
    // Ensure the store is to the default address space and is not atomic or
    // volatile.
    if (!ST->isSimple() || ST->getAddressSpace() != 0)
      continue;
    // Ensure all store chains are the same (so they don't alias).
    if (StoresInChain && ST->getChain() != StoresInChain)
      continue;
    // Ensure the store is properly aligned.
    Type *StoreType = StoreValue.getValueType().getTypeForEVT(Ctx);
    if (ST->getAlign() <
        DAG.getDataLayout().getABITypeAlign(StoreType->getScalarType()))
      continue;
    // Avoid:
    // 1. Creating cyclic dependencies.
    // 2. Expanding the node to a call within a call sequence.
    if (!canFoldStoreIntoLibCallOutputPointers(ST, Node))
      continue;
    ResultStores[ResNo] = ST;
    StoresInChain = ST->getChain();
  }

  ArgListTy Args;

  // Pass the arguments.
  for (const SDValue &Op : Node->op_values()) {
    EVT ArgVT = Op.getValueType();
    Type *ArgTy = ArgVT.getTypeForEVT(Ctx);
    Args.emplace_back(Op, ArgTy);
  }

  // Pass the output pointers.
  SmallVector<SDValue, 2> ResultPtrs(NumResults);
  Type *PointerTy = PointerType::getUnqual(Ctx);
  for (auto [ResNo, ST] : llvm::enumerate(ResultStores)) {
    if (ResNo == CallRetResNo)
      continue;
    EVT ResVT = Node->getValueType(ResNo);
    SDValue ResultPtr = ST ? ST->getBasePtr() : DAG.CreateStackTemporary(ResVT);
    ResultPtrs[ResNo] = ResultPtr;
    Args.emplace_back(ResultPtr, PointerTy);
  }

  SDLoc DL(Node);

  if (RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(LibcallImpl)) {
    // Pass the vector mask (if required).
    EVT MaskVT = getSetCCResultType(DAG.getDataLayout(), Ctx, VT);
    SDValue Mask = DAG.getBoolConstant(true, DL, MaskVT, VT);
    Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx));
  }

  Type *RetType = CallRetResNo.has_value()
                      ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx)
                      : Type::getVoidTy(Ctx);
  SDValue InChain = StoresInChain ? StoresInChain : DAG.getEntryNode();
  SDValue Callee =
      DAG.getExternalSymbol(LibcallImpl, getPointerTy(DAG.getDataLayout()));
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(
      getLibcallImplCallingConv(LibcallImpl), RetType, Callee, std::move(Args));

  auto [Call, CallChain] = LowerCallTo(CLI);

  for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) {
    if (ResNo == CallRetResNo) {
      Results.push_back(Call);
      continue;
    }
    MachinePointerInfo PtrInfo;
    SDValue LoadResult = DAG.getLoad(Node->getValueType(ResNo), DL, CallChain,
                                     ResultPtr, PtrInfo);
    SDValue OutChain = LoadResult.getValue(1);

    if (StoreSDNode *ST = ResultStores[ResNo]) {
      // Replace store with the library call.
      DAG.ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain);
      PtrInfo = ST->getPointerInfo();
    } else {
      PtrInfo = MachinePointerInfo::getFixedStack(
          DAG.getMachineFunction(),
          cast<FrameIndexSDNode>(ResultPtr)->getIndex());
    }

    Results.push_back(LoadResult);
  }

  return true;
}

bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
                                           SDValue &LHS, SDValue &RHS,
                                           SDValue &CC, SDValue Mask,
                                           SDValue EVL, bool &NeedInvert,
                                           const SDLoc &dl, SDValue &Chain,
                                           bool IsSignaling) const {
  MVT OpVT = LHS.getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
  NeedInvert = false;
  assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
  bool IsNonVP = !EVL;
  switch (getCondCodeAction(CCCode, OpVT)) {
  default:
    llvm_unreachable("Unknown condition code action!");
  case TargetLowering::Legal:
    // Nothing to do.
    break;
  case TargetLowering::Expand: {
    ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
    if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(InvCC);
      return true;
    }
    // Swapping operands didn't work. Try inverting the condition.
    bool NeedSwap = false;
    InvCC = getSetCCInverse(CCCode, OpVT);
    if (!isCondCodeLegalOrCustom(InvCC, OpVT)) {
      // If inverting the condition is not enough, try swapping operands
      // on top of it.
      InvCC = ISD::getSetCCSwappedOperands(InvCC);
      NeedSwap = true;
    }
    if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
      CC = DAG.getCondCode(InvCC);
      NeedInvert = true;
      if (NeedSwap)
        std::swap(LHS, RHS);
      return true;
    }

    // Special case: expand i1 comparisons using logical operations.
    if (OpVT == MVT::i1) {
      SDValue Ret;
      switch (CCCode) {
      default:
        llvm_unreachable("Unknown integer setcc!");
      case ISD::SETEQ: // X == Y --> ~(X ^ Y)
        Ret = DAG.getNOT(dl, DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS),
                         MVT::i1);
        break;
      case ISD::SETNE: // X != Y --> (X ^ Y)
        Ret = DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS);
        break;
      case ISD::SETGT:  // X >s Y --> X == 0 & Y == 1 --> ~X & Y
      case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
        Ret = DAG.getNode(ISD::AND, dl, MVT::i1, RHS,
                          DAG.getNOT(dl, LHS, MVT::i1));
        break;
      case ISD::SETLT:  // X <s Y --> X == 1 & Y == 0 --> ~Y & X
      case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
        Ret = DAG.getNode(ISD::AND, dl, MVT::i1, LHS,
                          DAG.getNOT(dl, RHS, MVT::i1));
        break;
      case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
      case ISD::SETGE:  // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
        Ret = DAG.getNode(ISD::OR, dl, MVT::i1, RHS,
                          DAG.getNOT(dl, LHS, MVT::i1));
        break;
      case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
      case ISD::SETLE:  // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
        Ret = DAG.getNode(ISD::OR, dl, MVT::i1, LHS,
                          DAG.getNOT(dl, RHS, MVT::i1));
        break;
      }

      LHS = DAG.getZExtOrTrunc(Ret, dl, VT);
      RHS = SDValue();
      CC = SDValue();
      return true;
    }

    ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
    unsigned Opc = 0;
    switch (CCCode) {
    default:
      llvm_unreachable("Don't know how to expand this condition!");
    case ISD::SETUO:
      if (isCondCodeLegal(ISD::SETUNE, OpVT)) {
        CC1 = ISD::SETUNE;
        CC2 = ISD::SETUNE;
        Opc = ISD::OR;
        break;
      }
      assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETUO is expanded, SETOEQ or SETUNE must be legal!");
      NeedInvert = true;
      [[fallthrough]];
    case ISD::SETO:
      assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETO is expanded, SETOEQ must be legal!");
      CC1 = ISD::SETOEQ;
      CC2 = ISD::SETOEQ;
      Opc = ISD::AND;
      break;
    case ISD::SETONE:
    case ISD::SETUEQ:
      // If the SETUO or SETO CC isn't legal, we might be able to use
      // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
      // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
      // the operands.
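      // Bit 3 (0x8) of the condition code distinguishes the unordered
      // predicates (SETUEQ and friends) from the ordered ones (SETONE),
      // hence the mask tests below.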
      CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
      if (!isCondCodeLegal(CC2, OpVT) && (isCondCodeLegal(ISD::SETOGT, OpVT) ||
                                          isCondCodeLegal(ISD::SETOLT, OpVT))) {
        CC1 = ISD::SETOGT;
        CC2 = ISD::SETOLT;
        Opc = ISD::OR;
        NeedInvert = ((unsigned)CCCode & 0x8U);
        break;
      }
      [[fallthrough]];
    case ISD::SETOEQ:
    case ISD::SETOGT:
    case ISD::SETOGE:
    case ISD::SETOLT:
    case ISD::SETOLE:
    case ISD::SETUNE:
    case ISD::SETUGT:
    case ISD::SETUGE:
    case ISD::SETULT:
    case ISD::SETULE:
      // If we are floating point, assign and break, otherwise fall through.
      if (!OpVT.isInteger()) {
        // We can use the 4th bit to tell if we are the unordered
        // or ordered version of the opcode.
        CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
        Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
        CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
        break;
      }
      // Fall through if we are an unsigned integer.
      [[fallthrough]];
    case ISD::SETLE:
    case ISD::SETGT:
    case ISD::SETGE:
    case ISD::SETLT:
    case ISD::SETNE:
    case ISD::SETEQ:
      // If all combinations of inverting the condition and swapping operands
      // didn't work then we have no means to expand the condition.
      llvm_unreachable("Don't know how to expand this condition!");
    }

    SDValue SetCC1, SetCC2;
    if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
      // If we aren't the ordered or unordered operation,
      // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
      }
    } else {
      // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
      }
    }
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
                          SetCC2.getValue(1));
    if (IsNonVP)
      LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
    else {
      // Transform the binary opcode to the VP equivalent.
      assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
      Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
      LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
    }
    RHS = SDValue();
    CC = SDValue();
    return true;
  }
  }
  return false;
}

SDValue TargetLowering::expandVectorNaryOpBySplitting(SDNode *Node,
                                                      SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(0);
  // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
  // split into two equal parts.
  if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(2))
    return SDValue();

  // Restrict expansion to cases where both parts can be concatenated.
  auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
  if (LoVT != HiVT || !isTypeLegal(LoVT))
    return SDValue();

  SDLoc DL(Node);
  unsigned Opcode = Node->getOpcode();

  // Don't expand if the result is likely to be unrolled anyway.
  if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT))
    return SDValue();

  SmallVector<SDValue, 4> LoOps, HiOps;
  for (const SDValue &V : Node->op_values()) {
    auto [Lo, Hi] = DAG.SplitVector(V, DL, LoVT, HiVT);
    LoOps.push_back(Lo);
    HiOps.push_back(Hi);
  }

  SDValue SplitOpLo = DAG.getNode(Opcode, DL, LoVT, LoOps);
  SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps);
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi);
}

SDValue TargetLowering::scalarizeExtractedVectorLoad(EVT ResultVT,
                                                     const SDLoc &DL,
                                                     EVT InVecVT, SDValue EltNo,
                                                     LoadSDNode *OriginalLoad,
                                                     SelectionDAG &DAG) const {
  assert(OriginalLoad->isSimple());

  EVT VecEltVT = InVecVT.getVectorElementType();

  // If the vector element type is not a multiple of a byte then we are unable
  // to correctly compute an address to load only the extracted element as a
  // scalar.
  if (!VecEltVT.isByteSized())
    return SDValue();

  ISD::LoadExtType ExtTy =
      ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
  if (!isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
    return SDValue();

  std::optional<unsigned> ByteOffset;
  Align Alignment = OriginalLoad->getAlign();
  MachinePointerInfo MPI;
  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
    int Elt = ConstEltNo->getZExtValue();
    ByteOffset = VecEltVT.getSizeInBits() * Elt / 8;
    MPI = OriginalLoad->getPointerInfo().getWithOffset(*ByteOffset);
    Alignment = commonAlignment(Alignment, *ByteOffset);
  } else {
    // Discard the pointer info except the address space because the memory
    // operand can't represent this new access since the offset is variable.
    MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
    Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
  }
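  // E.g. extracting constant element 2 from a loaded <4 x i32> becomes an
  // i32 load from base + 8, with the alignment reduced accordingly.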

  if (!shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT, ByteOffset))
    return SDValue();

  unsigned IsFast = 0;
  if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
                          OriginalLoad->getAddressSpace(), Alignment,
                          OriginalLoad->getMemOperand()->getFlags(), &IsFast) ||
      !IsFast)
    return SDValue();

  // The original DAG loaded the entire vector from memory, so arithmetic
  // within it must be inbounds.
  SDValue NewPtr = getInboundsVectorElementPointer(
      DAG, OriginalLoad->getBasePtr(), InVecVT, EltNo);

  // We are replacing a vector load with a scalar load. The new load must have
  // identical memory op ordering to the original.
  SDValue Load;
  if (ResultVT.bitsGT(VecEltVT)) {
    // If the result type of vextract is wider than the load, then issue an
    // extending load instead.
    ISD::LoadExtType ExtType = isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT)
                                   ? ISD::ZEXTLOAD
                                   : ISD::EXTLOAD;
    Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
                          NewPtr, MPI, VecEltVT, Alignment,
                          OriginalLoad->getMemOperand()->getFlags(),
                          OriginalLoad->getAAInfo());
    DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
  } else {
    // The result type is narrower or the same width as the vector element.
    Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
                       Alignment, OriginalLoad->getMemOperand()->getFlags(),
                       OriginalLoad->getAAInfo());
    DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
    if (ResultVT.bitsLT(VecEltVT))
      Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
    else
      Load = DAG.getBitcast(ResultVT, Load);
  }

  return Load;
}