1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/CodeGen/TargetLowering.h"
14#include "llvm/ADT/STLExtras.h"
15#include "llvm/Analysis/ValueTracking.h"
16#include "llvm/Analysis/VectorUtils.h"
17#include "llvm/CodeGen/Analysis.h"
18#include "llvm/CodeGen/CallingConvLower.h"
19#include "llvm/CodeGen/CodeGenCommonISel.h"
20#include "llvm/CodeGen/MachineFrameInfo.h"
21#include "llvm/CodeGen/MachineFunction.h"
22#include "llvm/CodeGen/MachineJumpTableInfo.h"
23#include "llvm/CodeGen/MachineRegisterInfo.h"
24#include "llvm/CodeGen/SDPatternMatch.h"
25#include "llvm/CodeGen/SelectionDAG.h"
26#include "llvm/CodeGen/TargetRegisterInfo.h"
27#include "llvm/IR/DataLayout.h"
28#include "llvm/IR/DerivedTypes.h"
29#include "llvm/IR/GlobalVariable.h"
30#include "llvm/IR/LLVMContext.h"
31#include "llvm/MC/MCAsmInfo.h"
32#include "llvm/MC/MCExpr.h"
33#include "llvm/Support/DivisionByConstantInfo.h"
34#include "llvm/Support/ErrorHandling.h"
35#include "llvm/Support/KnownBits.h"
36#include "llvm/Support/MathExtras.h"
37#include "llvm/Target/TargetMachine.h"
38#include <cctype>
39#include <deque>
40using namespace llvm;
41using namespace llvm::SDPatternMatch;
42
/// NOTE: The TargetMachine owns TLOF.
// Trivial constructor: all state lives in TargetLoweringBase; this class only
// adds virtual behavior on top of it.
TargetLowering::TargetLowering(const TargetMachine &tm,
                               const TargetSubtargetInfo &STI)
    : TargetLoweringBase(tm, STI) {}
47
// Define the virtual destructor out-of-line for build efficiency.
// (An out-of-line key function anchors the vtable to this translation unit.)
TargetLowering::~TargetLowering() = default;
50
51const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
52 return nullptr;
53}
54
55bool TargetLowering::isPositionIndependent() const {
56 return getTargetMachine().isPositionIndependent();
57}
58
59/// Check whether a given call node is in tail position within its function. If
60/// so, it sets Chain to the input chain of the tail call.
61bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
62 SDValue &Chain) const {
63 const Function &F = DAG.getMachineFunction().getFunction();
64
65 // First, check if tail calls have been disabled in this function.
66 if (F.getFnAttribute(Kind: "disable-tail-calls").getValueAsBool())
67 return false;
68
69 // Conservatively require the attributes of the call to match those of
70 // the return. Ignore following attributes because they don't affect the
71 // call sequence.
72 AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
73 for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
74 Attribute::DereferenceableOrNull, Attribute::NoAlias,
75 Attribute::NonNull, Attribute::NoUndef,
76 Attribute::Range, Attribute::NoFPClass})
77 CallerAttrs.removeAttribute(Val: Attr);
78
79 if (CallerAttrs.hasAttributes())
80 return false;
81
82 // It's not safe to eliminate the sign / zero extension of the return value.
83 if (CallerAttrs.contains(A: Attribute::ZExt) ||
84 CallerAttrs.contains(A: Attribute::SExt))
85 return false;
86
87 // Check if the only use is a function return node.
88 return isUsedByReturnOnly(Node, Chain);
89}
90
91bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
92 const uint32_t *CallerPreservedMask,
93 const SmallVectorImpl<CCValAssign> &ArgLocs,
94 const SmallVectorImpl<SDValue> &OutVals) const {
95 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
96 const CCValAssign &ArgLoc = ArgLocs[I];
97 if (!ArgLoc.isRegLoc())
98 continue;
99 MCRegister Reg = ArgLoc.getLocReg();
100 // Only look at callee saved registers.
101 if (MachineOperand::clobbersPhysReg(RegMask: CallerPreservedMask, PhysReg: Reg))
102 continue;
103 // Check that we pass the value used for the caller.
104 // (We look for a CopyFromReg reading a virtual register that is used
105 // for the function live-in value of register Reg)
106 SDValue Value = OutVals[I];
107 if (Value->getOpcode() == ISD::AssertZext)
108 Value = Value.getOperand(i: 0);
109 if (Value->getOpcode() != ISD::CopyFromReg)
110 return false;
111 Register ArgReg = cast<RegisterSDNode>(Val: Value->getOperand(Num: 1))->getReg();
112 if (MRI.getLiveInPhysReg(VReg: ArgReg) != Reg)
113 return false;
114 }
115 return true;
116}
117
/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  // Mirror each relevant parameter attribute of the call site into the
  // corresponding ArgListEntry flag.
  IsSExt = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::ZExt);
  IsNoExt = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::NoExt);
  IsInReg = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgNo: ArgIdx);
  IndirectType = nullptr;
  // The indirect-passing ABI attributes are mutually exclusive; record the
  // pointee type for whichever one is present.
  assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgNo: ArgIdx);
    // A byval argument without an explicit stack alignment falls back to the
    // parameter's own alignment.
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgNo: ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgNo: ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgNo: ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgNo: ArgIdx);
}
151
/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl,
                            EVT RetVT, ArrayRef<SDValue> Ops,
                            MakeLibCallOptions CallOptions, const SDLoc &dl,
                            SDValue InChain) const {
  if (LibcallImpl == RTLIB::Unsupported)
    reportFatalInternalError(reason: "unsupported library call operation");

  // Default to starting the call sequence at the DAG entry token.
  if (!InChain)
    InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  Args.reserve(n: Ops.size());

  // Build the argument list, applying per-operand type overrides and the
  // target's sign/zero-extension policy for libcall arguments.
  ArrayRef<Type *> OpsTypeOverrides = CallOptions.OpsTypeOverrides;
  for (unsigned i = 0; i < Ops.size(); ++i) {
    SDValue NewOp = Ops[i];
    Type *Ty = i < OpsTypeOverrides.size() && OpsTypeOverrides[i]
                   ? OpsTypeOverrides[i]
                   : NewOp.getValueType().getTypeForEVT(Context&: *DAG.getContext());
    TargetLowering::ArgListEntry Entry(NewOp, Ty);
    // When softening FP, record the pre-soften type as the original type.
    if (CallOptions.IsSoften)
      Entry.OrigTy =
          CallOptions.OpsVTBeforeSoften[i].getTypeForEVT(Context&: *DAG.getContext());

    Entry.IsSExt =
        shouldSignExtendTypeInLibCall(Ty: Entry.Ty, IsSigned: CallOptions.IsSigned);
    Entry.IsZExt = !Entry.IsSExt;

    // If the pre-soften type shouldn't be extended, pass it unextended.
    if (CallOptions.IsSoften &&
        !shouldExtendTypeInLibCall(Type: CallOptions.OpsVTBeforeSoften[i])) {
      Entry.IsSExt = Entry.IsZExt = false;
    }
    Args.push_back(x: Entry);
  }

  SDValue Callee =
      DAG.getExternalSymbol(LCImpl: LibcallImpl, VT: getPointerTy(DL: DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(Context&: *DAG.getContext());
  Type *OrigRetTy = RetTy;
  TargetLowering::CallLoweringInfo CLI(DAG);
  // Apply the same extension policy to the return value as to the arguments.
  bool signExtend = shouldSignExtendTypeInLibCall(Ty: RetTy, IsSigned: CallOptions.IsSigned);
  bool zeroExtend = !signExtend;

  if (CallOptions.IsSoften) {
    OrigRetTy = CallOptions.RetVTBeforeSoften.getTypeForEVT(Context&: *DAG.getContext());
    if (!shouldExtendTypeInLibCall(Type: CallOptions.RetVTBeforeSoften))
      signExtend = zeroExtend = false;
  }

  // Assemble the call lowering info and emit the call; returns the pair of
  // (result value, output chain) produced by LowerCallTo.
  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(CC: getLibcallImplCallingConv(Call: LibcallImpl), ResultType: RetTy, OrigResultType: OrigRetTy,
                    Target: Callee, ArgsList: std::move(Args))
      .setNoReturn(CallOptions.DoesNotReturn)
      .setDiscardResult(!CallOptions.IsReturnValueUsed)
      .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
      .setSExtResult(signExtend)
      .setZExtResult(zeroExtend);
  return LowerCallTo(CLI);
}
216
// Compute the sequence of value types (appended to MemOps) with which to lower
// a memory operation of Op.size() bytes, honoring alignment constraints and a
// maximum of Limit operations. Returns false if no lowering fits within Limit.
// NOTE(review): the LargestVT out-parameter is never written anywhere in this
// body — confirm whether callers expect it to be populated.
bool TargetLowering::findOptimalMemOpLowering(
    LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
    const MemOp &Op, unsigned DstAS, unsigned SrcAS,
    const AttributeList &FuncAttributes, EVT *LargestVT) const {
  // When an op-count limit is in force, reject a memcpy whose source is less
  // aligned than its fixed destination.
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  // Let the target pick a preferred type first; MVT::Other means "no
  // preference".
  EVT VT = getOptimalMemOpType(Context, Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::LAST_INTEGER_VALUETYPE;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, AddrSpace: DstAS, Alignment: Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
    while (!isTypeLegal(VT: LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(VT: LVT))
      VT = LVT;
  }

  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    // Shrink (or overlap) the access type until it fits the remaining bytes.
    while (VTSize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(Op: ISD::STORE, VT: NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(Op: ISD::STORE, VT: MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        // Step down through the integer types until one is safe (i8 is the
        // unconditional floor).
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, AddrSpace: DstAS, Alignment: Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              Flags: MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(x: VT);
    Size -= VTSize;
  }

  return true;
}
307
308/// Soften the operands of a comparison. This code is shared among BR_CC,
309/// SELECT_CC, and SETCC handlers.
310void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
311 SDValue &NewLHS, SDValue &NewRHS,
312 ISD::CondCode &CCCode,
313 const SDLoc &dl, const SDValue OldLHS,
314 const SDValue OldRHS) const {
315 SDValue Chain;
316 return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, DL: dl, OldLHS,
317 OldRHS, Chain);
318}
319
// Chained variant: expands a floating-point comparison into one or two
// soft-float comparison libcalls and rewrites NewLHS/NewRHS/CCCode into an
// equivalent integer setcc on the libcall result(s).
// NOTE(review): the IsSignaling parameter is never read in this body —
// confirm whether signaling comparisons should select different libcalls.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
  // not supporting it. We can update this code when libgcc provides such
  // functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
         && "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s). LC2 is only used for the
  // two-call predicates (SETONE / SETUEQ).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    // SETO is the inverse of SETUO.
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUEQ:
    // SETUEQ = UO || OEQ; needs both libcalls combined below.
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT);
  // Emit the first comparison libcall; its result is compared against zero.
  auto Call = makeLibCall(DAG, LC: LC1, RetVT, Ops, CallOptions, dl, Chain);
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(Val: 0, DL: dl, VT: RetVT);

  RTLIB::LibcallImpl LC1Impl = getLibcallImpl(Call: LC1);
  if (LC1Impl == RTLIB::Unsupported) {
    reportFatalUsageError(
        reason: "no libcall available to soften floating-point compare");
  }

  // Translate the libcall's boolean convention into an integer predicate,
  // inverting it if the original predicate was expressed via its inverse.
  CCCode = getSoftFloatCmpLibcallPredicate(Call: LC1Impl);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(Operation: CCCode, Type: RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    // Two-call case (SETONE/SETUEQ): combine the unordered check with the
    // equality check via AND/OR of the two setcc results.
    RTLIB::LibcallImpl LC2Impl = getLibcallImpl(Call: LC2);
    if (LC2Impl == RTLIB::Unsupported) {
      reportFatalUsageError(
          reason: "no libcall available to soften floating-point compare");
    }

    assert(CCCode == (ShouldInvertCC ? ISD::SETEQ : ISD::SETNE) &&
           "unordered call should be simple boolean");

    EVT SetCCVT =
        getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: RetVT);
    if (getBooleanContents(Type: RetVT) == ZeroOrOneBooleanContent) {
      NewLHS = DAG.getNode(Opcode: ISD::AssertZext, DL: dl, VT: RetVT, N1: Call.first,
                           N2: DAG.getValueType(MVT::i1));
    }

    SDValue Tmp = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: NewLHS, RHS: NewRHS, Cond: CCCode);
    auto Call2 = makeLibCall(DAG, LC: LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getSoftFloatCmpLibcallPredicate(Call: LC2Impl);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(Operation: CCCode, Type: RetVT);
    NewLHS = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Call2.first, RHS: NewRHS, Cond: CCCode);
    // Merge the chains of both libcalls when the caller tracks a chain.
    if (Chain)
      Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Call.second,
                          N2: Call2.second);
    NewLHS = DAG.getNode(Opcode: ShouldInvertCC ? ISD::AND : ISD::OR, DL: dl,
                         VT: Tmp.getValueType(), N1: Tmp, N2: NewLHS);
    // NewRHS is cleared to signal that NewLHS is already a full boolean.
    NewRHS = SDValue();
  }
}
479
480/// Return the entry encoding for a jump table in the current function. The
481/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
482unsigned TargetLowering::getJumpTableEncoding() const {
483 // In non-pic modes, just use the address of a block.
484 if (!isPositionIndependent())
485 return MachineJumpTableInfo::EK_BlockAddress;
486
487 // Otherwise, use a label difference.
488 return MachineJumpTableInfo::EK_LabelDifference32;
489}
490
// By default the jump table itself serves as the PIC relocation base; targets
// with a dedicated PIC base register override this.
SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  return Table;
}
495
496/// This returns the relocation base for the given PIC jumptable, the same as
497/// getPICJumpTableRelocBase, but as an MCExpr.
498const MCExpr *
499TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
500 unsigned JTI,MCContext &Ctx) const{
501 // The normal PIC reloc base is the label at the start of the jump table.
502 return MCSymbolRefExpr::create(Symbol: MF->getJTISymbol(JTI, Ctx), Ctx);
503}
504
// Expand an indirect branch through a jump table into a BRIND node, emitting
// jump-table debug info first where required.
SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
                                               SDValue Addr, int JTI,
                                               SelectionDAG &DAG) const {
  SDValue Chain = Value;
  // Jump table debug info is only needed if CodeView is enabled.
  // NOTE(review): the guard tests for the COFF object format, apparently as a
  // proxy for CodeView emission — confirm this is the intended condition.
  if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
    Chain = DAG.getJumpTableDebugInfo(JTI, Chain, DL: dl);
  }
  return DAG.getNode(Opcode: ISD::BRIND, DL: dl, VT: MVT::Other, N1: Chain, N2: Addr);
}
515
516bool
517TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
518 const TargetMachine &TM = getTargetMachine();
519 const GlobalValue *GV = GA->getGlobal();
520
521 // If the address is not even local to this DSO we will have to load it from
522 // a got and then add the offset.
523 if (!TM.shouldAssumeDSOLocal(GV))
524 return false;
525
526 // If the code is position independent we will have to add a base register.
527 if (isPositionIndependent())
528 return false;
529
530 // Otherwise we can do it.
531 return true;
532}
533
534//===----------------------------------------------------------------------===//
535// Optimization Methods
536//===----------------------------------------------------------------------===//
537
/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Early-out if we've ended up calling an undemanded node, leave this to
  // constant folding.
  if (DemandedBits.isZero() || DemandedElts.isZero())
    return false;

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    return TLO.New.getNode(); // non-null replacement node converts to true

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    // Only handle a plain (non-opaque) constant on the RHS.
    auto *Op1C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(RHS: C))
      return false;

    // The constant has bits set outside the demanded mask: rebuild the node
    // with those undemanded bits cleared.
    if (!C.isSubsetOf(RHS: DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(Val: DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, N1: Op.getOperand(i: 0), N2: NewC,
                                      Flags: Op->getFlags());
      return TLO.CombineTo(O: Op, N: NewOp);
    }

    break;
  }
  }

  return false;
}
587
588bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
589 const APInt &DemandedBits,
590 TargetLoweringOpt &TLO) const {
591 EVT VT = Op.getValueType();
592 APInt DemandedElts = VT.isVector()
593 ? APInt::getAllOnes(numBits: VT.getVectorNumElements())
594 : APInt(1, 1);
595 return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
596}
597
/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
/// but it could be generalized for targets with other types of implicit
/// widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &DemandedBits,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  EVT VT = Op.getValueType();
  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (VT.isVector())
    return false;

  assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
         Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
         "ShrinkDemandedOp only supports operands that have the same size!");

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  unsigned DemandedSize = DemandedBits.getActiveBits();
  for (unsigned SmallVTBits = llvm::bit_ceil(Value: DemandedSize);
       SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(A: SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: SmallVTBits);
    if (isTruncateFree(Val: Op, VT2: SmallVT) && isZExtFree(FromTy: SmallVT, ToTy: VT)) {
      // We found a type with free casts.

      // If the operation has the 'disjoint' flag, then the
      // operands on the new node are also disjoint.
      SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
                                                     : SDNodeFlags::None);
      unsigned Opcode = Op.getOpcode();
      if (Opcode == ISD::PTRADD) {
        // It isn't a ptradd anymore if it doesn't operate on the entire
        // pointer.
        Opcode = ISD::ADD;
      }
      // Build the narrow op on truncated operands, then widen the result with
      // ANY_EXTEND (the high bits are undemanded, so their value is free).
      SDValue X = DAG.getNode(
          Opcode, DL: dl, VT: SmallVT,
          N1: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SmallVT, Operand: Op.getOperand(i: 0)),
          N2: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SmallVT, Operand: Op.getOperand(i: 1)), Flags);
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: X);
      return TLO.CombineTo(O: Op, N: Z);
    }
  }
  return false;
}
657
658bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
659 DAGCombinerInfo &DCI) const {
660 SelectionDAG &DAG = DCI.DAG;
661 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
662 !DCI.isBeforeLegalizeOps());
663 KnownBits Known;
664
665 bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
666 if (Simplified) {
667 DCI.AddToWorklist(N: Op.getNode());
668 DCI.CommitTargetLoweringOpt(TLO);
669 }
670 return Simplified;
671}
672
673bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
674 const APInt &DemandedElts,
675 DAGCombinerInfo &DCI) const {
676 SelectionDAG &DAG = DCI.DAG;
677 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
678 !DCI.isBeforeLegalizeOps());
679 KnownBits Known;
680
681 bool Simplified =
682 SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
683 if (Simplified) {
684 DCI.AddToWorklist(N: Op.getNode());
685 DCI.CommitTargetLoweringOpt(TLO);
686 }
687 return Simplified;
688}
689
690bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
691 KnownBits &Known,
692 TargetLoweringOpt &TLO,
693 unsigned Depth,
694 bool AssumeSingleUse) const {
695 EVT VT = Op.getValueType();
696
697 // Since the number of lanes in a scalable vector is unknown at compile time,
698 // we track one bit which is implicitly broadcast to all lanes. This means
699 // that all lanes in a scalable vector are considered demanded.
700 APInt DemandedElts = VT.isFixedLengthVector()
701 ? APInt::getAllOnes(numBits: VT.getVectorNumElements())
702 : APInt(1, 1);
703 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
704 AssumeSingleUse);
705}
706
707// TODO: Under what circumstances can we create nodes? Constant folding?
708SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
709 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
710 SelectionDAG &DAG, unsigned Depth) const {
711 EVT VT = Op.getValueType();
712
713 // Limit search depth.
714 if (Depth >= SelectionDAG::MaxRecursionDepth)
715 return SDValue();
716
717 // Ignore UNDEFs.
718 if (Op.isUndef())
719 return SDValue();
720
721 // Not demanding any bits/elts from Op.
722 if (DemandedBits == 0 || DemandedElts == 0)
723 return DAG.getUNDEF(VT);
724
725 bool IsLE = DAG.getDataLayout().isLittleEndian();
726 unsigned NumElts = DemandedElts.getBitWidth();
727 unsigned BitWidth = DemandedBits.getBitWidth();
728 KnownBits LHSKnown, RHSKnown;
729 switch (Op.getOpcode()) {
730 case ISD::BITCAST: {
731 if (VT.isScalableVector())
732 return SDValue();
733
734 SDValue Src = peekThroughBitcasts(V: Op.getOperand(i: 0));
735 EVT SrcVT = Src.getValueType();
736 EVT DstVT = Op.getValueType();
737 if (SrcVT == DstVT)
738 return Src;
739
740 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
741 unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
742 if (NumSrcEltBits == NumDstEltBits)
743 if (SDValue V = SimplifyMultipleUseDemandedBits(
744 Op: Src, DemandedBits, DemandedElts, DAG, Depth: Depth + 1))
745 return DAG.getBitcast(VT: DstVT, V);
746
747 if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
748 unsigned Scale = NumDstEltBits / NumSrcEltBits;
749 unsigned NumSrcElts = SrcVT.getVectorNumElements();
750 APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
751 for (unsigned i = 0; i != Scale; ++i) {
752 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
753 unsigned BitOffset = EltOffset * NumSrcEltBits;
754 DemandedSrcBits |= DemandedBits.extractBits(numBits: NumSrcEltBits, bitPosition: BitOffset);
755 }
756 // Recursive calls below may turn not demanded elements into poison, so we
757 // need to demand all smaller source elements that maps to a demanded
758 // destination element.
759 APInt DemandedSrcElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
760
761 if (SDValue V = SimplifyMultipleUseDemandedBits(
762 Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG, Depth: Depth + 1))
763 return DAG.getBitcast(VT: DstVT, V);
764 }
765
766 // TODO - bigendian once we have test coverage.
767 if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
768 unsigned Scale = NumSrcEltBits / NumDstEltBits;
769 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
770 APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
771 APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
772 for (unsigned i = 0; i != NumElts; ++i)
773 if (DemandedElts[i]) {
774 unsigned Offset = (i % Scale) * NumDstEltBits;
775 DemandedSrcBits.insertBits(SubBits: DemandedBits, bitPosition: Offset);
776 DemandedSrcElts.setBit(i / Scale);
777 }
778
779 if (SDValue V = SimplifyMultipleUseDemandedBits(
780 Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG, Depth: Depth + 1))
781 return DAG.getBitcast(VT: DstVT, V);
782 }
783
784 break;
785 }
786 case ISD::AND: {
787 LHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1);
788 RHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1);
789
790 // If all of the demanded bits are known 1 on one side, return the other.
791 // These bits cannot contribute to the result of the 'and' in this
792 // context.
793 if (DemandedBits.isSubsetOf(RHS: LHSKnown.Zero | RHSKnown.One))
794 return Op.getOperand(i: 0);
795 if (DemandedBits.isSubsetOf(RHS: RHSKnown.Zero | LHSKnown.One))
796 return Op.getOperand(i: 1);
797 break;
798 }
799 case ISD::OR: {
800 LHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1);
801 RHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1);
802
803 // If all of the demanded bits are known zero on one side, return the
804 // other. These bits cannot contribute to the result of the 'or' in this
805 // context.
806 if (DemandedBits.isSubsetOf(RHS: LHSKnown.One | RHSKnown.Zero))
807 return Op.getOperand(i: 0);
808 if (DemandedBits.isSubsetOf(RHS: RHSKnown.One | LHSKnown.Zero))
809 return Op.getOperand(i: 1);
810 break;
811 }
812 case ISD::XOR: {
813 LHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1);
814 RHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1);
815
816 // If all of the demanded bits are known zero on one side, return the
817 // other.
818 if (DemandedBits.isSubsetOf(RHS: RHSKnown.Zero))
819 return Op.getOperand(i: 0);
820 if (DemandedBits.isSubsetOf(RHS: LHSKnown.Zero))
821 return Op.getOperand(i: 1);
822 break;
823 }
824 case ISD::ADD: {
825 RHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1);
826 if (RHSKnown.isZero())
827 return Op.getOperand(i: 0);
828
829 LHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1);
830 if (LHSKnown.isZero())
831 return Op.getOperand(i: 1);
832 break;
833 }
834 case ISD::SHL: {
835 // If we are only demanding sign bits then we can use the shift source
836 // directly.
837 if (std::optional<unsigned> MaxSA =
838 DAG.getValidMaximumShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
839 SDValue Op0 = Op.getOperand(i: 0);
840 unsigned ShAmt = *MaxSA;
841 unsigned NumSignBits =
842 DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1);
843 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
844 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
845 return Op0;
846 }
847 break;
848 }
849 case ISD::SRL: {
850 // If we are only demanding sign bits then we can use the shift source
851 // directly.
852 if (std::optional<unsigned> MaxSA =
853 DAG.getValidMaximumShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
854 SDValue Op0 = Op.getOperand(i: 0);
855 unsigned ShAmt = *MaxSA;
856 // Must already be signbits in DemandedBits bounds, and can't demand any
857 // shifted in zeroes.
858 if (DemandedBits.countl_zero() >= ShAmt) {
859 unsigned NumSignBits =
860 DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1);
861 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
862 return Op0;
863 }
864 }
865 break;
866 }
867 case ISD::SETCC: {
868 SDValue Op0 = Op.getOperand(i: 0);
869 SDValue Op1 = Op.getOperand(i: 1);
870 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get();
871 // If (1) we only need the sign-bit, (2) the setcc operands are the same
872 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
873 // -1, we may be able to bypass the setcc.
874 if (DemandedBits.isSignMask() &&
875 Op0.getScalarValueSizeInBits() == BitWidth &&
876 getBooleanContents(Type: Op0.getValueType()) ==
877 BooleanContent::ZeroOrNegativeOneBooleanContent) {
878 // If we're testing X < 0, then this compare isn't needed - just use X!
879 // FIXME: We're limiting to integer types here, but this should also work
880 // if we don't care about FP signed-zero. The use of SETLT with FP means
881 // that we don't care about NaNs.
882 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
883 (isNullConstant(V: Op1) || ISD::isBuildVectorAllZeros(N: Op1.getNode())))
884 return Op0;
885 }
886 break;
887 }
888 case ISD::SIGN_EXTEND_INREG: {
889 // If none of the extended bits are demanded, eliminate the sextinreg.
890 SDValue Op0 = Op.getOperand(i: 0);
891 EVT ExVT = cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT();
892 unsigned ExBits = ExVT.getScalarSizeInBits();
893 if (DemandedBits.getActiveBits() <= ExBits &&
894 shouldRemoveRedundantExtend(Op))
895 return Op0;
896 // If the input is already sign extended, just drop the extension.
897 unsigned NumSignBits = DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1);
898 if (NumSignBits >= (BitWidth - ExBits + 1))
899 return Op0;
900 break;
901 }
902 case ISD::ANY_EXTEND_VECTOR_INREG:
903 case ISD::SIGN_EXTEND_VECTOR_INREG:
904 case ISD::ZERO_EXTEND_VECTOR_INREG: {
905 if (VT.isScalableVector())
906 return SDValue();
907
908 // If we only want the lowest element and none of extended bits, then we can
909 // return the bitcasted source vector.
910 SDValue Src = Op.getOperand(i: 0);
911 EVT SrcVT = Src.getValueType();
912 EVT DstVT = Op.getValueType();
913 if (IsLE && DemandedElts == 1 &&
914 DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
915 DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
916 return DAG.getBitcast(VT: DstVT, V: Src);
917 }
918 break;
919 }
920 case ISD::INSERT_VECTOR_ELT: {
921 if (VT.isScalableVector())
922 return SDValue();
923
924 // If we don't demand the inserted element, return the base vector.
925 SDValue Vec = Op.getOperand(i: 0);
926 auto *CIdx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));
927 EVT VecVT = Vec.getValueType();
928 if (CIdx && CIdx->getAPIntValue().ult(RHS: VecVT.getVectorNumElements()) &&
929 !DemandedElts[CIdx->getZExtValue()])
930 return Vec;
931 break;
932 }
933 case ISD::INSERT_SUBVECTOR: {
934 if (VT.isScalableVector())
935 return SDValue();
936
937 SDValue Vec = Op.getOperand(i: 0);
938 SDValue Sub = Op.getOperand(i: 1);
939 uint64_t Idx = Op.getConstantOperandVal(i: 2);
940 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
941 APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx);
942 // If we don't demand the inserted subvector, return the base vector.
943 if (DemandedSubElts == 0)
944 return Vec;
945 break;
946 }
947 case ISD::VECTOR_SHUFFLE: {
948 assert(!VT.isScalableVector());
949 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Val&: Op)->getMask();
950
951 // If all the demanded elts are from one operand and are inline,
952 // then we can use the operand directly.
953 bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
954 for (unsigned i = 0; i != NumElts; ++i) {
955 int M = ShuffleMask[i];
956 if (M < 0 || !DemandedElts[i])
957 continue;
958 AllUndef = false;
959 IdentityLHS &= (M == (int)i);
960 IdentityRHS &= ((M - NumElts) == i);
961 }
962
963 if (AllUndef)
964 return DAG.getUNDEF(VT: Op.getValueType());
965 if (IdentityLHS)
966 return Op.getOperand(i: 0);
967 if (IdentityRHS)
968 return Op.getOperand(i: 1);
969 break;
970 }
971 default:
972 // TODO: Probably okay to remove after audit; here to reduce change size
973 // in initial enablement patch for scalable vectors
974 if (VT.isScalableVector())
975 return SDValue();
976
977 if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
978 if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
979 Op, DemandedBits, DemandedElts, DAG, Depth))
980 return V;
981 break;
982 }
983 return SDValue();
984}
985
986SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
987 SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
988 unsigned Depth) const {
989 EVT VT = Op.getValueType();
990 // Since the number of lanes in a scalable vector is unknown at compile time,
991 // we track one bit which is implicitly broadcast to all lanes. This means
992 // that all lanes in a scalable vector are considered demanded.
993 APInt DemandedElts = VT.isFixedLengthVector()
994 ? APInt::getAllOnes(numBits: VT.getVectorNumElements())
995 : APInt(1, 1);
996 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
997 Depth);
998}
999
1000SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
1001 SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
1002 unsigned Depth) const {
1003 APInt DemandedBits = APInt::getAllOnes(numBits: Op.getScalarValueSizeInBits());
1004 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
1005 Depth);
1006}
1007
1008// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
1009// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
1010static SDValue combineShiftToAVG(SDValue Op,
1011 TargetLowering::TargetLoweringOpt &TLO,
1012 const TargetLowering &TLI,
1013 const APInt &DemandedBits,
1014 const APInt &DemandedElts, unsigned Depth) {
1015 assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
1016 "SRL or SRA node is required here!");
1017 // Is the right shift using an immediate value of 1?
1018 ConstantSDNode *N1C = isConstOrConstSplat(N: Op.getOperand(i: 1), DemandedElts);
1019 if (!N1C || !N1C->isOne())
1020 return SDValue();
1021
1022 // We are looking for an avgfloor
1023 // add(ext, ext)
1024 // or one of these as a avgceil
1025 // add(add(ext, ext), 1)
1026 // add(add(ext, 1), ext)
1027 // add(ext, add(ext, 1))
1028 SDValue Add = Op.getOperand(i: 0);
1029 if (Add.getOpcode() != ISD::ADD)
1030 return SDValue();
1031
1032 SDValue ExtOpA = Add.getOperand(i: 0);
1033 SDValue ExtOpB = Add.getOperand(i: 1);
1034 SDValue Add2;
1035 auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
1036 ConstantSDNode *ConstOp;
1037 if ((ConstOp = isConstOrConstSplat(N: Op2, DemandedElts)) &&
1038 ConstOp->isOne()) {
1039 ExtOpA = Op1;
1040 ExtOpB = Op3;
1041 Add2 = A;
1042 return true;
1043 }
1044 if ((ConstOp = isConstOrConstSplat(N: Op3, DemandedElts)) &&
1045 ConstOp->isOne()) {
1046 ExtOpA = Op1;
1047 ExtOpB = Op2;
1048 Add2 = A;
1049 return true;
1050 }
1051 return false;
1052 };
1053 bool IsCeil =
1054 (ExtOpA.getOpcode() == ISD::ADD &&
1055 MatchOperands(ExtOpA.getOperand(i: 0), ExtOpA.getOperand(i: 1), ExtOpB, ExtOpA)) ||
1056 (ExtOpB.getOpcode() == ISD::ADD &&
1057 MatchOperands(ExtOpB.getOperand(i: 0), ExtOpB.getOperand(i: 1), ExtOpA, ExtOpB));
1058
1059 // If the shift is signed (sra):
1060 // - Needs >= 2 sign bit for both operands.
1061 // - Needs >= 2 zero bits.
1062 // If the shift is unsigned (srl):
1063 // - Needs >= 1 zero bit for both operands.
1064 // - Needs 1 demanded bit zero and >= 2 sign bits.
1065 SelectionDAG &DAG = TLO.DAG;
1066 unsigned ShiftOpc = Op.getOpcode();
1067 bool IsSigned = false;
1068 unsigned KnownBits;
1069 unsigned NumSignedA = DAG.ComputeNumSignBits(Op: ExtOpA, DemandedElts, Depth);
1070 unsigned NumSignedB = DAG.ComputeNumSignBits(Op: ExtOpB, DemandedElts, Depth);
1071 unsigned NumSigned = std::min(a: NumSignedA, b: NumSignedB) - 1;
1072 unsigned NumZeroA =
1073 DAG.computeKnownBits(Op: ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
1074 unsigned NumZeroB =
1075 DAG.computeKnownBits(Op: ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
1076 unsigned NumZero = std::min(a: NumZeroA, b: NumZeroB);
1077
1078 switch (ShiftOpc) {
1079 default:
1080 llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
1081 case ISD::SRA: {
1082 if (NumZero >= 2 && NumSigned < NumZero) {
1083 IsSigned = false;
1084 KnownBits = NumZero;
1085 break;
1086 }
1087 if (NumSigned >= 1) {
1088 IsSigned = true;
1089 KnownBits = NumSigned;
1090 break;
1091 }
1092 return SDValue();
1093 }
1094 case ISD::SRL: {
1095 if (NumZero >= 1 && NumSigned < NumZero) {
1096 IsSigned = false;
1097 KnownBits = NumZero;
1098 break;
1099 }
1100 if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
1101 IsSigned = true;
1102 KnownBits = NumSigned;
1103 break;
1104 }
1105 return SDValue();
1106 }
1107 }
1108
1109 unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
1110 : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
1111
1112 // Find the smallest power-2 type that is legal for this vector size and
1113 // operation, given the original type size and the number of known sign/zero
1114 // bits.
1115 EVT VT = Op.getValueType();
1116 unsigned MinWidth =
1117 std::max<unsigned>(a: VT.getScalarSizeInBits() - KnownBits, b: 8);
1118 EVT NVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: llvm::bit_ceil(Value: MinWidth));
1119 if (NVT.getScalarSizeInBits() > VT.getScalarSizeInBits())
1120 return SDValue();
1121 if (VT.isVector())
1122 NVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: NVT, EC: VT.getVectorElementCount());
1123 if (TLO.LegalTypes() && !TLI.isOperationLegal(Op: AVGOpc, VT: NVT)) {
1124 // If we could not transform, and (both) adds are nuw/nsw, we can use the
1125 // larger type size to do the transform.
1126 if (TLO.LegalOperations() && !TLI.isOperationLegal(Op: AVGOpc, VT))
1127 return SDValue();
1128 if (DAG.willNotOverflowAdd(IsSigned, N0: Add.getOperand(i: 0),
1129 N1: Add.getOperand(i: 1)) &&
1130 (!Add2 || DAG.willNotOverflowAdd(IsSigned, N0: Add2.getOperand(i: 0),
1131 N1: Add2.getOperand(i: 1))))
1132 NVT = VT;
1133 else
1134 return SDValue();
1135 }
1136
1137 // Don't create a AVGFLOOR node with a scalar constant unless its legal as
1138 // this is likely to stop other folds (reassociation, value tracking etc.)
1139 if (!IsCeil && !TLI.isOperationLegal(Op: AVGOpc, VT: NVT) &&
1140 (isa<ConstantSDNode>(Val: ExtOpA) || isa<ConstantSDNode>(Val: ExtOpB)))
1141 return SDValue();
1142
1143 SDLoc DL(Op);
1144 SDValue ResultAVG =
1145 DAG.getNode(Opcode: AVGOpc, DL, VT: NVT, N1: DAG.getExtOrTrunc(IsSigned, Op: ExtOpA, DL, VT: NVT),
1146 N2: DAG.getExtOrTrunc(IsSigned, Op: ExtOpB, DL, VT: NVT));
1147 return DAG.getExtOrTrunc(IsSigned, Op: ResultAVG, DL, VT);
1148}
1149
1150/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1151/// result of Op are ever used downstream. If we can use this information to
1152/// simplify Op, create a new simplified DAG node and return true, returning the
1153/// original and new nodes in Old and New. Otherwise, analyze the expression and
1154/// return a mask of Known bits for the expression (used to simplify the
1155/// caller). The Known bits may only be accurate for those bits in the
1156/// OriginalDemandedBits and OriginalDemandedElts.
1157bool TargetLowering::SimplifyDemandedBits(
1158 SDValue Op, const APInt &OriginalDemandedBits,
1159 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1160 unsigned Depth, bool AssumeSingleUse) const {
1161 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1162 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1163 "Mask size mismatches value type size!");
1164
1165 // Don't know anything.
1166 Known = KnownBits(BitWidth);
1167
1168 EVT VT = Op.getValueType();
1169 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1170 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1171 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1172 "Unexpected vector size");
1173
1174 APInt DemandedBits = OriginalDemandedBits;
1175 APInt DemandedElts = OriginalDemandedElts;
1176 SDLoc dl(Op);
1177
1178 // Undef operand.
1179 if (Op.isUndef())
1180 return false;
1181
1182 // We can't simplify target constants.
1183 if (Op.getOpcode() == ISD::TargetConstant)
1184 return false;
1185
1186 if (Op.getOpcode() == ISD::Constant) {
1187 // We know all of the bits for a constant!
1188 Known = KnownBits::makeConstant(C: Op->getAsAPIntVal());
1189 return false;
1190 }
1191
1192 if (Op.getOpcode() == ISD::ConstantFP) {
1193 // We know all of the bits for a floating point constant!
1194 Known = KnownBits::makeConstant(
1195 C: cast<ConstantFPSDNode>(Val&: Op)->getValueAPF().bitcastToAPInt());
1196 return false;
1197 }
1198
1199 // Other users may use these bits.
1200 bool HasMultiUse = false;
1201 if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1202 if (Depth >= SelectionDAG::MaxRecursionDepth) {
1203 // Limit search depth.
1204 return false;
1205 }
1206 // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1207 DemandedBits = APInt::getAllOnes(numBits: BitWidth);
1208 DemandedElts = APInt::getAllOnes(numBits: NumElts);
1209 HasMultiUse = true;
1210 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1211 // Not demanding any bits/elts from Op.
1212 return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
1213 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1214 // Limit search depth.
1215 return false;
1216 }
1217
1218 KnownBits Known2;
1219 switch (Op.getOpcode()) {
1220 case ISD::SCALAR_TO_VECTOR: {
1221 if (VT.isScalableVector())
1222 return false;
1223 if (!DemandedElts[0])
1224 return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
1225
1226 KnownBits SrcKnown;
1227 SDValue Src = Op.getOperand(i: 0);
1228 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1229 APInt SrcDemandedBits = DemandedBits.zext(width: SrcBitWidth);
1230 if (SimplifyDemandedBits(Op: Src, DemandedBits: SrcDemandedBits, Known&: SrcKnown, TLO, Depth: Depth + 1))
1231 return true;
1232
1233 // Upper elements are undef, so only get the knownbits if we just demand
1234 // the bottom element.
1235 if (DemandedElts == 1)
1236 Known = SrcKnown.anyextOrTrunc(BitWidth);
1237 break;
1238 }
1239 case ISD::BUILD_VECTOR:
1240 // Collect the known bits that are shared by every demanded element.
1241 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1242 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1243 return false; // Don't fall through, will infinitely loop.
1244 case ISD::SPLAT_VECTOR: {
1245 SDValue Scl = Op.getOperand(i: 0);
1246 APInt DemandedSclBits = DemandedBits.zextOrTrunc(width: Scl.getValueSizeInBits());
1247 KnownBits KnownScl;
1248 if (SimplifyDemandedBits(Op: Scl, DemandedBits: DemandedSclBits, Known&: KnownScl, TLO, Depth: Depth + 1))
1249 return true;
1250
1251 // Implicitly truncate the bits to match the official semantics of
1252 // SPLAT_VECTOR.
1253 Known = KnownScl.trunc(BitWidth);
1254 break;
1255 }
1256 case ISD::LOAD: {
1257 auto *LD = cast<LoadSDNode>(Val&: Op);
1258 if (getTargetConstantFromLoad(LD)) {
1259 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1260 return false; // Don't fall through, will infinitely loop.
1261 }
1262 if (ISD::isZEXTLoad(N: Op.getNode()) && Op.getResNo() == 0) {
1263 // If this is a ZEXTLoad and we are looking at the loaded value.
1264 EVT MemVT = LD->getMemoryVT();
1265 unsigned MemBits = MemVT.getScalarSizeInBits();
1266 Known.Zero.setBitsFrom(MemBits);
1267 return false; // Don't fall through, will infinitely loop.
1268 }
1269 break;
1270 }
1271 case ISD::INSERT_VECTOR_ELT: {
1272 if (VT.isScalableVector())
1273 return false;
1274 SDValue Vec = Op.getOperand(i: 0);
1275 SDValue Scl = Op.getOperand(i: 1);
1276 auto *CIdx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));
1277 EVT VecVT = Vec.getValueType();
1278
1279 // If index isn't constant, assume we need all vector elements AND the
1280 // inserted element.
1281 APInt DemandedVecElts(DemandedElts);
1282 if (CIdx && CIdx->getAPIntValue().ult(RHS: VecVT.getVectorNumElements())) {
1283 unsigned Idx = CIdx->getZExtValue();
1284 DemandedVecElts.clearBit(BitPosition: Idx);
1285
1286 // Inserted element is not required.
1287 if (!DemandedElts[Idx])
1288 return TLO.CombineTo(O: Op, N: Vec);
1289 }
1290
1291 KnownBits KnownScl;
1292 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1293 APInt DemandedSclBits = DemandedBits.zextOrTrunc(width: NumSclBits);
1294 if (SimplifyDemandedBits(Op: Scl, DemandedBits: DemandedSclBits, Known&: KnownScl, TLO, Depth: Depth + 1))
1295 return true;
1296
1297 Known = KnownScl.anyextOrTrunc(BitWidth);
1298
1299 KnownBits KnownVec;
1300 if (SimplifyDemandedBits(Op: Vec, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedVecElts, Known&: KnownVec, TLO,
1301 Depth: Depth + 1))
1302 return true;
1303
1304 if (!!DemandedVecElts)
1305 Known = Known.intersectWith(RHS: KnownVec);
1306
1307 return false;
1308 }
1309 case ISD::INSERT_SUBVECTOR: {
1310 if (VT.isScalableVector())
1311 return false;
1312 // Demand any elements from the subvector and the remainder from the src its
1313 // inserted into.
1314 SDValue Src = Op.getOperand(i: 0);
1315 SDValue Sub = Op.getOperand(i: 1);
1316 uint64_t Idx = Op.getConstantOperandVal(i: 2);
1317 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1318 APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx);
1319 APInt DemandedSrcElts = DemandedElts;
1320 DemandedSrcElts.clearBits(LoBit: Idx, HiBit: Idx + NumSubElts);
1321
1322 KnownBits KnownSub, KnownSrc;
1323 if (SimplifyDemandedBits(Op: Sub, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSubElts, Known&: KnownSub, TLO,
1324 Depth: Depth + 1))
1325 return true;
1326 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSrcElts, Known&: KnownSrc, TLO,
1327 Depth: Depth + 1))
1328 return true;
1329
1330 Known.setAllConflict();
1331 if (!!DemandedSubElts)
1332 Known = Known.intersectWith(RHS: KnownSub);
1333 if (!!DemandedSrcElts)
1334 Known = Known.intersectWith(RHS: KnownSrc);
1335
1336 // Attempt to avoid multi-use src if we don't need anything from it.
1337 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1338 !DemandedSrcElts.isAllOnes()) {
1339 SDValue NewSub = SimplifyMultipleUseDemandedBits(
1340 Op: Sub, DemandedBits, DemandedElts: DemandedSubElts, DAG&: TLO.DAG, Depth: Depth + 1);
1341 SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1342 Op: Src, DemandedBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1);
1343 if (NewSub || NewSrc) {
1344 NewSub = NewSub ? NewSub : Sub;
1345 NewSrc = NewSrc ? NewSrc : Src;
1346 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: NewSrc, N2: NewSub,
1347 N3: Op.getOperand(i: 2));
1348 return TLO.CombineTo(O: Op, N: NewOp);
1349 }
1350 }
1351 break;
1352 }
1353 case ISD::EXTRACT_SUBVECTOR: {
1354 if (VT.isScalableVector())
1355 return false;
1356 // Offset the demanded elts by the subvector index.
1357 SDValue Src = Op.getOperand(i: 0);
1358 if (Src.getValueType().isScalableVector())
1359 break;
1360 uint64_t Idx = Op.getConstantOperandVal(i: 1);
1361 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1362 APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts).shl(shiftAmt: Idx);
1363
1364 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSrcElts, Known, TLO,
1365 Depth: Depth + 1))
1366 return true;
1367
1368 // Attempt to avoid multi-use src if we don't need anything from it.
1369 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1370 SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1371 Op: Src, DemandedBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1);
1372 if (DemandedSrc) {
1373 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedSrc,
1374 N2: Op.getOperand(i: 1));
1375 return TLO.CombineTo(O: Op, N: NewOp);
1376 }
1377 }
1378 break;
1379 }
1380 case ISD::CONCAT_VECTORS: {
1381 if (VT.isScalableVector())
1382 return false;
1383 Known.setAllConflict();
1384 EVT SubVT = Op.getOperand(i: 0).getValueType();
1385 unsigned NumSubVecs = Op.getNumOperands();
1386 unsigned NumSubElts = SubVT.getVectorNumElements();
1387 for (unsigned i = 0; i != NumSubVecs; ++i) {
1388 APInt DemandedSubElts =
1389 DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
1390 if (SimplifyDemandedBits(Op: Op.getOperand(i), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSubElts,
1391 Known&: Known2, TLO, Depth: Depth + 1))
1392 return true;
1393 // Known bits are shared by every demanded subvector element.
1394 if (!!DemandedSubElts)
1395 Known = Known.intersectWith(RHS: Known2);
1396 }
1397 break;
1398 }
1399 case ISD::VECTOR_SHUFFLE: {
1400 assert(!VT.isScalableVector());
1401 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Val&: Op)->getMask();
1402
1403 // Collect demanded elements from shuffle operands..
1404 APInt DemandedLHS, DemandedRHS;
1405 if (!getShuffleDemandedElts(SrcWidth: NumElts, Mask: ShuffleMask, DemandedElts, DemandedLHS,
1406 DemandedRHS))
1407 break;
1408
1409 if (!!DemandedLHS || !!DemandedRHS) {
1410 SDValue Op0 = Op.getOperand(i: 0);
1411 SDValue Op1 = Op.getOperand(i: 1);
1412
1413 Known.setAllConflict();
1414 if (!!DemandedLHS) {
1415 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedLHS, Known&: Known2, TLO,
1416 Depth: Depth + 1))
1417 return true;
1418 Known = Known.intersectWith(RHS: Known2);
1419 }
1420 if (!!DemandedRHS) {
1421 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedRHS, Known&: Known2, TLO,
1422 Depth: Depth + 1))
1423 return true;
1424 Known = Known.intersectWith(RHS: Known2);
1425 }
1426
1427 // Attempt to avoid multi-use ops if we don't need anything from them.
1428 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1429 Op: Op0, DemandedBits, DemandedElts: DemandedLHS, DAG&: TLO.DAG, Depth: Depth + 1);
1430 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1431 Op: Op1, DemandedBits, DemandedElts: DemandedRHS, DAG&: TLO.DAG, Depth: Depth + 1);
1432 if (DemandedOp0 || DemandedOp1) {
1433 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1434 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1435 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, N1: Op0, N2: Op1, Mask: ShuffleMask);
1436 return TLO.CombineTo(O: Op, N: NewOp);
1437 }
1438 }
1439 break;
1440 }
1441 case ISD::AND: {
1442 SDValue Op0 = Op.getOperand(i: 0);
1443 SDValue Op1 = Op.getOperand(i: 1);
1444
1445 // If the RHS is a constant, check to see if the LHS would be zero without
1446 // using the bits from the RHS. Below, we use knowledge about the RHS to
1447 // simplify the LHS, here we're using information from the LHS to simplify
1448 // the RHS.
1449 if (ConstantSDNode *RHSC = isConstOrConstSplat(N: Op1, DemandedElts)) {
1450 // Do not increment Depth here; that can cause an infinite loop.
1451 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op: Op0, DemandedElts, Depth);
1452 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1453 if ((LHSKnown.Zero & DemandedBits) ==
1454 (~RHSC->getAPIntValue() & DemandedBits))
1455 return TLO.CombineTo(O: Op, N: Op0);
1456
1457 // If any of the set bits in the RHS are known zero on the LHS, shrink
1458 // the constant.
1459 if (ShrinkDemandedConstant(Op, DemandedBits: ~LHSKnown.Zero & DemandedBits,
1460 DemandedElts, TLO))
1461 return true;
1462
1463 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1464 // constant, but if this 'and' is only clearing bits that were just set by
1465 // the xor, then this 'and' can be eliminated by shrinking the mask of
1466 // the xor. For example, for a 32-bit X:
1467 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1468 if (isBitwiseNot(V: Op0) && Op0.hasOneUse() &&
1469 LHSKnown.One == ~RHSC->getAPIntValue()) {
1470 SDValue Xor = TLO.DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Op0.getOperand(i: 0), N2: Op1);
1471 return TLO.CombineTo(O: Op, N: Xor);
1472 }
1473 }
1474
1475 // (X +/- Y) & Y --> ~X & Y when Y is a power of 2 (or zero).
1476 SDValue X, Y;
1477 if (sd_match(N: Op,
1478 P: m_And(L: m_Value(N&: Y),
1479 R: m_OneUse(P: m_AnyOf(preds: m_Add(L: m_Value(N&: X), R: m_Deferred(V&: Y)),
1480 preds: m_Sub(L: m_Value(N&: X), R: m_Deferred(V&: Y)))))) &&
1481 TLO.DAG.isKnownToBeAPowerOfTwo(Val: Y, DemandedElts, /*OrZero=*/true)) {
1482 return TLO.CombineTo(
1483 O: Op, N: TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: TLO.DAG.getNOT(DL: dl, Val: X, VT), N2: Y));
1484 }
1485
1486 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1487 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1488 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1489 (Op0.getOperand(i: 0).isUndef() ||
1490 ISD::isBuildVectorOfConstantSDNodes(N: Op0.getOperand(i: 0).getNode())) &&
1491 Op0->hasOneUse()) {
1492 unsigned NumSubElts =
1493 Op0.getOperand(i: 1).getValueType().getVectorNumElements();
1494 unsigned SubIdx = Op0.getConstantOperandVal(i: 2);
1495 APInt DemandedSub =
1496 APInt::getBitsSet(numBits: NumElts, loBit: SubIdx, hiBit: SubIdx + NumSubElts);
1497 KnownBits KnownSubMask =
1498 TLO.DAG.computeKnownBits(Op: Op1, DemandedElts: DemandedSub & DemandedElts, Depth: Depth + 1);
1499 if (DemandedBits.isSubsetOf(RHS: KnownSubMask.One)) {
1500 SDValue NewAnd =
1501 TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op0.getOperand(i: 0), N2: Op1);
1502 SDValue NewInsert =
1503 TLO.DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL: dl, VT, N1: NewAnd,
1504 N2: Op0.getOperand(i: 1), N3: Op0.getOperand(i: 2));
1505 return TLO.CombineTo(O: Op, N: NewInsert);
1506 }
1507 }
1508
1509 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1510 Depth: Depth + 1))
1511 return true;
1512 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: ~Known.Zero & DemandedBits, OriginalDemandedElts: DemandedElts,
1513 Known&: Known2, TLO, Depth: Depth + 1))
1514 return true;
1515
1516 // If all of the demanded bits are known one on one side, return the other.
1517 // These bits cannot contribute to the result of the 'and'.
1518 if (DemandedBits.isSubsetOf(RHS: Known2.Zero | Known.One))
1519 return TLO.CombineTo(O: Op, N: Op0);
1520 if (DemandedBits.isSubsetOf(RHS: Known.Zero | Known2.One))
1521 return TLO.CombineTo(O: Op, N: Op1);
1522 // If all of the demanded bits in the inputs are known zeros, return zero.
1523 if (DemandedBits.isSubsetOf(RHS: Known.Zero | Known2.Zero))
1524 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: dl, VT));
1525 // If the RHS is a constant, see if we can simplify it.
1526 if (ShrinkDemandedConstant(Op, DemandedBits: ~Known2.Zero & DemandedBits, DemandedElts,
1527 TLO))
1528 return true;
1529 // If the operation can be done in a smaller type, do so.
1530 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1531 return true;
1532
1533 // Attempt to avoid multi-use ops if we don't need anything from them.
1534 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1535 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1536 Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1537 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1538 Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1539 if (DemandedOp0 || DemandedOp1) {
1540 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1541 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1542 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1543 return TLO.CombineTo(O: Op, N: NewOp);
1544 }
1545 }
1546
1547 Known &= Known2;
1548 break;
1549 }
1550 case ISD::OR: {
1551 SDValue Op0 = Op.getOperand(i: 0);
1552 SDValue Op1 = Op.getOperand(i: 1);
1553 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1554 Depth: Depth + 1)) {
1555 Op->dropFlags(Mask: SDNodeFlags::Disjoint);
1556 return true;
1557 }
1558
1559 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: ~Known.One & DemandedBits, OriginalDemandedElts: DemandedElts,
1560 Known&: Known2, TLO, Depth: Depth + 1)) {
1561 Op->dropFlags(Mask: SDNodeFlags::Disjoint);
1562 return true;
1563 }
1564
1565 // If all of the demanded bits are known zero on one side, return the other.
1566 // These bits cannot contribute to the result of the 'or'.
1567 if (DemandedBits.isSubsetOf(RHS: Known2.One | Known.Zero))
1568 return TLO.CombineTo(O: Op, N: Op0);
1569 if (DemandedBits.isSubsetOf(RHS: Known.One | Known2.Zero))
1570 return TLO.CombineTo(O: Op, N: Op1);
1571 // If the RHS is a constant, see if we can simplify it.
1572 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1573 return true;
1574 // If the operation can be done in a smaller type, do so.
1575 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1576 return true;
1577
1578 // Attempt to avoid multi-use ops if we don't need anything from them.
1579 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1580 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1581 Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1582 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1583 Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1584 if (DemandedOp0 || DemandedOp1) {
1585 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1586 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1587 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1588 return TLO.CombineTo(O: Op, N: NewOp);
1589 }
1590 }
1591
1592 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1593 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1594 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1595 Op0->hasOneUse() && Op1->hasOneUse()) {
1596 // Attempt to match all commutations - m_c_Or would've been useful!
1597 for (int I = 0; I != 2; ++I) {
1598 SDValue X = Op.getOperand(i: I).getOperand(i: 0);
1599 SDValue C1 = Op.getOperand(i: I).getOperand(i: 1);
1600 SDValue Alt = Op.getOperand(i: 1 - I).getOperand(i: 0);
1601 SDValue C2 = Op.getOperand(i: 1 - I).getOperand(i: 1);
1602 if (Alt.getOpcode() == ISD::OR) {
1603 for (int J = 0; J != 2; ++J) {
1604 if (X == Alt.getOperand(i: J)) {
1605 SDValue Y = Alt.getOperand(i: 1 - J);
1606 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(Opcode: ISD::OR, DL: dl, VT,
1607 Ops: {C1, C2})) {
1608 SDValue MaskX = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: X, N2: C12);
1609 SDValue MaskY = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Y, N2: C2);
1610 return TLO.CombineTo(
1611 O: Op, N: TLO.DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: MaskX, N2: MaskY));
1612 }
1613 }
1614 }
1615 }
1616 }
1617 }
1618
1619 Known |= Known2;
1620 break;
1621 }
1622 case ISD::XOR: {
1623 SDValue Op0 = Op.getOperand(i: 0);
1624 SDValue Op1 = Op.getOperand(i: 1);
1625
1626 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1627 Depth: Depth + 1))
1628 return true;
1629 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
1630 Depth: Depth + 1))
1631 return true;
1632
1633 // If all of the demanded bits are known zero on one side, return the other.
1634 // These bits cannot contribute to the result of the 'xor'.
1635 if (DemandedBits.isSubsetOf(RHS: Known.Zero))
1636 return TLO.CombineTo(O: Op, N: Op0);
1637 if (DemandedBits.isSubsetOf(RHS: Known2.Zero))
1638 return TLO.CombineTo(O: Op, N: Op1);
1639 // If the operation can be done in a smaller type, do so.
1640 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1641 return true;
1642
1643 // If all of the unknown bits are known to be zero on one side or the other
1644 // turn this into an *inclusive* or.
1645 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1646 if (DemandedBits.isSubsetOf(RHS: Known.Zero | Known2.Zero))
1647 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Op0, N2: Op1));
1648
1649 ConstantSDNode *C = isConstOrConstSplat(N: Op1, DemandedElts);
1650 if (C) {
1651 // If one side is a constant, and all of the set bits in the constant are
1652 // also known set on the other side, turn this into an AND, as we know
1653 // the bits will be cleared.
1654 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1655 // NB: it is okay if more bits are known than are requested
1656 if (C->getAPIntValue() == Known2.One) {
1657 SDValue ANDC =
1658 TLO.DAG.getConstant(Val: ~C->getAPIntValue() & DemandedBits, DL: dl, VT);
1659 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op0, N2: ANDC));
1660 }
1661
1662 // If the RHS is a constant, see if we can change it. Don't alter a -1
1663 // constant because that's a 'not' op, and that is better for combining
1664 // and codegen.
1665 if (!C->isAllOnes() && DemandedBits.isSubsetOf(RHS: C->getAPIntValue())) {
1666 // We're flipping all demanded bits. Flip the undemanded bits too.
1667 SDValue New = TLO.DAG.getNOT(DL: dl, Val: Op0, VT);
1668 return TLO.CombineTo(O: Op, N: New);
1669 }
1670
1671 unsigned Op0Opcode = Op0.getOpcode();
1672 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1673 if (ConstantSDNode *ShiftC =
1674 isConstOrConstSplat(N: Op0.getOperand(i: 1), DemandedElts)) {
1675 // Don't crash on an oversized shift. We can not guarantee that a
1676 // bogus shift has been simplified to undef.
1677 if (ShiftC->getAPIntValue().ult(RHS: BitWidth)) {
1678 uint64_t ShiftAmt = ShiftC->getZExtValue();
1679 APInt Ones = APInt::getAllOnes(numBits: BitWidth);
1680 Ones = Op0Opcode == ISD::SHL ? Ones.shl(shiftAmt: ShiftAmt)
1681 : Ones.lshr(shiftAmt: ShiftAmt);
1682 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1683 isDesirableToCommuteXorWithShift(N: Op.getNode())) {
1684 // If the xor constant is a demanded mask, do a 'not' before the
1685 // shift:
1686 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1687 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1688 SDValue Not = TLO.DAG.getNOT(DL: dl, Val: Op0.getOperand(i: 0), VT);
1689 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op0Opcode, DL: dl, VT, N1: Not,
1690 N2: Op0.getOperand(i: 1)));
1691 }
1692 }
1693 }
1694 }
1695 }
1696
1697 // If we can't turn this into a 'not', try to shrink the constant.
1698 if (!C || !C->isAllOnes())
1699 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1700 return true;
1701
1702 // Attempt to avoid multi-use ops if we don't need anything from them.
1703 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1704 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1705 Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1706 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1707 Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1708 if (DemandedOp0 || DemandedOp1) {
1709 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1710 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1711 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1712 return TLO.CombineTo(O: Op, N: NewOp);
1713 }
1714 }
1715
1716 Known ^= Known2;
1717 break;
1718 }
1719 case ISD::SELECT:
1720 if (SimplifyDemandedBits(Op: Op.getOperand(i: 2), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1721 Known, TLO, Depth: Depth + 1))
1722 return true;
1723 if (SimplifyDemandedBits(Op: Op.getOperand(i: 1), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1724 Known&: Known2, TLO, Depth: Depth + 1))
1725 return true;
1726
1727 // If the operands are constants, see if we can simplify them.
1728 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1729 return true;
1730
1731 // Only known if known in both the LHS and RHS.
1732 Known = Known.intersectWith(RHS: Known2);
1733 break;
1734 case ISD::VSELECT:
1735 if (SimplifyDemandedBits(Op: Op.getOperand(i: 2), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1736 Known, TLO, Depth: Depth + 1))
1737 return true;
1738 if (SimplifyDemandedBits(Op: Op.getOperand(i: 1), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1739 Known&: Known2, TLO, Depth: Depth + 1))
1740 return true;
1741
1742 // Only known if known in both the LHS and RHS.
1743 Known = Known.intersectWith(RHS: Known2);
1744 break;
1745 case ISD::SELECT_CC:
1746 if (SimplifyDemandedBits(Op: Op.getOperand(i: 3), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1747 Known, TLO, Depth: Depth + 1))
1748 return true;
1749 if (SimplifyDemandedBits(Op: Op.getOperand(i: 2), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1750 Known&: Known2, TLO, Depth: Depth + 1))
1751 return true;
1752
1753 // If the operands are constants, see if we can simplify them.
1754 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1755 return true;
1756
1757 // Only known if known in both the LHS and RHS.
1758 Known = Known.intersectWith(RHS: Known2);
1759 break;
1760 case ISD::SETCC: {
1761 SDValue Op0 = Op.getOperand(i: 0);
1762 SDValue Op1 = Op.getOperand(i: 1);
1763 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get();
1764 // If we're testing X < 0, X >= 0, X <= -1 or X > -1
1765 // (X is of integer type) then we only need the sign mask of the previous
1766 // result
1767 if (Op1.getValueType().isInteger() &&
1768 (((CC == ISD::SETLT || CC == ISD::SETGE) && isNullOrNullSplat(V: Op1)) ||
1769 ((CC == ISD::SETLE || CC == ISD::SETGT) &&
1770 isAllOnesOrAllOnesSplat(V: Op1)))) {
1771 KnownBits KnownOp0;
1772 if (SimplifyDemandedBits(
1773 Op: Op0, OriginalDemandedBits: APInt::getSignMask(BitWidth: Op0.getScalarValueSizeInBits()),
1774 OriginalDemandedElts: DemandedElts, Known&: KnownOp0, TLO, Depth: Depth + 1))
1775 return true;
1776 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1777 // width as the setcc result, and (3) the result of a setcc conforms to 0
1778 // or -1, we may be able to bypass the setcc.
1779 if (DemandedBits.isSignMask() &&
1780 Op0.getScalarValueSizeInBits() == BitWidth &&
1781 getBooleanContents(Type: Op0.getValueType()) ==
1782 BooleanContent::ZeroOrNegativeOneBooleanContent) {
1783 // If we remove a >= 0 or > -1 (for integers), we need to introduce a
1784 // NOT Operation
1785 if (CC == ISD::SETGE || CC == ISD::SETGT) {
1786 SDLoc DL(Op);
1787 EVT VT = Op0.getValueType();
1788 SDValue NotOp0 = TLO.DAG.getNOT(DL, Val: Op0, VT);
1789 return TLO.CombineTo(O: Op, N: NotOp0);
1790 }
1791 return TLO.CombineTo(O: Op, N: Op0);
1792 }
1793 }
1794 if (getBooleanContents(Type: Op0.getValueType()) ==
1795 TargetLowering::ZeroOrOneBooleanContent &&
1796 BitWidth > 1)
1797 Known.Zero.setBitsFrom(1);
1798 break;
1799 }
1800 case ISD::SHL: {
1801 SDValue Op0 = Op.getOperand(i: 0);
1802 SDValue Op1 = Op.getOperand(i: 1);
1803 EVT ShiftVT = Op1.getValueType();
1804
1805 if (std::optional<unsigned> KnownSA =
1806 TLO.DAG.getValidShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
1807 unsigned ShAmt = *KnownSA;
1808 if (ShAmt == 0)
1809 return TLO.CombineTo(O: Op, N: Op0);
1810
1811 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1812 // single shift. We can do this if the bottom bits (which are shifted
1813 // out) are never demanded.
1814 // TODO - support non-uniform vector amounts.
1815 if (Op0.getOpcode() == ISD::SRL) {
1816 if (!DemandedBits.intersects(RHS: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ShAmt))) {
1817 if (std::optional<unsigned> InnerSA =
1818 TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + 2)) {
1819 unsigned C1 = *InnerSA;
1820 unsigned Opc = ISD::SHL;
1821 int Diff = ShAmt - C1;
1822 if (Diff < 0) {
1823 Diff = -Diff;
1824 Opc = ISD::SRL;
1825 }
1826 SDValue NewSA = TLO.DAG.getConstant(Val: Diff, DL: dl, VT: ShiftVT);
1827 return TLO.CombineTo(
1828 O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, N1: Op0.getOperand(i: 0), N2: NewSA));
1829 }
1830 }
1831 }
1832
1833 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1834 // are not demanded. This will likely allow the anyext to be folded away.
1835 // TODO - support non-uniform vector amounts.
1836 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1837 SDValue InnerOp = Op0.getOperand(i: 0);
1838 EVT InnerVT = InnerOp.getValueType();
1839 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1840 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1841 isTypeDesirableForOp(ISD::SHL, VT: InnerVT)) {
1842 SDValue NarrowShl = TLO.DAG.getNode(
1843 Opcode: ISD::SHL, DL: dl, VT: InnerVT, N1: InnerOp,
1844 N2: TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: InnerVT, DL: dl));
1845 return TLO.CombineTo(
1846 O: Op, N: TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: NarrowShl));
1847 }
1848
1849 // Repeat the SHL optimization above in cases where an extension
1850 // intervenes: (shl (anyext (shr x, c1)), c2) to
1851 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1852 // aren't demanded (as above) and that the shifted upper c1 bits of
1853 // x aren't demanded.
1854 // TODO - support non-uniform vector amounts.
1855 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1856 InnerOp.hasOneUse()) {
1857 if (std::optional<unsigned> SA2 = TLO.DAG.getValidShiftAmount(
1858 V: InnerOp, DemandedElts, Depth: Depth + 2)) {
1859 unsigned InnerShAmt = *SA2;
1860 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1861 DemandedBits.getActiveBits() <=
1862 (InnerBits - InnerShAmt + ShAmt) &&
1863 DemandedBits.countr_zero() >= ShAmt) {
1864 SDValue NewSA =
1865 TLO.DAG.getConstant(Val: ShAmt - InnerShAmt, DL: dl, VT: ShiftVT);
1866 SDValue NewExt = TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT,
1867 Operand: InnerOp.getOperand(i: 0));
1868 return TLO.CombineTo(
1869 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: NewExt, N2: NewSA));
1870 }
1871 }
1872 }
1873 }
1874
1875 APInt InDemandedMask = DemandedBits.lshr(shiftAmt: ShAmt);
1876 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
1877 Depth: Depth + 1)) {
1878 // Disable the nsw and nuw flags. We can no longer guarantee that we
1879 // won't wrap after simplification.
1880 Op->dropFlags(Mask: SDNodeFlags::NoWrap);
1881 return true;
1882 }
1883 Known <<= ShAmt;
1884 // low bits known zero.
1885 Known.Zero.setLowBits(ShAmt);
1886
1887 // Attempt to avoid multi-use ops if we don't need anything from them.
1888 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1889 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1890 Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1891 if (DemandedOp0) {
1892 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: DemandedOp0, N2: Op1);
1893 return TLO.CombineTo(O: Op, N: NewOp);
1894 }
1895 }
1896
1897 // TODO: Can we merge this fold with the one below?
1898 // Try shrinking the operation as long as the shift amount will still be
1899 // in range.
1900 if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1901 Op.getNode()->hasOneUse()) {
1902 // Search for the smallest integer type with free casts to and from
1903 // Op's type. For expedience, just check power-of-2 integer types.
1904 unsigned DemandedSize = DemandedBits.getActiveBits();
1905 for (unsigned SmallVTBits = llvm::bit_ceil(Value: DemandedSize);
1906 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(A: SmallVTBits)) {
1907 EVT SmallVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: SmallVTBits);
1908 if (isNarrowingProfitable(N: Op.getNode(), SrcVT: VT, DestVT: SmallVT) &&
1909 isTypeDesirableForOp(ISD::SHL, VT: SmallVT) &&
1910 isTruncateFree(FromVT: VT, ToVT: SmallVT) && isZExtFree(FromTy: SmallVT, ToTy: VT) &&
1911 (!TLO.LegalOperations() || isOperationLegal(Op: ISD::SHL, VT: SmallVT))) {
1912 assert(DemandedSize <= SmallVTBits &&
1913 "Narrowed below demanded bits?");
1914 // We found a type with free casts.
1915 SDValue NarrowShl = TLO.DAG.getNode(
1916 Opcode: ISD::SHL, DL: dl, VT: SmallVT,
1917 N1: TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SmallVT, Operand: Op.getOperand(i: 0)),
1918 N2: TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: SmallVT, DL: dl));
1919 return TLO.CombineTo(
1920 O: Op, N: TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: NarrowShl));
1921 }
1922 }
1923 }
1924
1925 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1926 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1927 // Only do this if we demand the upper half so the knownbits are correct.
1928 unsigned HalfWidth = BitWidth / 2;
1929 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1930 DemandedBits.countLeadingOnes() >= HalfWidth) {
1931 EVT HalfVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: HalfWidth);
1932 if (isNarrowingProfitable(N: Op.getNode(), SrcVT: VT, DestVT: HalfVT) &&
1933 isTypeDesirableForOp(ISD::SHL, VT: HalfVT) &&
1934 isTruncateFree(FromVT: VT, ToVT: HalfVT) && isZExtFree(FromTy: HalfVT, ToTy: VT) &&
1935 (!TLO.LegalOperations() || isOperationLegal(Op: ISD::SHL, VT: HalfVT))) {
1936 // If we're demanding the upper bits at all, we must ensure
1937 // that the upper bits of the shift result are known to be zero,
1938 // which is equivalent to the narrow shift being NUW.
1939 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1940 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1941 SDNodeFlags Flags;
1942 Flags.setNoSignedWrap(IsNSW);
1943 Flags.setNoUnsignedWrap(IsNUW);
1944 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HalfVT, Operand: Op0);
1945 SDValue NewShiftAmt =
1946 TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: HalfVT, DL: dl);
1947 SDValue NewShift = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HalfVT, N1: NewOp,
1948 N2: NewShiftAmt, Flags);
1949 SDValue NewExt =
1950 TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: NewShift);
1951 return TLO.CombineTo(O: Op, N: NewExt);
1952 }
1953 }
1954 }
1955 } else {
1956 // This is a variable shift, so we can't shift the demand mask by a known
1957 // amount. But if we are not demanding high bits, then we are not
1958 // demanding those bits from the pre-shifted operand either.
1959 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1960 APInt DemandedFromOp(APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - CTLZ));
1961 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedFromOp, OriginalDemandedElts: DemandedElts, Known, TLO,
1962 Depth: Depth + 1)) {
1963 // Disable the nsw and nuw flags. We can no longer guarantee that we
1964 // won't wrap after simplification.
1965 Op->dropFlags(Mask: SDNodeFlags::NoWrap);
1966 return true;
1967 }
1968 Known.resetAll();
1969 }
1970 }
1971
1972 // If we are only demanding sign bits then we can use the shift source
1973 // directly.
1974 if (std::optional<unsigned> MaxSA =
1975 TLO.DAG.getValidMaximumShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
1976 unsigned ShAmt = *MaxSA;
1977 unsigned NumSignBits =
1978 TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1);
1979 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1980 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1981 return TLO.CombineTo(O: Op, N: Op0);
1982 }
1983 break;
1984 }
1985 case ISD::SRL: {
1986 SDValue Op0 = Op.getOperand(i: 0);
1987 SDValue Op1 = Op.getOperand(i: 1);
1988 EVT ShiftVT = Op1.getValueType();
1989
1990 if (std::optional<unsigned> KnownSA =
1991 TLO.DAG.getValidShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
1992 unsigned ShAmt = *KnownSA;
1993 if (ShAmt == 0)
1994 return TLO.CombineTo(O: Op, N: Op0);
1995
1996 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1997 // single shift. We can do this if the top bits (which are shifted out)
1998 // are never demanded.
1999 // TODO - support non-uniform vector amounts.
2000 if (Op0.getOpcode() == ISD::SHL) {
2001 if (!DemandedBits.intersects(RHS: APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: ShAmt))) {
2002 if (std::optional<unsigned> InnerSA =
2003 TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + 2)) {
2004 unsigned C1 = *InnerSA;
2005 unsigned Opc = ISD::SRL;
2006 int Diff = ShAmt - C1;
2007 if (Diff < 0) {
2008 Diff = -Diff;
2009 Opc = ISD::SHL;
2010 }
2011 SDValue NewSA = TLO.DAG.getConstant(Val: Diff, DL: dl, VT: ShiftVT);
2012 return TLO.CombineTo(
2013 O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, N1: Op0.getOperand(i: 0), N2: NewSA));
2014 }
2015 }
2016 }
2017
2018 // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
2019 // single sra. We can do this if the top bits are never demanded.
2020 if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
2021 if (!DemandedBits.intersects(RHS: APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: ShAmt))) {
2022 if (std::optional<unsigned> InnerSA =
2023 TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + 2)) {
2024 unsigned C1 = *InnerSA;
2025 // Clamp the combined shift amount if it exceeds the bit width.
2026 unsigned Combined = std::min(a: C1 + ShAmt, b: BitWidth - 1);
2027 SDValue NewSA = TLO.DAG.getConstant(Val: Combined, DL: dl, VT: ShiftVT);
2028 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRA, DL: dl, VT,
2029 N1: Op0.getOperand(i: 0), N2: NewSA));
2030 }
2031 }
2032 }
2033
2034 APInt InDemandedMask = (DemandedBits << ShAmt);
2035
2036 // If the shift is exact, then it does demand the low bits (and knows that
2037 // they are zero).
2038 if (Op->getFlags().hasExact())
2039 InDemandedMask.setLowBits(ShAmt);
2040
2041 // Narrow shift to lower half - similar to ShrinkDemandedOp.
2042 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
2043 if ((BitWidth % 2) == 0 && !VT.isVector()) {
2044 APInt HiBits = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth / 2);
2045 EVT HalfVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: BitWidth / 2);
2046 if (isNarrowingProfitable(N: Op.getNode(), SrcVT: VT, DestVT: HalfVT) &&
2047 isTypeDesirableForOp(ISD::SRL, VT: HalfVT) &&
2048 isTruncateFree(FromVT: VT, ToVT: HalfVT) && isZExtFree(FromTy: HalfVT, ToTy: VT) &&
2049 (!TLO.LegalOperations() || isOperationLegal(Op: ISD::SRL, VT: HalfVT)) &&
2050 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
2051 TLO.DAG.MaskedValueIsZero(Op: Op0, Mask: HiBits))) {
2052 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HalfVT, Operand: Op0);
2053 SDValue NewShiftAmt =
2054 TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: HalfVT, DL: dl);
2055 SDValue NewShift =
2056 TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HalfVT, N1: NewOp, N2: NewShiftAmt);
2057 return TLO.CombineTo(
2058 O: Op, N: TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: NewShift));
2059 }
2060 }
2061
2062 // Compute the new bits that are at the top now.
2063 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
2064 Depth: Depth + 1))
2065 return true;
2066 Known >>= ShAmt;
2067 // High bits known zero.
2068 Known.Zero.setHighBits(ShAmt);
2069
2070 // Attempt to avoid multi-use ops if we don't need anything from them.
2071 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2072 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2073 Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2074 if (DemandedOp0) {
2075 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: DemandedOp0, N2: Op1);
2076 return TLO.CombineTo(O: Op, N: NewOp);
2077 }
2078 }
2079 } else {
2080 // Use generic knownbits computation as it has support for non-uniform
2081 // shift amounts.
2082 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2083 }
2084
2085 // If we are only demanding sign bits then we can use the shift source
2086 // directly.
2087 if (std::optional<unsigned> MaxSA =
2088 TLO.DAG.getValidMaximumShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
2089 unsigned ShAmt = *MaxSA;
2090 // Must already be signbits in DemandedBits bounds, and can't demand any
2091 // shifted in zeroes.
2092 if (DemandedBits.countl_zero() >= ShAmt) {
2093 unsigned NumSignBits =
2094 TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1);
2095 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
2096 return TLO.CombineTo(O: Op, N: Op0);
2097 }
2098 }
2099
2100 // Try to match AVG patterns (after shift simplification).
2101 if (SDValue AVG = combineShiftToAVG(Op, TLO, TLI: *this, DemandedBits,
2102 DemandedElts, Depth: Depth + 1))
2103 return TLO.CombineTo(O: Op, N: AVG);
2104
2105 break;
2106 }
2107 case ISD::SRA: {
2108 SDValue Op0 = Op.getOperand(i: 0);
2109 SDValue Op1 = Op.getOperand(i: 1);
2110 EVT ShiftVT = Op1.getValueType();
2111
2112 // If we only want bits that already match the signbit then we don't need
2113 // to shift.
2114 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2115 if (TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1) >=
2116 NumHiDemandedBits)
2117 return TLO.CombineTo(O: Op, N: Op0);
2118
2119 // If this is an arithmetic shift right and only the low-bit is set, we can
2120 // always convert this into a logical shr, even if the shift amount is
2121 // variable. The low bit of the shift cannot be an input sign bit unless
2122 // the shift amount is >= the size of the datatype, which is undefined.
2123 if (DemandedBits.isOne())
2124 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1));
2125
2126 if (std::optional<unsigned> KnownSA =
2127 TLO.DAG.getValidShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
2128 unsigned ShAmt = *KnownSA;
2129 if (ShAmt == 0)
2130 return TLO.CombineTo(O: Op, N: Op0);
2131
2132 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2133 // supports sext_inreg.
2134 if (Op0.getOpcode() == ISD::SHL) {
2135 if (std::optional<unsigned> InnerSA =
2136 TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + 2)) {
2137 unsigned LowBits = BitWidth - ShAmt;
2138 EVT ExtVT = VT.changeElementType(
2139 Context&: *TLO.DAG.getContext(),
2140 EltVT: EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: LowBits));
2141
2142 if (*InnerSA == ShAmt) {
2143 if (!TLO.LegalOperations() ||
2144 getOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: ExtVT) == Legal)
2145 return TLO.CombineTo(
2146 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT,
2147 N1: Op0.getOperand(i: 0),
2148 N2: TLO.DAG.getValueType(ExtVT)));
2149
2150 // Even if we can't convert to sext_inreg, we might be able to
2151 // remove this shift pair if the input is already sign extended.
2152 unsigned NumSignBits =
2153 TLO.DAG.ComputeNumSignBits(Op: Op0.getOperand(i: 0), DemandedElts);
2154 if (NumSignBits > ShAmt)
2155 return TLO.CombineTo(O: Op, N: Op0.getOperand(i: 0));
2156 }
2157 }
2158 }
2159
2160 APInt InDemandedMask = (DemandedBits << ShAmt);
2161
2162 // If the shift is exact, then it does demand the low bits (and knows that
2163 // they are zero).
2164 if (Op->getFlags().hasExact())
2165 InDemandedMask.setLowBits(ShAmt);
2166
2167 // If any of the demanded bits are produced by the sign extension, we also
2168 // demand the input sign bit.
2169 if (DemandedBits.countl_zero() < ShAmt)
2170 InDemandedMask.setSignBit();
2171
2172 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
2173 Depth: Depth + 1))
2174 return true;
2175 Known >>= ShAmt;
2176
2177 // If the input sign bit is known to be zero, or if none of the top bits
2178 // are demanded, turn this into an unsigned shift right.
2179 if (Known.Zero[BitWidth - ShAmt - 1] ||
2180 DemandedBits.countl_zero() >= ShAmt) {
2181 SDNodeFlags Flags;
2182 Flags.setExact(Op->getFlags().hasExact());
2183 return TLO.CombineTo(
2184 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1, Flags));
2185 }
2186
2187 int Log2 = DemandedBits.exactLogBase2();
2188 if (Log2 >= 0) {
2189 // The bit must come from the sign.
2190 SDValue NewSA = TLO.DAG.getConstant(Val: BitWidth - 1 - Log2, DL: dl, VT: ShiftVT);
2191 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: NewSA));
2192 }
2193
2194 if (Known.One[BitWidth - ShAmt - 1])
2195 // New bits are known one.
2196 Known.One.setHighBits(ShAmt);
2197
2198 // Attempt to avoid multi-use ops if we don't need anything from them.
2199 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2200 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2201 Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2202 if (DemandedOp0) {
2203 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: DemandedOp0, N2: Op1);
2204 return TLO.CombineTo(O: Op, N: NewOp);
2205 }
2206 }
2207 }
2208
2209 // Try to match AVG patterns (after shift simplification).
2210 if (SDValue AVG = combineShiftToAVG(Op, TLO, TLI: *this, DemandedBits,
2211 DemandedElts, Depth: Depth + 1))
2212 return TLO.CombineTo(O: Op, N: AVG);
2213
2214 break;
2215 }
2216 case ISD::FSHL:
2217 case ISD::FSHR: {
2218 SDValue Op0 = Op.getOperand(i: 0);
2219 SDValue Op1 = Op.getOperand(i: 1);
2220 SDValue Op2 = Op.getOperand(i: 2);
2221 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2222
2223 if (ConstantSDNode *SA = isConstOrConstSplat(N: Op2, DemandedElts)) {
2224 unsigned Amt = SA->getAPIntValue().urem(RHS: BitWidth);
2225
2226 // For fshl, 0-shift returns the 1st arg.
2227 // For fshr, 0-shift returns the 2nd arg.
2228 if (Amt == 0) {
2229 if (SimplifyDemandedBits(Op: IsFSHL ? Op0 : Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
2230 Known, TLO, Depth: Depth + 1))
2231 return true;
2232 break;
2233 }
2234
2235 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2236 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2237 APInt Demanded0 = DemandedBits.lshr(shiftAmt: IsFSHL ? Amt : (BitWidth - Amt));
2238 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2239 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: Demanded0, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2240 Depth: Depth + 1))
2241 return true;
2242 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: Demanded1, OriginalDemandedElts: DemandedElts, Known, TLO,
2243 Depth: Depth + 1))
2244 return true;
2245
2246 Known2 <<= (IsFSHL ? Amt : (BitWidth - Amt));
2247 Known >>= (IsFSHL ? (BitWidth - Amt) : Amt);
2248 Known = Known.unionWith(RHS: Known2);
2249
2250 // Attempt to avoid multi-use ops if we don't need anything from them.
2251 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2252 !DemandedElts.isAllOnes()) {
2253 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2254 Op: Op0, DemandedBits: Demanded0, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2255 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2256 Op: Op1, DemandedBits: Demanded1, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2257 if (DemandedOp0 || DemandedOp1) {
2258 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2259 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2260 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedOp0,
2261 N2: DemandedOp1, N3: Op2);
2262 return TLO.CombineTo(O: Op, N: NewOp);
2263 }
2264 }
2265 }
2266
2267 if (isPowerOf2_32(Value: BitWidth)) {
2268 // Fold FSHR(Op0,Op1,Op2) -> SRL(Op1,Op2)
2269 // iff we're guaranteed not to use Op0.
2270 // TODO: Add FSHL equivalent?
2271 if (!IsFSHL && !DemandedBits.isAllOnes() &&
2272 (!TLO.LegalOperations() || isOperationLegal(Op: ISD::SRL, VT))) {
2273 KnownBits KnownAmt =
2274 TLO.DAG.computeKnownBits(Op: Op2, DemandedElts, Depth: Depth + 1);
2275 unsigned MaxShiftAmt =
2276 KnownAmt.getMaxValue().getLimitedValue(Limit: BitWidth - 1);
2277 // Check we don't demand any shifted bits outside Op1.
2278 if (DemandedBits.countl_zero() >= MaxShiftAmt) {
2279 EVT AmtVT = Op2.getValueType();
2280 SDValue NewAmt =
2281 TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT: AmtVT, N1: Op2,
2282 N2: TLO.DAG.getConstant(Val: BitWidth - 1, DL: dl, VT: AmtVT));
2283 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op1, N2: NewAmt);
2284 return TLO.CombineTo(O: Op, N: NewOp);
2285 }
2286 }
2287
2288 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2289 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2290 if (SimplifyDemandedBits(Op: Op2, OriginalDemandedBits: DemandedAmtBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2291 Depth: Depth + 1))
2292 return true;
2293 }
2294 break;
2295 }
2296 case ISD::ROTL:
2297 case ISD::ROTR: {
2298 SDValue Op0 = Op.getOperand(i: 0);
2299 SDValue Op1 = Op.getOperand(i: 1);
2300 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2301
2302 // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
2303 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1))
2304 return TLO.CombineTo(O: Op, N: Op0);
2305
2306 if (ConstantSDNode *SA = isConstOrConstSplat(N: Op1, DemandedElts)) {
2307 unsigned Amt = SA->getAPIntValue().urem(RHS: BitWidth);
2308 unsigned RevAmt = BitWidth - Amt;
2309
2310 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2311 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2312 APInt Demanded0 = DemandedBits.rotr(rotateAmt: IsROTL ? Amt : RevAmt);
2313 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: Demanded0, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2314 Depth: Depth + 1))
2315 return true;
2316
2317 // rot*(x, 0) --> x
2318 if (Amt == 0)
2319 return TLO.CombineTo(O: Op, N: Op0);
2320
2321 // See if we don't demand either half of the rotated bits.
2322 if ((!TLO.LegalOperations() || isOperationLegal(Op: ISD::SHL, VT)) &&
2323 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2324 Op1 = TLO.DAG.getConstant(Val: IsROTL ? Amt : RevAmt, DL: dl, VT: Op1.getValueType());
2325 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op0, N2: Op1));
2326 }
2327 if ((!TLO.LegalOperations() || isOperationLegal(Op: ISD::SRL, VT)) &&
2328 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2329 Op1 = TLO.DAG.getConstant(Val: IsROTL ? RevAmt : Amt, DL: dl, VT: Op1.getValueType());
2330 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1));
2331 }
2332 }
2333
2334 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2335 if (isPowerOf2_32(Value: BitWidth)) {
2336 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2337 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedAmtBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2338 Depth: Depth + 1))
2339 return true;
2340 }
2341 break;
2342 }
2343 case ISD::SMIN:
2344 case ISD::SMAX:
2345 case ISD::UMIN:
2346 case ISD::UMAX: {
2347 unsigned Opc = Op.getOpcode();
2348 SDValue Op0 = Op.getOperand(i: 0);
2349 SDValue Op1 = Op.getOperand(i: 1);
2350
2351 // If we're only demanding signbits, then we can simplify to OR/AND node.
2352 unsigned BitOp =
2353 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2354 unsigned NumSignBits =
2355 std::min(a: TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1),
2356 b: TLO.DAG.ComputeNumSignBits(Op: Op1, DemandedElts, Depth: Depth + 1));
2357 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2358 if (NumSignBits >= NumDemandedUpperBits)
2359 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: BitOp, DL: SDLoc(Op), VT, N1: Op0, N2: Op1));
2360
2361 // Check if one arg is always less/greater than (or equal) to the other arg.
2362 KnownBits Known0 = TLO.DAG.computeKnownBits(Op: Op0, DemandedElts, Depth: Depth + 1);
2363 KnownBits Known1 = TLO.DAG.computeKnownBits(Op: Op1, DemandedElts, Depth: Depth + 1);
2364 switch (Opc) {
2365 case ISD::SMIN:
2366 if (std::optional<bool> IsSLE = KnownBits::sle(LHS: Known0, RHS: Known1))
2367 return TLO.CombineTo(O: Op, N: *IsSLE ? Op0 : Op1);
2368 if (std::optional<bool> IsSLT = KnownBits::slt(LHS: Known0, RHS: Known1))
2369 return TLO.CombineTo(O: Op, N: *IsSLT ? Op0 : Op1);
2370 Known = KnownBits::smin(LHS: Known0, RHS: Known1);
2371 break;
2372 case ISD::SMAX:
2373 if (std::optional<bool> IsSGE = KnownBits::sge(LHS: Known0, RHS: Known1))
2374 return TLO.CombineTo(O: Op, N: *IsSGE ? Op0 : Op1);
2375 if (std::optional<bool> IsSGT = KnownBits::sgt(LHS: Known0, RHS: Known1))
2376 return TLO.CombineTo(O: Op, N: *IsSGT ? Op0 : Op1);
2377 Known = KnownBits::smax(LHS: Known0, RHS: Known1);
2378 break;
2379 case ISD::UMIN:
2380 if (std::optional<bool> IsULE = KnownBits::ule(LHS: Known0, RHS: Known1))
2381 return TLO.CombineTo(O: Op, N: *IsULE ? Op0 : Op1);
2382 if (std::optional<bool> IsULT = KnownBits::ult(LHS: Known0, RHS: Known1))
2383 return TLO.CombineTo(O: Op, N: *IsULT ? Op0 : Op1);
2384 Known = KnownBits::umin(LHS: Known0, RHS: Known1);
2385 break;
2386 case ISD::UMAX:
2387 if (std::optional<bool> IsUGE = KnownBits::uge(LHS: Known0, RHS: Known1))
2388 return TLO.CombineTo(O: Op, N: *IsUGE ? Op0 : Op1);
2389 if (std::optional<bool> IsUGT = KnownBits::ugt(LHS: Known0, RHS: Known1))
2390 return TLO.CombineTo(O: Op, N: *IsUGT ? Op0 : Op1);
2391 Known = KnownBits::umax(LHS: Known0, RHS: Known1);
2392 break;
2393 }
2394 break;
2395 }
2396 case ISD::BITREVERSE: {
2397 SDValue Src = Op.getOperand(i: 0);
2398 APInt DemandedSrcBits = DemandedBits.reverseBits();
2399 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2400 Depth: Depth + 1))
2401 return true;
2402 Known = Known2.reverseBits();
2403 break;
2404 }
2405 case ISD::BSWAP: {
2406 SDValue Src = Op.getOperand(i: 0);
2407
2408 // If the only bits demanded come from one byte of the bswap result,
2409 // just shift the input byte into position to eliminate the bswap.
2410 unsigned NLZ = DemandedBits.countl_zero();
2411 unsigned NTZ = DemandedBits.countr_zero();
2412
2413 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2414 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2415 // have 14 leading zeros, round to 8.
2416 NLZ = alignDown(Value: NLZ, Align: 8);
2417 NTZ = alignDown(Value: NTZ, Align: 8);
2418 // If we need exactly one byte, we can do this transformation.
2419 if (BitWidth - NLZ - NTZ == 8) {
2420 // Replace this with either a left or right shift to get the byte into
2421 // the right place.
2422 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2423 if (!TLO.LegalOperations() || isOperationLegal(Op: ShiftOpcode, VT)) {
2424 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2425 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(Val: ShiftAmount, VT, DL: dl);
2426 SDValue NewOp = TLO.DAG.getNode(Opcode: ShiftOpcode, DL: dl, VT, N1: Src, N2: ShAmt);
2427 return TLO.CombineTo(O: Op, N: NewOp);
2428 }
2429 }
2430
2431 APInt DemandedSrcBits = DemandedBits.byteSwap();
2432 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2433 Depth: Depth + 1))
2434 return true;
2435 Known = Known2.byteSwap();
2436 break;
2437 }
2438 case ISD::CTPOP: {
2439 // If only 1 bit is demanded, replace with PARITY as long as we're before
2440 // op legalization.
2441 // FIXME: Limit to scalars for now.
2442 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2443 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::PARITY, DL: dl, VT,
2444 Operand: Op.getOperand(i: 0)));
2445
2446 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2447 break;
2448 }
2449 case ISD::SIGN_EXTEND_INREG: {
2450 SDValue Op0 = Op.getOperand(i: 0);
2451 EVT ExVT = cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT();
2452 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2453
2454 // If we only care about the highest bit, don't bother shifting right.
2455 if (DemandedBits.isSignMask()) {
2456 unsigned MinSignedBits =
2457 TLO.DAG.ComputeMaxSignificantBits(Op: Op0, DemandedElts, Depth: Depth + 1);
2458 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2459 // However if the input is already sign extended we expect the sign
2460 // extension to be dropped altogether later and do not simplify.
2461 if (!AlreadySignExtended) {
2462 // Compute the correct shift amount type, which must be getShiftAmountTy
2463 // for scalar types after legalization.
2464 SDValue ShiftAmt =
2465 TLO.DAG.getShiftAmountConstant(Val: BitWidth - ExVTBits, VT, DL: dl);
2466 return TLO.CombineTo(O: Op,
2467 N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op0, N2: ShiftAmt));
2468 }
2469 }
2470
2471 // If none of the extended bits are demanded, eliminate the sextinreg.
2472 if (DemandedBits.getActiveBits() <= ExVTBits)
2473 return TLO.CombineTo(O: Op, N: Op0);
2474
2475 APInt InputDemandedBits = DemandedBits.getLoBits(numBits: ExVTBits);
2476
2477 // Since the sign extended bits are demanded, we know that the sign
2478 // bit is demanded.
2479 InputDemandedBits.setBit(ExVTBits - 1);
2480
2481 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InputDemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
2482 Depth: Depth + 1))
2483 return true;
2484
2485 // If the sign bit of the input is known set or clear, then we know the
2486 // top bits of the result.
2487
2488 // If the input sign bit is known zero, convert this into a zero extension.
2489 if (Known.Zero[ExVTBits - 1])
2490 return TLO.CombineTo(O: Op, N: TLO.DAG.getZeroExtendInReg(Op: Op0, DL: dl, VT: ExVT));
2491
2492 APInt Mask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ExVTBits);
2493 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2494 Known.One.setBitsFrom(ExVTBits);
2495 Known.Zero &= Mask;
2496 } else { // Input sign bit unknown
2497 Known.Zero &= Mask;
2498 Known.One &= Mask;
2499 }
2500 break;
2501 }
2502 case ISD::BUILD_PAIR: {
2503 EVT HalfVT = Op.getOperand(i: 0).getValueType();
2504 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2505
2506 APInt MaskLo = DemandedBits.getLoBits(numBits: HalfBitWidth).trunc(width: HalfBitWidth);
2507 APInt MaskHi = DemandedBits.getHiBits(numBits: HalfBitWidth).trunc(width: HalfBitWidth);
2508
2509 KnownBits KnownLo, KnownHi;
2510
2511 if (SimplifyDemandedBits(Op: Op.getOperand(i: 0), DemandedBits: MaskLo, Known&: KnownLo, TLO, Depth: Depth + 1))
2512 return true;
2513
2514 if (SimplifyDemandedBits(Op: Op.getOperand(i: 1), DemandedBits: MaskHi, Known&: KnownHi, TLO, Depth: Depth + 1))
2515 return true;
2516
2517 Known = KnownHi.concat(Lo: KnownLo);
2518 break;
2519 }
2520 case ISD::ZERO_EXTEND_VECTOR_INREG:
2521 if (VT.isScalableVector())
2522 return false;
2523 [[fallthrough]];
2524 case ISD::ZERO_EXTEND: {
2525 SDValue Src = Op.getOperand(i: 0);
2526 EVT SrcVT = Src.getValueType();
2527 unsigned InBits = SrcVT.getScalarSizeInBits();
2528 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2529 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2530
2531 // If none of the top bits are demanded, convert this into an any_extend.
2532 if (DemandedBits.getActiveBits() <= InBits) {
2533 // If we only need the non-extended bits of the bottom element
2534 // then we can just bitcast to the result.
2535 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2536 VT.getSizeInBits() == SrcVT.getSizeInBits())
2537 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2538
2539 unsigned Opc =
2540 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2541 if (!TLO.LegalOperations() || isOperationLegal(Op: Opc, VT))
2542 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src));
2543 }
2544
2545 APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2546 APInt InDemandedElts = DemandedElts.zext(width: InElts);
2547 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2548 Depth: Depth + 1)) {
2549 Op->dropFlags(Mask: SDNodeFlags::NonNeg);
2550 return true;
2551 }
2552 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2553 Known = Known.zext(BitWidth);
2554
2555 // Attempt to avoid multi-use ops if we don't need anything from them.
2556 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2557 Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
2558 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2559 break;
2560 }
2561 case ISD::SIGN_EXTEND_VECTOR_INREG:
2562 if (VT.isScalableVector())
2563 return false;
2564 [[fallthrough]];
2565 case ISD::SIGN_EXTEND: {
2566 SDValue Src = Op.getOperand(i: 0);
2567 EVT SrcVT = Src.getValueType();
2568 unsigned InBits = SrcVT.getScalarSizeInBits();
2569 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2570 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2571
2572 APInt InDemandedElts = DemandedElts.zext(width: InElts);
2573 APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2574
2575 // Since some of the sign extended bits are demanded, we know that the sign
2576 // bit is demanded.
2577 InDemandedBits.setBit(InBits - 1);
2578
2579 // If none of the top bits are demanded, convert this into an any_extend.
2580 if (DemandedBits.getActiveBits() <= InBits) {
2581 // If we only need the non-extended bits of the bottom element
2582 // then we can just bitcast to the result.
2583 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2584 VT.getSizeInBits() == SrcVT.getSizeInBits())
2585 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2586
2587 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2588 if (getBooleanContents(Type: VT) != ZeroOrNegativeOneBooleanContent ||
2589 TLO.DAG.ComputeNumSignBits(Op: Src, DemandedElts: InDemandedElts, Depth: Depth + 1) !=
2590 InBits) {
2591 unsigned Opc =
2592 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2593 if (!TLO.LegalOperations() || isOperationLegal(Op: Opc, VT))
2594 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src));
2595 }
2596 }
2597
2598 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2599 Depth: Depth + 1))
2600 return true;
2601 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2602
2603 // If the sign bit is known one, the top bits match.
2604 Known = Known.sext(BitWidth);
2605
2606 // If the sign bit is known zero, convert this to a zero extend.
2607 if (Known.isNonNegative()) {
2608 unsigned Opc =
2609 IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2610 if (!TLO.LegalOperations() || isOperationLegal(Op: Opc, VT)) {
2611 SDNodeFlags Flags;
2612 if (!IsVecInReg)
2613 Flags |= SDNodeFlags::NonNeg;
2614 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src, Flags));
2615 }
2616 }
2617
2618 // Attempt to avoid multi-use ops if we don't need anything from them.
2619 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2620 Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
2621 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2622 break;
2623 }
2624 case ISD::ANY_EXTEND_VECTOR_INREG:
2625 if (VT.isScalableVector())
2626 return false;
2627 [[fallthrough]];
2628 case ISD::ANY_EXTEND: {
2629 SDValue Src = Op.getOperand(i: 0);
2630 EVT SrcVT = Src.getValueType();
2631 unsigned InBits = SrcVT.getScalarSizeInBits();
2632 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2633 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2634
2635 // If we only need the bottom element then we can just bitcast.
2636 // TODO: Handle ANY_EXTEND?
2637 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2638 VT.getSizeInBits() == SrcVT.getSizeInBits())
2639 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2640
2641 APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2642 APInt InDemandedElts = DemandedElts.zext(width: InElts);
2643 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2644 Depth: Depth + 1))
2645 return true;
2646 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2647 Known = Known.anyext(BitWidth);
2648
2649 // Attempt to avoid multi-use ops if we don't need anything from them.
2650 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2651 Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
2652 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2653 break;
2654 }
2655 case ISD::TRUNCATE: {
2656 SDValue Src = Op.getOperand(i: 0);
2657
2658 // Simplify the input, using demanded bit information, and compute the known
2659 // zero/one bits live out.
2660 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2661 APInt TruncMask = DemandedBits.zext(width: OperandBitWidth);
2662 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: TruncMask, OriginalDemandedElts: DemandedElts, Known, TLO,
2663 Depth: Depth + 1)) {
2664 // Disable the nsw and nuw flags. We can no longer guarantee that we
2665 // won't wrap after simplification.
2666 Op->dropFlags(Mask: SDNodeFlags::NoWrap);
2667 return true;
2668 }
2669 Known = Known.trunc(BitWidth);
2670
2671 // Attempt to avoid multi-use ops if we don't need anything from them.
2672 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2673 Op: Src, DemandedBits: TruncMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
2674 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: NewSrc));
2675
2676 // If the input is only used by this truncate, see if we can shrink it based
2677 // on the known demanded bits.
2678 switch (Src.getOpcode()) {
2679 default:
2680 break;
2681 case ISD::SRL:
2682 // Shrink SRL by a constant if none of the high bits shifted in are
2683 // demanded.
2684 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2685 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2686 // undesirable.
2687 break;
2688
2689 if (Src.getNode()->hasOneUse()) {
2690 if (isTruncateFree(Val: Src, VT2: VT) &&
2691 !isTruncateFree(FromVT: Src.getValueType(), ToVT: VT)) {
2692 // If the truncate is only free as trunc(srl), do not turn it
2693 // into srl(trunc). We first check that truncation is free at
2694 // Src's opcode (SRL), then that the truncate is not done via a
2695 // sub-register reference. In testing, if both trunc(srl)'s and
2696 // srl(trunc)'s truncates are free, srl(trunc) performs better;
2697 // if only trunc(srl)'s truncate is free, trunc(srl) is better.
2698 break;
2699 }
2700
2701 std::optional<unsigned> ShAmtC =
2702 TLO.DAG.getValidShiftAmount(V: Src, DemandedElts, Depth: Depth + 2);
2703 if (!ShAmtC || *ShAmtC >= BitWidth)
2704 break;
2705 unsigned ShVal = *ShAmtC;
2706
2707 APInt HighBits =
2708 APInt::getHighBitsSet(numBits: OperandBitWidth, hiBitsSet: OperandBitWidth - BitWidth);
2709 HighBits.lshrInPlace(ShiftAmt: ShVal);
2710 HighBits = HighBits.trunc(width: BitWidth);
2711 if (!(HighBits & DemandedBits)) {
2712 // None of the shifted in bits are needed. Add a truncate of the
2713 // shift input, then shift it.
2714 SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(Val: ShVal, VT, DL: dl);
2715 SDValue NewTrunc =
2716 TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Src.getOperand(i: 0));
2717 return TLO.CombineTo(
2718 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: NewTrunc, N2: NewShAmt));
2719 }
2720 }
2721 break;
2722 }
2723
2724 break;
2725 }
2726 case ISD::AssertZext: {
2727 // AssertZext demands all of the high bits, plus any of the low bits
2728 // demanded by its users.
2729 EVT ZVT = cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT();
2730 APInt InMask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ZVT.getSizeInBits());
2731 if (SimplifyDemandedBits(Op: Op.getOperand(i: 0), DemandedBits: ~InMask | DemandedBits, Known,
2732 TLO, Depth: Depth + 1))
2733 return true;
2734
2735 Known.Zero |= ~InMask;
2736 Known.One &= (~Known.Zero);
2737 break;
2738 }
2739 case ISD::EXTRACT_VECTOR_ELT: {
2740 SDValue Src = Op.getOperand(i: 0);
2741 SDValue Idx = Op.getOperand(i: 1);
2742 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2743 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2744
2745 if (SrcEltCnt.isScalable())
2746 return false;
2747
2748 // Demand the bits from every vector element without a constant index.
2749 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2750 APInt DemandedSrcElts = APInt::getAllOnes(numBits: NumSrcElts);
2751 if (auto *CIdx = dyn_cast<ConstantSDNode>(Val&: Idx))
2752 if (CIdx->getAPIntValue().ult(RHS: NumSrcElts))
2753 DemandedSrcElts = APInt::getOneBitSet(numBits: NumSrcElts, BitNo: CIdx->getZExtValue());
2754
2755 // If BitWidth > EltBitWidth the value is any-extended. So we do not know
2756 // anything about the extended bits.
2757 APInt DemandedSrcBits = DemandedBits;
2758 if (BitWidth > EltBitWidth)
2759 DemandedSrcBits = DemandedSrcBits.trunc(width: EltBitWidth);
2760
2761 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts, Known&: Known2, TLO,
2762 Depth: Depth + 1))
2763 return true;
2764
2765 // Attempt to avoid multi-use ops if we don't need anything from them.
2766 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2767 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2768 Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1)) {
2769 SDValue NewOp =
2770 TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedSrc, N2: Idx);
2771 return TLO.CombineTo(O: Op, N: NewOp);
2772 }
2773 }
2774
2775 Known = Known2;
2776 if (BitWidth > EltBitWidth)
2777 Known = Known.anyext(BitWidth);
2778 break;
2779 }
2780 case ISD::BITCAST: {
2781 if (VT.isScalableVector())
2782 return false;
2783 SDValue Src = Op.getOperand(i: 0);
2784 EVT SrcVT = Src.getValueType();
2785 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2786
2787 // If this is an FP->Int bitcast and if the sign bit is the only
2788 // thing demanded, turn this into a FGETSIGN.
2789 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2790 DemandedBits == APInt::getSignMask(BitWidth: Op.getValueSizeInBits()) &&
2791 SrcVT.isFloatingPoint()) {
2792 if (isOperationLegalOrCustom(Op: ISD::FGETSIGN, VT)) {
2793 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2794 // place. We expect the SHL to be eliminated by other optimizations.
2795 SDValue Sign = TLO.DAG.getNode(Opcode: ISD::FGETSIGN, DL: dl, VT, Operand: Src);
2796 unsigned ShVal = Op.getValueSizeInBits() - 1;
2797 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(Val: ShVal, VT, DL: dl);
2798 return TLO.CombineTo(O: Op,
2799 N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Sign, N2: ShAmt));
2800 }
2801 }
2802
2803 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2804 // Demand the elt/bit if any of the original elts/bits are demanded.
2805 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2806 unsigned Scale = BitWidth / NumSrcEltBits;
2807 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2808 APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
2809 for (unsigned i = 0; i != Scale; ++i) {
2810 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2811 unsigned BitOffset = EltOffset * NumSrcEltBits;
2812 DemandedSrcBits |= DemandedBits.extractBits(numBits: NumSrcEltBits, bitPosition: BitOffset);
2813 }
2814 // Recursive calls below may turn not demanded elements into poison, so we
2815 // need to demand all smaller source elements that maps to a demanded
2816 // destination element.
2817 APInt DemandedSrcElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
2818
2819 APInt KnownSrcUndef, KnownSrcZero;
2820 if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedSrcElts, KnownUndef&: KnownSrcUndef,
2821 KnownZero&: KnownSrcZero, TLO, Depth: Depth + 1))
2822 return true;
2823
2824 KnownBits KnownSrcBits;
2825 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts,
2826 Known&: KnownSrcBits, TLO, Depth: Depth + 1))
2827 return true;
2828 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2829 // TODO - bigendian once we have test coverage.
2830 unsigned Scale = NumSrcEltBits / BitWidth;
2831 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2832 APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
2833 APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
2834 for (unsigned i = 0; i != NumElts; ++i)
2835 if (DemandedElts[i]) {
2836 unsigned Offset = (i % Scale) * BitWidth;
2837 DemandedSrcBits.insertBits(SubBits: DemandedBits, bitPosition: Offset);
2838 DemandedSrcElts.setBit(i / Scale);
2839 }
2840
2841 if (SrcVT.isVector()) {
2842 APInt KnownSrcUndef, KnownSrcZero;
2843 if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedSrcElts, KnownUndef&: KnownSrcUndef,
2844 KnownZero&: KnownSrcZero, TLO, Depth: Depth + 1))
2845 return true;
2846 }
2847
2848 KnownBits KnownSrcBits;
2849 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts,
2850 Known&: KnownSrcBits, TLO, Depth: Depth + 1))
2851 return true;
2852
2853 // Attempt to avoid multi-use ops if we don't need anything from them.
2854 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2855 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2856 Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1)) {
2857 SDValue NewOp = TLO.DAG.getBitcast(VT, V: DemandedSrc);
2858 return TLO.CombineTo(O: Op, N: NewOp);
2859 }
2860 }
2861 }
2862
2863 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2864 // recursive call where Known may be useful to the caller.
2865 if (Depth > 0) {
2866 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2867 return false;
2868 }
2869 break;
2870 }
2871 case ISD::MUL:
2872 if (DemandedBits.isPowerOf2()) {
2873 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2874 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2875 // odd (has LSB set), then the left-shifted low bit of X is the answer.
2876 unsigned CTZ = DemandedBits.countr_zero();
2877 ConstantSDNode *C = isConstOrConstSplat(N: Op.getOperand(i: 1), DemandedElts);
2878 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2879 SDValue AmtC = TLO.DAG.getShiftAmountConstant(Val: CTZ, VT, DL: dl);
2880 SDValue Shl = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op.getOperand(i: 0), N2: AmtC);
2881 return TLO.CombineTo(O: Op, N: Shl);
2882 }
2883 }
2884 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2885 // X * X is odd iff X is odd.
2886 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2887 if (Op.getOperand(i: 0) == Op.getOperand(i: 1) && DemandedBits.ult(RHS: 4)) {
2888 SDValue One = TLO.DAG.getConstant(Val: 1, DL: dl, VT);
2889 SDValue And1 = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op.getOperand(i: 0), N2: One);
2890 return TLO.CombineTo(O: Op, N: And1);
2891 }
2892 [[fallthrough]];
2893 case ISD::PTRADD:
2894 if (Op.getOperand(i: 0).getValueType() != Op.getOperand(i: 1).getValueType())
2895 break;
2896 // PTRADD behaves like ADD if pointers are represented as integers.
2897 [[fallthrough]];
2898 case ISD::ADD:
2899 case ISD::SUB: {
2900 // Add, Sub, and Mul don't demand any bits in positions beyond that
2901 // of the highest bit demanded of them.
2902 SDValue Op0 = Op.getOperand(i: 0), Op1 = Op.getOperand(i: 1);
2903 SDNodeFlags Flags = Op.getNode()->getFlags();
2904 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2905 APInt LoMask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - DemandedBitsLZ);
2906 KnownBits KnownOp0, KnownOp1;
2907 auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2908 const KnownBits &KnownRHS) {
2909 if (Op.getOpcode() == ISD::MUL)
2910 Demanded.clearHighBits(hiBits: KnownRHS.countMinTrailingZeros());
2911 return Demanded;
2912 };
2913 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: LoMask, OriginalDemandedElts: DemandedElts, Known&: KnownOp1, TLO,
2914 Depth: Depth + 1) ||
2915 SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: GetDemandedBitsLHSMask(LoMask, KnownOp1),
2916 OriginalDemandedElts: DemandedElts, Known&: KnownOp0, TLO, Depth: Depth + 1) ||
2917 // See if the operation should be performed at a smaller bit width.
2918 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2919 // Disable the nsw and nuw flags. We can no longer guarantee that we
2920 // won't wrap after simplification.
2921 Op->dropFlags(Mask: SDNodeFlags::NoWrap);
2922 return true;
2923 }
2924
2925 // neg x with only low bit demanded is simply x.
2926 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2927 isNullConstant(V: Op0))
2928 return TLO.CombineTo(O: Op, N: Op1);
2929
2930 // Attempt to avoid multi-use ops if we don't need anything from them.
2931 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2932 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2933 Op: Op0, DemandedBits: LoMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2934 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2935 Op: Op1, DemandedBits: LoMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2936 if (DemandedOp0 || DemandedOp1) {
2937 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2938 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2939 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1,
2940 Flags: Flags & ~SDNodeFlags::NoWrap);
2941 return TLO.CombineTo(O: Op, N: NewOp);
2942 }
2943 }
2944
2945 // If we have a constant operand, we may be able to turn it into -1 if we
2946 // do not demand the high bits. This can make the constant smaller to
2947 // encode, allow more general folding, or match specialized instruction
2948 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2949 // is probably not useful (and could be detrimental).
2950 ConstantSDNode *C = isConstOrConstSplat(N: Op1);
2951 APInt HighMask = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: DemandedBitsLZ);
2952 if (C && !C->isAllOnes() && !C->isOne() &&
2953 (C->getAPIntValue() | HighMask).isAllOnes()) {
2954 SDValue Neg1 = TLO.DAG.getAllOnesConstant(DL: dl, VT);
2955 // Disable the nsw and nuw flags. We can no longer guarantee that we
2956 // won't wrap after simplification.
2957 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Neg1,
2958 Flags: Flags & ~SDNodeFlags::NoWrap);
2959 return TLO.CombineTo(O: Op, N: NewOp);
2960 }
2961
2962 // Match a multiply with a disguised negated-power-of-2 and convert to a
2963 // an equivalent shift-left amount.
2964 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2965 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2966 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2967 return 0;
2968
2969 // Don't touch opaque constants. Also, ignore zero and power-of-2
2970 // multiplies. Those will get folded later.
2971 ConstantSDNode *MulC = isConstOrConstSplat(N: Mul.getOperand(i: 1));
2972 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2973 !MulC->getAPIntValue().isPowerOf2()) {
2974 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2975 if (UnmaskedC.isNegatedPowerOf2())
2976 return (-UnmaskedC).logBase2();
2977 }
2978 return 0;
2979 };
2980
2981 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2982 unsigned ShlAmt) {
2983 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(Val: ShlAmt, VT, DL: dl);
2984 SDValue Shl = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: X, N2: ShlAmtC);
2985 SDValue Res = TLO.DAG.getNode(Opcode: NT, DL: dl, VT, N1: Y, N2: Shl);
2986 return TLO.CombineTo(O: Op, N: Res);
2987 };
2988
2989 if (isOperationLegalOrCustom(Op: ISD::SHL, VT)) {
2990 if (Op.getOpcode() == ISD::ADD) {
2991 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2992 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2993 return foldMul(ISD::SUB, Op0.getOperand(i: 0), Op1, ShAmt);
2994 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2995 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2996 return foldMul(ISD::SUB, Op1.getOperand(i: 0), Op0, ShAmt);
2997 }
2998 if (Op.getOpcode() == ISD::SUB) {
2999 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
3000 if (unsigned ShAmt = getShiftLeftAmt(Op1))
3001 return foldMul(ISD::ADD, Op1.getOperand(i: 0), Op0, ShAmt);
3002 }
3003 }
3004
3005 if (Op.getOpcode() == ISD::MUL) {
3006 Known = KnownBits::mul(LHS: KnownOp0, RHS: KnownOp1);
3007 } else { // Op.getOpcode() is either ISD::ADD, ISD::PTRADD, or ISD::SUB.
3008 Known = KnownBits::computeForAddSub(
3009 Add: Op.getOpcode() != ISD::SUB, NSW: Flags.hasNoSignedWrap(),
3010 NUW: Flags.hasNoUnsignedWrap(), LHS: KnownOp0, RHS: KnownOp1);
3011 }
3012 break;
3013 }
3014 case ISD::FABS: {
3015 SDValue Op0 = Op.getOperand(i: 0);
3016 APInt SignMask = APInt::getSignMask(BitWidth);
3017
3018 if (!DemandedBits.intersects(RHS: SignMask))
3019 return TLO.CombineTo(O: Op, N: Op0);
3020
3021 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
3022 Depth: Depth + 1))
3023 return true;
3024
3025 if (Known.isNonNegative())
3026 return TLO.CombineTo(O: Op, N: Op0);
3027 if (Known.isNegative())
3028 return TLO.CombineTo(
3029 O: Op, N: TLO.DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT, Operand: Op0, Flags: Op->getFlags()));
3030
3031 Known.Zero |= SignMask;
3032 Known.One &= ~SignMask;
3033
3034 break;
3035 }
3036 case ISD::FCOPYSIGN: {
3037 SDValue Op0 = Op.getOperand(i: 0);
3038 SDValue Op1 = Op.getOperand(i: 1);
3039
3040 unsigned BitWidth0 = Op0.getScalarValueSizeInBits();
3041 unsigned BitWidth1 = Op1.getScalarValueSizeInBits();
3042 APInt SignMask0 = APInt::getSignMask(BitWidth: BitWidth0);
3043 APInt SignMask1 = APInt::getSignMask(BitWidth: BitWidth1);
3044
3045 if (!DemandedBits.intersects(RHS: SignMask0))
3046 return TLO.CombineTo(O: Op, N: Op0);
3047
3048 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: ~SignMask0 & DemandedBits, OriginalDemandedElts: DemandedElts,
3049 Known, TLO, Depth: Depth + 1) ||
3050 SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: SignMask1, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
3051 Depth: Depth + 1))
3052 return true;
3053
3054 if (Known2.isNonNegative())
3055 return TLO.CombineTo(
3056 O: Op, N: TLO.DAG.getNode(Opcode: ISD::FABS, DL: dl, VT, Operand: Op0, Flags: Op->getFlags()));
3057
3058 if (Known2.isNegative())
3059 return TLO.CombineTo(
3060 O: Op, N: TLO.DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT,
3061 Operand: TLO.DAG.getNode(Opcode: ISD::FABS, DL: SDLoc(Op0), VT, Operand: Op0)));
3062
3063 Known.Zero &= ~SignMask0;
3064 Known.One &= ~SignMask0;
3065 break;
3066 }
3067 case ISD::FNEG: {
3068 SDValue Op0 = Op.getOperand(i: 0);
3069 APInt SignMask = APInt::getSignMask(BitWidth);
3070
3071 if (!DemandedBits.intersects(RHS: SignMask))
3072 return TLO.CombineTo(O: Op, N: Op0);
3073
3074 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
3075 Depth: Depth + 1))
3076 return true;
3077
3078 if (!Known.isSignUnknown()) {
3079 Known.Zero ^= SignMask;
3080 Known.One ^= SignMask;
3081 }
3082
3083 break;
3084 }
3085 default:
3086 // We also ask the target about intrinsics (which could be specific to it).
3087 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3088 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3089 // TODO: Probably okay to remove after audit; here to reduce change size
3090 // in initial enablement patch for scalable vectors
3091 if (Op.getValueType().isScalableVector())
3092 break;
3093 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
3094 Known, TLO, Depth))
3095 return true;
3096 break;
3097 }
3098
3099 // Just use computeKnownBits to compute output bits.
3100 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
3101 break;
3102 }
3103
3104 // If we know the value of all of the demanded bits, return this as a
3105 // constant.
3106 if (!isTargetCanonicalConstantNode(Op) &&
3107 DemandedBits.isSubsetOf(RHS: Known.Zero | Known.One)) {
3108 // Avoid folding to a constant if any OpaqueConstant is involved.
3109 if (llvm::any_of(Range: Op->ops(), P: [](SDValue V) {
3110 auto *C = dyn_cast<ConstantSDNode>(Val&: V);
3111 return C && C->isOpaque();
3112 }))
3113 return false;
3114 if (VT.isInteger())
3115 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: Known.One, DL: dl, VT));
3116 if (VT.isFloatingPoint())
3117 return TLO.CombineTo(
3118 O: Op, N: TLO.DAG.getConstantFP(Val: APFloat(VT.getFltSemantics(), Known.One),
3119 DL: dl, VT));
3120 }
3121
3122 // A multi use 'all demanded elts' simplify failed to find any knownbits.
3123 // Try again just for the original demanded elts.
3124 // Ensure we do this AFTER constant folding above.
3125 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3126 Known = TLO.DAG.computeKnownBits(Op, DemandedElts: OriginalDemandedElts, Depth);
3127
3128 return false;
3129}
3130
3131bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
3132 const APInt &DemandedElts,
3133 DAGCombinerInfo &DCI) const {
3134 SelectionDAG &DAG = DCI.DAG;
3135 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3136 !DCI.isBeforeLegalizeOps());
3137
3138 APInt KnownUndef, KnownZero;
3139 bool Simplified =
3140 SimplifyDemandedVectorElts(Op, DemandedEltMask: DemandedElts, KnownUndef, KnownZero, TLO);
3141 if (Simplified) {
3142 DCI.AddToWorklist(N: Op.getNode());
3143 DCI.CommitTargetLoweringOpt(TLO);
3144 }
3145
3146 return Simplified;
3147}
3148
3149/// Given a vector binary operation and known undefined elements for each input
3150/// operand, compute whether each element of the output is undefined.
3151static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
3152 const APInt &UndefOp0,
3153 const APInt &UndefOp1) {
3154 EVT VT = BO.getValueType();
3155 assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
3156 "Vector binop only");
3157
3158 EVT EltVT = VT.getVectorElementType();
3159 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3160 assert(UndefOp0.getBitWidth() == NumElts &&
3161 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3162
3163 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3164 const APInt &UndefVals) {
3165 if (UndefVals[Index])
3166 return DAG.getUNDEF(VT: EltVT);
3167
3168 if (auto *BV = dyn_cast<BuildVectorSDNode>(Val&: V)) {
3169 // Try hard to make sure that the getNode() call is not creating temporary
3170 // nodes. Ignore opaque integers because they do not constant fold.
3171 SDValue Elt = BV->getOperand(Num: Index);
3172 auto *C = dyn_cast<ConstantSDNode>(Val&: Elt);
3173 if (isa<ConstantFPSDNode>(Val: Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3174 return Elt;
3175 }
3176
3177 return SDValue();
3178 };
3179
3180 APInt KnownUndef = APInt::getZero(numBits: NumElts);
3181 for (unsigned i = 0; i != NumElts; ++i) {
3182 // If both inputs for this element are either constant or undef and match
3183 // the element type, compute the constant/undef result for this element of
3184 // the vector.
3185 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3186 // not handle FP constants. The code within getNode() should be refactored
3187 // to avoid the danger of creating a bogus temporary node here.
3188 SDValue C0 = getUndefOrConstantElt(BO.getOperand(i: 0), i, UndefOp0);
3189 SDValue C1 = getUndefOrConstantElt(BO.getOperand(i: 1), i, UndefOp1);
3190 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3191 if (DAG.getNode(Opcode: BO.getOpcode(), DL: SDLoc(BO), VT: EltVT, N1: C0, N2: C1).isUndef())
3192 KnownUndef.setBit(i);
3193 }
3194 return KnownUndef;
3195}
3196
3197bool TargetLowering::SimplifyDemandedVectorElts(
3198 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3199 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3200 bool AssumeSingleUse) const {
3201 EVT VT = Op.getValueType();
3202 unsigned Opcode = Op.getOpcode();
3203 APInt DemandedElts = OriginalDemandedElts;
3204 unsigned NumElts = DemandedElts.getBitWidth();
3205 assert(VT.isVector() && "Expected vector op");
3206
3207 KnownUndef = KnownZero = APInt::getZero(numBits: NumElts);
3208
3209 if (!shouldSimplifyDemandedVectorElts(Op, TLO))
3210 return false;
3211
3212 // TODO: For now we assume we know nothing about scalable vectors.
3213 if (VT.isScalableVector())
3214 return false;
3215
3216 assert(VT.getVectorNumElements() == NumElts &&
3217 "Mask size mismatches value type element count!");
3218
3219 // Undef operand.
3220 if (Op.isUndef()) {
3221 KnownUndef.setAllBits();
3222 return false;
3223 }
3224
3225 // If Op has other users, assume that all elements are needed.
3226 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3227 DemandedElts.setAllBits();
3228
3229 // Not demanding any elements from Op.
3230 if (DemandedElts == 0) {
3231 KnownUndef.setAllBits();
3232 return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3233 }
3234
3235 // Limit search depth.
3236 if (Depth >= SelectionDAG::MaxRecursionDepth)
3237 return false;
3238
3239 SDLoc DL(Op);
3240 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3241 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3242
3243 // Helper for demanding the specified elements and all the bits of both binary
3244 // operands.
3245 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3246 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op: Op0, DemandedElts,
3247 DAG&: TLO.DAG, Depth: Depth + 1);
3248 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op: Op1, DemandedElts,
3249 DAG&: TLO.DAG, Depth: Depth + 1);
3250 if (NewOp0 || NewOp1) {
3251 SDValue NewOp =
3252 TLO.DAG.getNode(Opcode, DL: SDLoc(Op), VT, N1: NewOp0 ? NewOp0 : Op0,
3253 N2: NewOp1 ? NewOp1 : Op1, Flags: Op->getFlags());
3254 return TLO.CombineTo(O: Op, N: NewOp);
3255 }
3256 return false;
3257 };
3258
3259 switch (Opcode) {
3260 case ISD::SCALAR_TO_VECTOR: {
3261 if (!DemandedElts[0]) {
3262 KnownUndef.setAllBits();
3263 return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3264 }
3265 KnownUndef.setHighBits(NumElts - 1);
3266 break;
3267 }
3268 case ISD::BITCAST: {
3269 SDValue Src = Op.getOperand(i: 0);
3270 EVT SrcVT = Src.getValueType();
3271
3272 if (!SrcVT.isVector()) {
3273 // TODO - bigendian once we have test coverage.
3274 if (IsLE) {
3275 APInt DemandedSrcBits = APInt::getZero(numBits: SrcVT.getSizeInBits());
3276 unsigned EltSize = VT.getScalarSizeInBits();
3277 for (unsigned I = 0; I != NumElts; ++I) {
3278 if (DemandedElts[I]) {
3279 unsigned Offset = I * EltSize;
3280 DemandedSrcBits.setBits(loBit: Offset, hiBit: Offset + EltSize);
3281 }
3282 }
3283 KnownBits Known;
3284 if (SimplifyDemandedBits(Op: Src, DemandedBits: DemandedSrcBits, Known, TLO, Depth: Depth + 1))
3285 return true;
3286 }
3287 break;
3288 }
3289
3290 // Fast handling of 'identity' bitcasts.
3291 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3292 if (NumSrcElts == NumElts)
3293 return SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedElts, KnownUndef,
3294 KnownZero, TLO, Depth: Depth + 1);
3295
3296 APInt SrcDemandedElts, SrcZero, SrcUndef;
3297
3298 // Bitcast from 'large element' src vector to 'small element' vector, we
3299 // must demand a source element if any DemandedElt maps to it.
3300 if ((NumElts % NumSrcElts) == 0) {
3301 unsigned Scale = NumElts / NumSrcElts;
3302 SrcDemandedElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
3303 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
3304 TLO, Depth: Depth + 1))
3305 return true;
3306
3307 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3308 // of the large element.
3309 // TODO - bigendian once we have test coverage.
3310 if (IsLE) {
3311 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3312 APInt SrcDemandedBits = APInt::getZero(numBits: SrcEltSizeInBits);
3313 for (unsigned i = 0; i != NumElts; ++i)
3314 if (DemandedElts[i]) {
3315 unsigned Ofs = (i % Scale) * EltSizeInBits;
3316 SrcDemandedBits.setBits(loBit: Ofs, hiBit: Ofs + EltSizeInBits);
3317 }
3318
3319 KnownBits Known;
3320 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: SrcDemandedBits, OriginalDemandedElts: SrcDemandedElts, Known,
3321 TLO, Depth: Depth + 1))
3322 return true;
3323
3324 // The bitcast has split each wide element into a number of
3325 // narrow subelements. We have just computed the Known bits
3326 // for wide elements. See if element splitting results in
3327 // some subelements being zero. Only for demanded elements!
3328 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3329 if (!Known.Zero.extractBits(numBits: EltSizeInBits, bitPosition: SubElt * EltSizeInBits)
3330 .isAllOnes())
3331 continue;
3332 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3333 unsigned Elt = Scale * SrcElt + SubElt;
3334 if (DemandedElts[Elt])
3335 KnownZero.setBit(Elt);
3336 }
3337 }
3338 }
3339
3340 // If the src element is zero/undef then all the output elements will be -
3341 // only demanded elements are guaranteed to be correct.
3342 for (unsigned i = 0; i != NumSrcElts; ++i) {
3343 if (SrcDemandedElts[i]) {
3344 if (SrcZero[i])
3345 KnownZero.setBits(loBit: i * Scale, hiBit: (i + 1) * Scale);
3346 if (SrcUndef[i])
3347 KnownUndef.setBits(loBit: i * Scale, hiBit: (i + 1) * Scale);
3348 }
3349 }
3350 }
3351
3352 // Bitcast from 'small element' src vector to 'large element' vector, we
3353 // demand all smaller source elements covered by the larger demanded element
3354 // of this vector.
3355 if ((NumSrcElts % NumElts) == 0) {
3356 unsigned Scale = NumSrcElts / NumElts;
3357 SrcDemandedElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
3358 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
3359 TLO, Depth: Depth + 1))
3360 return true;
3361
3362 // If all the src elements covering an output element are zero/undef, then
3363 // the output element will be as well, assuming it was demanded.
3364 for (unsigned i = 0; i != NumElts; ++i) {
3365 if (DemandedElts[i]) {
3366 if (SrcZero.extractBits(numBits: Scale, bitPosition: i * Scale).isAllOnes())
3367 KnownZero.setBit(i);
3368 if (SrcUndef.extractBits(numBits: Scale, bitPosition: i * Scale).isAllOnes())
3369 KnownUndef.setBit(i);
3370 }
3371 }
3372 }
3373 break;
3374 }
3375 case ISD::FREEZE: {
3376 SDValue N0 = Op.getOperand(i: 0);
3377 if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(Op: N0, DemandedElts,
3378 /*PoisonOnly=*/false,
3379 Depth: Depth + 1))
3380 return TLO.CombineTo(O: Op, N: N0);
3381
3382 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3383 // freeze(op(x, ...)) -> op(freeze(x), ...).
3384 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3385 return TLO.CombineTo(
3386 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT,
3387 Operand: TLO.DAG.getFreeze(V: N0.getOperand(i: 0))));
3388 break;
3389 }
3390 case ISD::BUILD_VECTOR: {
3391 // Check all elements and simplify any unused elements with UNDEF.
3392 if (!DemandedElts.isAllOnes()) {
3393 // Don't simplify BROADCASTS.
3394 if (llvm::any_of(Range: Op->op_values(),
3395 P: [&](SDValue Elt) { return Op.getOperand(i: 0) != Elt; })) {
3396 SmallVector<SDValue, 32> Ops(Op->ops());
3397 bool Updated = false;
3398 for (unsigned i = 0; i != NumElts; ++i) {
3399 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3400 Ops[i] = TLO.DAG.getUNDEF(VT: Ops[0].getValueType());
3401 KnownUndef.setBit(i);
3402 Updated = true;
3403 }
3404 }
3405 if (Updated)
3406 return TLO.CombineTo(O: Op, N: TLO.DAG.getBuildVector(VT, DL, Ops));
3407 }
3408 }
3409 for (unsigned i = 0; i != NumElts; ++i) {
3410 SDValue SrcOp = Op.getOperand(i);
3411 if (SrcOp.isUndef()) {
3412 KnownUndef.setBit(i);
3413 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3414 (isNullConstant(V: SrcOp) || isNullFPConstant(V: SrcOp))) {
3415 KnownZero.setBit(i);
3416 }
3417 }
3418 break;
3419 }
3420 case ISD::CONCAT_VECTORS: {
3421 EVT SubVT = Op.getOperand(i: 0).getValueType();
3422 unsigned NumSubVecs = Op.getNumOperands();
3423 unsigned NumSubElts = SubVT.getVectorNumElements();
3424 for (unsigned i = 0; i != NumSubVecs; ++i) {
3425 SDValue SubOp = Op.getOperand(i);
3426 APInt SubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
3427 APInt SubUndef, SubZero;
3428 if (SimplifyDemandedVectorElts(Op: SubOp, OriginalDemandedElts: SubElts, KnownUndef&: SubUndef, KnownZero&: SubZero, TLO,
3429 Depth: Depth + 1))
3430 return true;
3431 KnownUndef.insertBits(SubBits: SubUndef, bitPosition: i * NumSubElts);
3432 KnownZero.insertBits(SubBits: SubZero, bitPosition: i * NumSubElts);
3433 }
3434
3435 // Attempt to avoid multi-use ops if we don't need anything from them.
3436 if (!DemandedElts.isAllOnes()) {
3437 bool FoundNewSub = false;
3438 SmallVector<SDValue, 2> DemandedSubOps;
3439 for (unsigned i = 0; i != NumSubVecs; ++i) {
3440 SDValue SubOp = Op.getOperand(i);
3441 APInt SubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
3442 SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3443 Op: SubOp, DemandedElts: SubElts, DAG&: TLO.DAG, Depth: Depth + 1);
3444 DemandedSubOps.push_back(Elt: NewSubOp ? NewSubOp : SubOp);
3445 FoundNewSub = NewSubOp ? true : FoundNewSub;
3446 }
3447 if (FoundNewSub) {
3448 SDValue NewOp =
3449 TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc(Op), VT, Ops: DemandedSubOps);
3450 return TLO.CombineTo(O: Op, N: NewOp);
3451 }
3452 }
3453 break;
3454 }
3455 case ISD::INSERT_SUBVECTOR: {
3456 // Demand any elements from the subvector and the remainder from the src it
3457 // is inserted into.
3458 SDValue Src = Op.getOperand(i: 0);
3459 SDValue Sub = Op.getOperand(i: 1);
3460 uint64_t Idx = Op.getConstantOperandVal(i: 2);
3461 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3462 APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx);
3463 APInt DemandedSrcElts = DemandedElts;
3464 DemandedSrcElts.clearBits(LoBit: Idx, HiBit: Idx + NumSubElts);
3465
3466 // If none of the sub operand elements are demanded, bypass the insert.
3467 if (!DemandedSubElts)
3468 return TLO.CombineTo(O: Op, N: Src);
3469
3470 APInt SubUndef, SubZero;
3471 if (SimplifyDemandedVectorElts(Op: Sub, OriginalDemandedElts: DemandedSubElts, KnownUndef&: SubUndef, KnownZero&: SubZero, TLO,
3472 Depth: Depth + 1))
3473 return true;
3474
3475 // If none of the src operand elements are demanded, replace it with undef.
3476 if (!DemandedSrcElts && !Src.isUndef())
3477 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT,
3478 N1: TLO.DAG.getUNDEF(VT), N2: Sub,
3479 N3: Op.getOperand(i: 2)));
3480
3481 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef, KnownZero,
3482 TLO, Depth: Depth + 1))
3483 return true;
3484 KnownUndef.insertBits(SubBits: SubUndef, bitPosition: Idx);
3485 KnownZero.insertBits(SubBits: SubZero, bitPosition: Idx);
3486
3487 // Attempt to avoid multi-use ops if we don't need anything from them.
3488 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3489 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3490 Op: Src, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1);
3491 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3492 Op: Sub, DemandedElts: DemandedSubElts, DAG&: TLO.DAG, Depth: Depth + 1);
3493 if (NewSrc || NewSub) {
3494 NewSrc = NewSrc ? NewSrc : Src;
3495 NewSub = NewSub ? NewSub : Sub;
3496 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc(Op), VT, N1: NewSrc,
3497 N2: NewSub, N3: Op.getOperand(i: 2));
3498 return TLO.CombineTo(O: Op, N: NewOp);
3499 }
3500 }
3501 break;
3502 }
3503 case ISD::EXTRACT_SUBVECTOR: {
3504 // Offset the demanded elts by the subvector index.
3505 SDValue Src = Op.getOperand(i: 0);
3506 if (Src.getValueType().isScalableVector())
3507 break;
3508 uint64_t Idx = Op.getConstantOperandVal(i: 1);
3509 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3510 APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts).shl(shiftAmt: Idx);
3511
3512 APInt SrcUndef, SrcZero;
3513 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3514 Depth: Depth + 1))
3515 return true;
3516 KnownUndef = SrcUndef.extractBits(numBits: NumElts, bitPosition: Idx);
3517 KnownZero = SrcZero.extractBits(numBits: NumElts, bitPosition: Idx);
3518
3519 // Attempt to avoid multi-use ops if we don't need anything from them.
3520 if (!DemandedElts.isAllOnes()) {
3521 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3522 Op: Src, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1);
3523 if (NewSrc) {
3524 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc(Op), VT, N1: NewSrc,
3525 N2: Op.getOperand(i: 1));
3526 return TLO.CombineTo(O: Op, N: NewOp);
3527 }
3528 }
3529 break;
3530 }
3531 case ISD::INSERT_VECTOR_ELT: {
3532 SDValue Vec = Op.getOperand(i: 0);
3533 SDValue Scl = Op.getOperand(i: 1);
3534 auto *CIdx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));
3535
3536 // For a legal, constant insertion index, if we don't need this insertion
3537 // then strip it, else remove it from the demanded elts.
3538 if (CIdx && CIdx->getAPIntValue().ult(RHS: NumElts)) {
3539 unsigned Idx = CIdx->getZExtValue();
3540 if (!DemandedElts[Idx])
3541 return TLO.CombineTo(O: Op, N: Vec);
3542
3543 APInt DemandedVecElts(DemandedElts);
3544 DemandedVecElts.clearBit(BitPosition: Idx);
3545 if (SimplifyDemandedVectorElts(Op: Vec, OriginalDemandedElts: DemandedVecElts, KnownUndef,
3546 KnownZero, TLO, Depth: Depth + 1))
3547 return true;
3548
3549 KnownUndef.setBitVal(BitPosition: Idx, BitValue: Scl.isUndef());
3550
3551 KnownZero.setBitVal(BitPosition: Idx, BitValue: isNullConstant(V: Scl) || isNullFPConstant(V: Scl));
3552 break;
3553 }
3554
3555 APInt VecUndef, VecZero;
3556 if (SimplifyDemandedVectorElts(Op: Vec, OriginalDemandedElts: DemandedElts, KnownUndef&: VecUndef, KnownZero&: VecZero, TLO,
3557 Depth: Depth + 1))
3558 return true;
3559 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3560 break;
3561 }
3562 case ISD::VSELECT: {
3563 SDValue Sel = Op.getOperand(i: 0);
3564 SDValue LHS = Op.getOperand(i: 1);
3565 SDValue RHS = Op.getOperand(i: 2);
3566
3567 // Try to transform the select condition based on the current demanded
3568 // elements.
3569 APInt UndefSel, ZeroSel;
3570 if (SimplifyDemandedVectorElts(Op: Sel, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefSel, KnownZero&: ZeroSel, TLO,
3571 Depth: Depth + 1))
3572 return true;
3573
3574 // See if we can simplify either vselect operand.
3575 APInt DemandedLHS(DemandedElts);
3576 APInt DemandedRHS(DemandedElts);
3577 APInt UndefLHS, ZeroLHS;
3578 APInt UndefRHS, ZeroRHS;
3579 if (SimplifyDemandedVectorElts(Op: LHS, OriginalDemandedElts: DemandedLHS, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3580 Depth: Depth + 1))
3581 return true;
3582 if (SimplifyDemandedVectorElts(Op: RHS, OriginalDemandedElts: DemandedRHS, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3583 Depth: Depth + 1))
3584 return true;
3585
3586 KnownUndef = UndefLHS & UndefRHS;
3587 KnownZero = ZeroLHS & ZeroRHS;
3588
3589 // If we know that the selected element is always zero, we don't need the
3590 // select value element.
3591 APInt DemandedSel = DemandedElts & ~KnownZero;
3592 if (DemandedSel != DemandedElts)
3593 if (SimplifyDemandedVectorElts(Op: Sel, OriginalDemandedElts: DemandedSel, KnownUndef&: UndefSel, KnownZero&: ZeroSel, TLO,
3594 Depth: Depth + 1))
3595 return true;
3596
3597 break;
3598 }
3599 case ISD::VECTOR_SHUFFLE: {
3600 SDValue LHS = Op.getOperand(i: 0);
3601 SDValue RHS = Op.getOperand(i: 1);
3602 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Val&: Op)->getMask();
3603
3604 // Collect demanded elements from shuffle operands..
3605 APInt DemandedLHS(NumElts, 0);
3606 APInt DemandedRHS(NumElts, 0);
3607 for (unsigned i = 0; i != NumElts; ++i) {
3608 int M = ShuffleMask[i];
3609 if (M < 0 || !DemandedElts[i])
3610 continue;
3611 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3612 if (M < (int)NumElts)
3613 DemandedLHS.setBit(M);
3614 else
3615 DemandedRHS.setBit(M - NumElts);
3616 }
3617
3618 // If either side isn't demanded, replace it by UNDEF. We handle this
3619 // explicitly here to also simplify in case of multiple uses (on the
3620 // contrary to the SimplifyDemandedVectorElts calls below).
3621 bool FoldLHS = !DemandedLHS && !LHS.isUndef();
3622 bool FoldRHS = !DemandedRHS && !RHS.isUndef();
3623 if (FoldLHS || FoldRHS) {
3624 LHS = FoldLHS ? TLO.DAG.getUNDEF(VT: LHS.getValueType()) : LHS;
3625 RHS = FoldRHS ? TLO.DAG.getUNDEF(VT: RHS.getValueType()) : RHS;
3626 SDValue NewOp =
3627 TLO.DAG.getVectorShuffle(VT, dl: SDLoc(Op), N1: LHS, N2: RHS, Mask: ShuffleMask);
3628 return TLO.CombineTo(O: Op, N: NewOp);
3629 }
3630
3631 // See if we can simplify either shuffle operand.
3632 APInt UndefLHS, ZeroLHS;
3633 APInt UndefRHS, ZeroRHS;
3634 if (SimplifyDemandedVectorElts(Op: LHS, OriginalDemandedElts: DemandedLHS, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3635 Depth: Depth + 1))
3636 return true;
3637 if (SimplifyDemandedVectorElts(Op: RHS, OriginalDemandedElts: DemandedRHS, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3638 Depth: Depth + 1))
3639 return true;
3640
3641 // Simplify mask using undef elements from LHS/RHS.
3642 bool Updated = false;
3643 bool IdentityLHS = true, IdentityRHS = true;
3644 SmallVector<int, 32> NewMask(ShuffleMask);
3645 for (unsigned i = 0; i != NumElts; ++i) {
3646 int &M = NewMask[i];
3647 if (M < 0)
3648 continue;
3649 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3650 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3651 Updated = true;
3652 M = -1;
3653 }
3654 IdentityLHS &= (M < 0) || (M == (int)i);
3655 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3656 }
3657
3658 // Update legal shuffle masks based on demanded elements if it won't reduce
3659 // to Identity which can cause premature removal of the shuffle mask.
3660 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3661 SDValue LegalShuffle =
3662 buildLegalVectorShuffle(VT, DL, N0: LHS, N1: RHS, Mask: NewMask, DAG&: TLO.DAG);
3663 if (LegalShuffle)
3664 return TLO.CombineTo(O: Op, N: LegalShuffle);
3665 }
3666
3667 // Propagate undef/zero elements from LHS/RHS.
3668 for (unsigned i = 0; i != NumElts; ++i) {
3669 int M = ShuffleMask[i];
3670 if (M < 0) {
3671 KnownUndef.setBit(i);
3672 } else if (M < (int)NumElts) {
3673 if (UndefLHS[M])
3674 KnownUndef.setBit(i);
3675 if (ZeroLHS[M])
3676 KnownZero.setBit(i);
3677 } else {
3678 if (UndefRHS[M - NumElts])
3679 KnownUndef.setBit(i);
3680 if (ZeroRHS[M - NumElts])
3681 KnownZero.setBit(i);
3682 }
3683 }
3684 break;
3685 }
3686 case ISD::ANY_EXTEND_VECTOR_INREG:
3687 case ISD::SIGN_EXTEND_VECTOR_INREG:
3688 case ISD::ZERO_EXTEND_VECTOR_INREG: {
3689 APInt SrcUndef, SrcZero;
3690 SDValue Src = Op.getOperand(i: 0);
3691 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3692 APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts);
3693 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3694 Depth: Depth + 1))
3695 return true;
3696 KnownZero = SrcZero.zextOrTrunc(width: NumElts);
3697 KnownUndef = SrcUndef.zextOrTrunc(width: NumElts);
3698
3699 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3700 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3701 DemandedSrcElts == 1) {
3702 // aext - if we just need the bottom element then we can bitcast.
3703 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
3704 }
3705
3706 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3707 // zext(undef) upper bits are guaranteed to be zero.
3708 if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3709 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: SDLoc(Op), VT));
3710 KnownUndef.clearAllBits();
3711
3712 // zext - if we just need the bottom element then we can mask:
3713 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3714 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3715 Op->isOnlyUserOf(N: Src.getNode()) &&
3716 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3717 SDLoc DL(Op);
3718 EVT SrcVT = Src.getValueType();
3719 EVT SrcSVT = SrcVT.getScalarType();
3720
3721 // If we're after type legalization and SrcSVT is not legal, use the
3722 // promoted type for creating constants to avoid creating nodes with
3723 // illegal types.
3724 if (AfterLegalizeTypes)
3725 SrcSVT = getLegalTypeToTransformTo(Context&: *TLO.DAG.getContext(), VT: SrcSVT);
3726
3727 SmallVector<SDValue> MaskElts;
3728 MaskElts.push_back(Elt: TLO.DAG.getAllOnesConstant(DL, VT: SrcSVT));
3729 MaskElts.append(NumInputs: NumSrcElts - 1, Elt: TLO.DAG.getConstant(Val: 0, DL, VT: SrcSVT));
3730 SDValue Mask = TLO.DAG.getBuildVector(VT: SrcVT, DL, Ops: MaskElts);
3731 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3732 Opcode: ISD::AND, DL, VT: SrcVT, Ops: {Src.getOperand(i: 1), Mask})) {
3733 Fold = TLO.DAG.getNode(Opcode: ISD::AND, DL, VT: SrcVT, N1: Src.getOperand(i: 0), N2: Fold);
3734 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Fold));
3735 }
3736 }
3737 }
3738 break;
3739 }
3740
3741 // TODO: There are more binop opcodes that could be handled here - MIN,
3742 // MAX, saturated math, etc.
3743 case ISD::ADD: {
3744 SDValue Op0 = Op.getOperand(i: 0);
3745 SDValue Op1 = Op.getOperand(i: 1);
3746 if (Op0 == Op1 && Op->isOnlyUserOf(N: Op0.getNode())) {
3747 APInt UndefLHS, ZeroLHS;
3748 if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3749 Depth: Depth + 1, /*AssumeSingleUse*/ true))
3750 return true;
3751 }
3752 [[fallthrough]];
3753 }
3754 case ISD::AVGCEILS:
3755 case ISD::AVGCEILU:
3756 case ISD::AVGFLOORS:
3757 case ISD::AVGFLOORU:
3758 case ISD::OR:
3759 case ISD::XOR:
3760 case ISD::SUB:
3761 case ISD::FADD:
3762 case ISD::FSUB:
3763 case ISD::FMUL:
3764 case ISD::FDIV:
3765 case ISD::FREM: {
3766 SDValue Op0 = Op.getOperand(i: 0);
3767 SDValue Op1 = Op.getOperand(i: 1);
3768
3769 APInt UndefRHS, ZeroRHS;
3770 if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3771 Depth: Depth + 1))
3772 return true;
3773 APInt UndefLHS, ZeroLHS;
3774 if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3775 Depth: Depth + 1))
3776 return true;
3777
3778 KnownZero = ZeroLHS & ZeroRHS;
3779 KnownUndef = getKnownUndefForVectorBinop(BO: Op, DAG&: TLO.DAG, UndefOp0: UndefLHS, UndefOp1: UndefRHS);
3780
3781 // Attempt to avoid multi-use ops if we don't need anything from them.
3782 // TODO - use KnownUndef to relax the demandedelts?
3783 if (!DemandedElts.isAllOnes())
3784 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3785 return true;
3786 break;
3787 }
3788 case ISD::SHL:
3789 case ISD::SRL:
3790 case ISD::SRA:
3791 case ISD::ROTL:
3792 case ISD::ROTR: {
3793 SDValue Op0 = Op.getOperand(i: 0);
3794 SDValue Op1 = Op.getOperand(i: 1);
3795
3796 APInt UndefRHS, ZeroRHS;
3797 if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3798 Depth: Depth + 1))
3799 return true;
3800 APInt UndefLHS, ZeroLHS;
3801 if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3802 Depth: Depth + 1))
3803 return true;
3804
3805 KnownZero = ZeroLHS;
3806 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3807
3808 // Attempt to avoid multi-use ops if we don't need anything from them.
3809 // TODO - use KnownUndef to relax the demandedelts?
3810 if (!DemandedElts.isAllOnes())
3811 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3812 return true;
3813 break;
3814 }
3815 case ISD::MUL:
3816 case ISD::MULHU:
3817 case ISD::MULHS:
3818 case ISD::AND: {
3819 SDValue Op0 = Op.getOperand(i: 0);
3820 SDValue Op1 = Op.getOperand(i: 1);
3821
3822 APInt SrcUndef, SrcZero;
3823 if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3824 Depth: Depth + 1))
3825 return true;
3826 // FIXME: If we know that a demanded element was zero in Op1 we don't need
3827 // to demand it in Op0 - its guaranteed to be zero. There is however a
3828 // restriction, as we must not make any of the originally demanded elements
3829 // more poisonous. We could reduce amount of elements demanded, but then we
3830 // also need a to inform SimplifyDemandedVectorElts that some elements must
3831 // not be made more poisonous.
3832 if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef, KnownZero,
3833 TLO, Depth: Depth + 1))
3834 return true;
3835
3836 KnownUndef &= DemandedElts;
3837 KnownZero &= DemandedElts;
3838
3839 // If every element pair has a zero/undef/poison then just fold to zero.
3840 // fold (and x, undef/poison) -> 0 / (and x, 0) -> 0
3841 // fold (mul x, undef/poison) -> 0 / (mul x, 0) -> 0
3842 if (DemandedElts.isSubsetOf(RHS: SrcZero | KnownZero | SrcUndef | KnownUndef))
3843 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: SDLoc(Op), VT));
3844
3845 // If either side has a zero element, then the result element is zero, even
3846 // if the other is an UNDEF.
3847 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3848 // and then handle 'and' nodes with the rest of the binop opcodes.
3849 KnownZero |= SrcZero;
3850 KnownUndef &= SrcUndef;
3851 KnownUndef &= ~KnownZero;
3852
3853 // Attempt to avoid multi-use ops if we don't need anything from them.
3854 if (!DemandedElts.isAllOnes())
3855 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3856 return true;
3857 break;
3858 }
3859 case ISD::TRUNCATE:
3860 case ISD::SIGN_EXTEND:
3861 case ISD::ZERO_EXTEND:
3862 if (SimplifyDemandedVectorElts(Op: Op.getOperand(i: 0), OriginalDemandedElts: DemandedElts, KnownUndef,
3863 KnownZero, TLO, Depth: Depth + 1))
3864 return true;
3865
3866 if (!DemandedElts.isAllOnes())
3867 if (SDValue NewOp = SimplifyMultipleUseDemandedVectorElts(
3868 Op: Op.getOperand(i: 0), DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
3869 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode, DL: SDLoc(Op), VT, Operand: NewOp));
3870
3871 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3872 // zext(undef) upper bits are guaranteed to be zero.
3873 if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3874 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: SDLoc(Op), VT));
3875 KnownUndef.clearAllBits();
3876 }
3877 break;
3878 case ISD::SINT_TO_FP:
3879 case ISD::UINT_TO_FP:
3880 case ISD::FP_TO_SINT:
3881 case ISD::FP_TO_UINT:
3882 if (SimplifyDemandedVectorElts(Op: Op.getOperand(i: 0), OriginalDemandedElts: DemandedElts, KnownUndef,
3883 KnownZero, TLO, Depth: Depth + 1))
3884 return true;
3885 // Don't fall through to generic undef -> undef handling.
3886 return false;
3887 default: {
3888 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3889 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3890 KnownZero, TLO, Depth))
3891 return true;
3892 } else {
3893 KnownBits Known;
3894 APInt DemandedBits = APInt::getAllOnes(numBits: EltSizeInBits);
3895 if (SimplifyDemandedBits(Op, OriginalDemandedBits: DemandedBits, OriginalDemandedElts, Known,
3896 TLO, Depth, AssumeSingleUse))
3897 return true;
3898 }
3899 break;
3900 }
3901 }
3902 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3903
3904 // Constant fold all undef cases.
3905 // TODO: Handle zero cases as well.
3906 if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3907 return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3908
3909 return false;
3910}
3911
3912/// Determine which of the bits specified in Mask are known to be either zero or
3913/// one and return them in the Known.
3914void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3915 KnownBits &Known,
3916 const APInt &DemandedElts,
3917 const SelectionDAG &DAG,
3918 unsigned Depth) const {
3919 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3920 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3921 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3922 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3923 "Should use MaskedValueIsZero if you don't know whether Op"
3924 " is a target node!");
3925 Known.resetAll();
3926}
3927
// GlobalISel analogue of computeKnownBitsForTargetNode. Conservative default:
// report that nothing is known about any bit of register R. Targets with
// target-specific generic instructions override this to provide real facts.
void TargetLowering::computeKnownBitsForTargetInstr(
    GISelValueTracking &Analysis, Register R, KnownBits &Known,
    const APInt &DemandedElts, const MachineRegisterInfo &MRI,
    unsigned Depth) const {
  Known.resetAll();
}
3934
// GlobalISel hook for FP-class tracking of target instructions. Conservative
// default: assume any floating-point class is possible for register R.
void TargetLowering::computeKnownFPClassForTargetInstr(
    GISelValueTracking &Analysis, Register R, KnownFPClass &Known,
    const APInt &DemandedElts, const MachineRegisterInfo &MRI,
    unsigned Depth) const {
  Known.resetAll();
}
3941
3942void TargetLowering::computeKnownBitsForFrameIndex(
3943 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3944 // The low bits are known zero if the pointer is aligned.
3945 Known.Zero.setLowBits(Log2(A: MF.getFrameInfo().getObjectAlign(ObjectIdx: FrameIdx)));
3946}
3947
// GlobalISel hook for alignment tracking of target instructions.
// Conservative default: claim only byte alignment for register R.
Align TargetLowering::computeKnownAlignForTargetInstr(
    GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI,
    unsigned Depth) const {
  return Align(1);
}
3953
3954/// This method can be implemented by targets that want to expose additional
3955/// information about sign bits to the DAG Combiner.
3956unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3957 const APInt &,
3958 const SelectionDAG &,
3959 unsigned Depth) const {
3960 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3961 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3962 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3963 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3964 "Should use ComputeNumSignBits if you don't know whether Op"
3965 " is a target node!");
3966 return 1;
3967}
3968
// GlobalISel analogue of ComputeNumSignBitsForTargetNode. Conservative
// default: only the sign bit itself is known to replicate.
unsigned TargetLowering::computeNumSignBitsForTargetInstr(
    GISelValueTracking &Analysis, Register R, const APInt &DemandedElts,
    const MachineRegisterInfo &MRI, unsigned Depth) const {
  return 1;
}
3974
3975bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3976 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3977 TargetLoweringOpt &TLO, unsigned Depth) const {
3978 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3979 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3980 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3981 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3982 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3983 " is a target node!");
3984 return false;
3985}
3986
3987bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3988 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3989 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3990 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3991 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3992 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3993 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3994 "Should use SimplifyDemandedBits if you don't know whether Op"
3995 " is a target node!");
3996 computeKnownBitsForTargetNode(Op, Known, DemandedElts, DAG: TLO.DAG, Depth);
3997 return false;
3998}
3999
4000SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
4001 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
4002 SelectionDAG &DAG, unsigned Depth) const {
4003 assert(
4004 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4005 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4006 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4007 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4008 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
4009 " is a target node!");
4010 return SDValue();
4011}
4012
4013SDValue
4014TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
4015 SDValue N1, MutableArrayRef<int> Mask,
4016 SelectionDAG &DAG) const {
4017 bool LegalMask = isShuffleMaskLegal(Mask, VT);
4018 if (!LegalMask) {
4019 std::swap(a&: N0, b&: N1);
4020 ShuffleVectorSDNode::commuteMask(Mask);
4021 LegalMask = isShuffleMaskLegal(Mask, VT);
4022 }
4023
4024 if (!LegalMask)
4025 return SDValue();
4026
4027 return DAG.getVectorShuffle(VT, dl: DL, N1: N0, N2: N1, Mask);
4028}
4029
// Hook allowing targets to recognize loads from constant pools (or similar)
// and return the underlying IR constant. Default: no constant is recoverable.
const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
  return nullptr;
}
4033
4034bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
4035 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4036 bool PoisonOnly, unsigned Depth) const {
4037 assert(
4038 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4039 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4040 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4041 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4042 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
4043 " is a target node!");
4044
4045 // If Op can't create undef/poison and none of its operands are undef/poison
4046 // then Op is never undef/poison.
4047 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
4048 /*ConsiderFlags*/ true, Depth) &&
4049 all_of(Range: Op->ops(), P: [&](SDValue V) {
4050 return DAG.isGuaranteedNotToBeUndefOrPoison(Op: V, PoisonOnly,
4051 Depth: Depth + 1);
4052 });
4053}
4054
4055bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
4056 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4057 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
4058 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4059 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4060 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4061 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4062 "Should use canCreateUndefOrPoison if you don't know whether Op"
4063 " is a target node!");
4064 // Be conservative and return true.
4065 return true;
4066}
4067
4068void TargetLowering::computeKnownFPClassForTargetNode(const SDValue Op,
4069 KnownFPClass &Known,
4070 const APInt &DemandedElts,
4071 const SelectionDAG &DAG,
4072 unsigned Depth) const {
4073 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4074 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4075 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4076 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4077 "Should use computeKnownFPClass if you don't know whether Op"
4078 " is a target node!");
4079}
4080
4081bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
4082 const APInt &DemandedElts,
4083 const SelectionDAG &DAG,
4084 bool SNaN,
4085 unsigned Depth) const {
4086 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4087 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4088 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4089 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4090 "Should use isKnownNeverNaN if you don't know whether Op"
4091 " is a target node!");
4092 return false;
4093}
4094
4095bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
4096 const APInt &DemandedElts,
4097 APInt &UndefElts,
4098 const SelectionDAG &DAG,
4099 unsigned Depth) const {
4100 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4101 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4102 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4103 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4104 "Should use isSplatValue if you don't know whether Op"
4105 " is a target node!");
4106 return false;
4107}
4108
4109// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
4110// work with truncating build vectors and vectors with elements of less than
4111// 8 bits.
4112bool TargetLowering::isConstTrueVal(SDValue N) const {
4113 if (!N)
4114 return false;
4115
4116 unsigned EltWidth;
4117 APInt CVal;
4118 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
4119 /*AllowTruncation=*/true)) {
4120 CVal = CN->getAPIntValue();
4121 EltWidth = N.getValueType().getScalarSizeInBits();
4122 } else
4123 return false;
4124
4125 // If this is a truncating splat, truncate the splat value.
4126 // Otherwise, we may fail to match the expected values below.
4127 if (EltWidth < CVal.getBitWidth())
4128 CVal = CVal.trunc(width: EltWidth);
4129
4130 switch (getBooleanContents(Type: N.getValueType())) {
4131 case UndefinedBooleanContent:
4132 return CVal[0];
4133 case ZeroOrOneBooleanContent:
4134 return CVal.isOne();
4135 case ZeroOrNegativeOneBooleanContent:
4136 return CVal.isAllOnes();
4137 }
4138
4139 llvm_unreachable("Invalid boolean contents");
4140}
4141
4142bool TargetLowering::isConstFalseVal(SDValue N) const {
4143 if (!N)
4144 return false;
4145
4146 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: N);
4147 if (!CN) {
4148 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Val&: N);
4149 if (!BV)
4150 return false;
4151
4152 // Only interested in constant splats, we don't care about undef
4153 // elements in identifying boolean constants and getConstantSplatNode
4154 // returns NULL if all ops are undef;
4155 CN = BV->getConstantSplatNode();
4156 if (!CN)
4157 return false;
4158 }
4159
4160 if (getBooleanContents(Type: N->getValueType(ResNo: 0)) == UndefinedBooleanContent)
4161 return !CN->getAPIntValue()[0];
4162
4163 return CN->isZero();
4164}
4165
4166bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
4167 bool SExt) const {
4168 if (VT == MVT::i1)
4169 return N->isOne();
4170
4171 TargetLowering::BooleanContent Cnt = getBooleanContents(Type: VT);
4172 switch (Cnt) {
4173 case TargetLowering::ZeroOrOneBooleanContent:
4174 // An extended value of 1 is always true, unless its original type is i1,
4175 // in which case it will be sign extended to -1.
4176 return (N->isOne() && !SExt) || (SExt && (N->getValueType(ResNo: 0) != MVT::i1));
4177 case TargetLowering::UndefinedBooleanContent:
4178 case TargetLowering::ZeroOrNegativeOneBooleanContent:
4179 return N->isAllOnes() && SExt;
4180 }
4181 llvm_unreachable("Unexpected enumeration.");
4182}
4183
/// This helper function of SimplifySetCC tries to optimize the comparison when
/// either operand of the SetCC node is a bitwise-and instruction.
///
/// Handled transforms (all [in]equality only, integer types only):
///  1. (X & Y) != 0 -> zext/trunc(X & Y) when only the LSB can be set.
///  2. (X & pow2C) ==/!= 0 -> signbit test of a cheap truncation.
///  3. (X & Y) ==/!= Y -> (X & Y) !=/== 0 when Y is a known power of two,
///     or ~X & Y ==/!= 0 when the target has an and-not operation.
SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
                                         ISD::CondCode Cond, const SDLoc &DL,
                                         DAGCombinerInfo &DCI) const {
  // Canonicalize: if exactly one operand is an 'and', make it N0.
  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
    std::swap(a&: N0, b&: N1);

  SelectionDAG &DAG = DCI.DAG;
  EVT OpVT = N0.getValueType();
  // Only integer 'and' compared for [in]equality is handled here.
  if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
    return SDValue();

  // (X & Y) != 0 --> zextOrTrunc(X & Y)
  // iff everything but LSB is known zero:
  if (Cond == ISD::SETNE && isNullConstant(V: N1) &&
      (getBooleanContents(Type: OpVT) == TargetLowering::UndefinedBooleanContent ||
       getBooleanContents(Type: OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
    unsigned NumEltBits = OpVT.getScalarSizeInBits();
    APInt UpperBits = APInt::getHighBitsSet(numBits: NumEltBits, hiBitsSet: NumEltBits - 1);
    if (DAG.MaskedValueIsZero(Op: N0, Mask: UpperBits))
      return DAG.getBoolExtOrTrunc(Op: N0, SL: DL, VT, OpVT);
  }

  // Try to eliminate a power-of-2 mask constant by converting to a signbit
  // test in a narrow type that we can truncate to with no cost. Examples:
  // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
  // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
  // TODO: This conservatively checks for type legality on the source and
  // destination types. That may inhibit optimizations, but it also
  // allows setcc->shift transforms that may be more beneficial.
  auto *AndC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
  if (AndC && isNullConstant(V: N1) && AndC->getAPIntValue().isPowerOf2() &&
      isTypeLegal(VT: OpVT) && N0.hasOneUse()) {
    // The narrow type must keep the mask bit as its sign bit, so its width
    // equals the number of active bits in the mask.
    EVT NarrowVT = EVT::getIntegerVT(Context&: *DAG.getContext(),
                                     BitWidth: AndC->getAPIntValue().getActiveBits());
    if (isTruncateFree(FromVT: OpVT, ToVT: NarrowVT) && isTypeLegal(VT: NarrowVT)) {
      SDValue Trunc = DAG.getZExtOrTrunc(Op: N0.getOperand(i: 0), DL, VT: NarrowVT);
      SDValue Zero = DAG.getConstant(Val: 0, DL, VT: NarrowVT);
      return DAG.getSetCC(DL, VT, LHS: Trunc, RHS: Zero,
                          Cond: Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
    }
  }

  // Match these patterns in any of their permutations:
  // (X & Y) == Y
  // (X & Y) != Y
  SDValue X, Y;
  if (N0.getOperand(i: 0) == N1) {
    X = N0.getOperand(i: 1);
    Y = N0.getOperand(i: 0);
  } else if (N0.getOperand(i: 1) == N1) {
    X = N0.getOperand(i: 0);
    Y = N0.getOperand(i: 1);
  } else {
    return SDValue();
  }

  // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
  // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
  // it's liable to create an infinite loop.
  SDValue Zero = DAG.getConstant(Val: 0, DL, VT: OpVT);
  if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
      DAG.isKnownToBeAPowerOfTwo(Val: Y)) {
    // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
    // Note that where Y is variable and is known to have at most one bit set
    // (for example, if it is Z & 1) we cannot do this; the expressions are not
    // equivalent when Y == 0.
    assert(OpVT.isInteger());
    Cond = ISD::getSetCCInverse(Operation: Cond, Type: OpVT);
    if (DCI.isBeforeLegalizeOps() ||
        isCondCodeLegal(CC: Cond, VT: N0.getSimpleValueType()))
      return DAG.getSetCC(DL, VT, LHS: N0, RHS: Zero, Cond);
  } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
    // If the target supports an 'and-not' or 'and-complement' logic operation,
    // try to use that to make a comparison operation more efficient.
    // But don't do this transform if the mask is a single bit because there are
    // more efficient ways to deal with that case (for example, 'bt' on x86 or
    // 'rlwinm' on PPC).

    // Bail out if the compare operand that we want to turn into a zero is
    // already a zero (otherwise, infinite loop).
    if (isNullConstant(V: Y))
      return SDValue();

    // Transform this into: ~X & Y == 0.
    SDValue NotX = DAG.getNOT(DL: SDLoc(X), Val: X, VT: OpVT);
    SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(N0), VT: OpVT, N1: NotX, N2: Y);
    return DAG.getSetCC(DL, VT, LHS: NewAnd, RHS: Zero, Cond);
  }

  return SDValue();
}
4278
4279/// This helper function of SimplifySetCC tries to optimize the comparison when
4280/// either operand of the SetCC node is a bitwise-or instruction.
4281/// For now, this just transforms (X | Y) ==/!= Y into X & ~Y ==/!= 0.
4282SDValue TargetLowering::foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1,
4283 ISD::CondCode Cond, const SDLoc &DL,
4284 DAGCombinerInfo &DCI) const {
4285 if (N1.getOpcode() == ISD::OR && N0.getOpcode() != ISD::OR)
4286 std::swap(a&: N0, b&: N1);
4287
4288 SelectionDAG &DAG = DCI.DAG;
4289 EVT OpVT = N0.getValueType();
4290 if (!N0.hasOneUse() || !OpVT.isInteger() ||
4291 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4292 return SDValue();
4293
4294 // (X | Y) == Y
4295 // (X | Y) != Y
4296 SDValue X;
4297 if (sd_match(N: N0, P: m_Or(L: m_Value(N&: X), R: m_Specific(N: N1))) && hasAndNotCompare(Y: X)) {
4298 // If the target supports an 'and-not' or 'and-complement' logic operation,
4299 // try to use that to make a comparison operation more efficient.
4300
4301 // Bail out if the compare operand that we want to turn into a zero is
4302 // already a zero (otherwise, infinite loop).
4303 if (isNullConstant(V: N1))
4304 return SDValue();
4305
4306 // Transform this into: X & ~Y ==/!= 0.
4307 SDValue NotY = DAG.getNOT(DL: SDLoc(N1), Val: N1, VT: OpVT);
4308 SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(N0), VT: OpVT, N1: X, N2: NotY);
4309 return DAG.getSetCC(DL, VT, LHS: NewAnd, RHS: DAG.getConstant(Val: 0, DL, VT: OpVT), Cond);
4310 }
4311
4312 return SDValue();
4313}
4314
/// There are multiple IR patterns that could be checking whether certain
/// truncation of a signed number would be lossy or not. The pattern which is
/// best at IR level, may not lower optimally. Thus, we want to unfold it.
/// We are looking for the following pattern: (KeptBits is a constant)
///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
/// KeptBits also can't be 1, that would have been folded to  %x dstcond 0
/// We will unfold it into the natural trunc+sext pattern:
///   ((%x << C) a>> C) dstcond %x
/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
    EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
    const SDLoc &DL) const {
  // We must be comparing with a constant.
  ConstantSDNode *C1;
  if (!(C1 = dyn_cast<ConstantSDNode>(Val&: N1)))
    return SDValue();

  // N0 should be:  add %x, (1 << (KeptBits-1))
  if (N0->getOpcode() != ISD::ADD)
    return SDValue();

  // And we must be 'add'ing a constant.
  ConstantSDNode *C01;
  if (!(C01 = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1))))
    return SDValue();

  SDValue X = N0->getOperand(Num: 0);
  EVT XVT = X.getValueType();

  // Validate constants ...

  APInt I1 = C1->getAPIntValue();

  // Map the unsigned range predicate onto an [in]equality, canonicalizing
  // the constant so that I1 is exclusive-bound (1 << KeptBits) in all cases.
  ISD::CondCode NewCond;
  if (Cond == ISD::CondCode::SETULT) {
    NewCond = ISD::CondCode::SETEQ;
  } else if (Cond == ISD::CondCode::SETULE) {
    NewCond = ISD::CondCode::SETEQ;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGT) {
    NewCond = ISD::CondCode::SETNE;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGE) {
    NewCond = ISD::CondCode::SETNE;
  } else
    return SDValue();

  APInt I01 = C01->getAPIntValue();

  auto checkConstants = [&I1, &I01]() -> bool {
    // Both of them must be power-of-two, and the constant from setcc is bigger.
    return I1.ugt(RHS: I01) && I1.isPowerOf2() && I01.isPowerOf2();
  };

  if (checkConstants()) {
    // Great, e.g. got  icmp ult i16 (add i16 %x, 128), 256
  } else {
    // What if we invert constants? (and the target predicate)
    I1.negate();
    I01.negate();
    assert(XVT.isInteger());
    NewCond = getSetCCInverse(Operation: NewCond, Type: XVT);
    if (!checkConstants())
      return SDValue();
    // Great, e.g. got  icmp uge i16 (add i16 %x, -128), -256
  }

  // They are power-of-two, so which bit is set?
  const unsigned KeptBits = I1.logBase2();
  const unsigned KeptBitsMinusOne = I01.logBase2();

  // Magic! The add constant must be exactly half of the compare constant,
  // i.e. 1 << (KeptBits-1) vs 1 << KeptBits.
  if (KeptBits != (KeptBitsMinusOne + 1))
    return SDValue();
  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");

  // We don't want to do this in every single case.
  SelectionDAG &DAG = DCI.DAG;
  if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
    return SDValue();

  // Unfold into:  sext_inreg(%x) cond %x
  // Where 'cond' will be either 'eq' or 'ne'.
  SDValue SExtInReg = DAG.getNode(
      Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: XVT, N1: X,
      N2: DAG.getValueType(EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: KeptBits)));
  return DAG.getSetCC(DL, VT: SCCVT, LHS: SExtInReg, RHS: X, Cond: NewCond);
}
4406
// (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
//
// Moving the shift from the mask onto X lets the constant C be materialized
// directly into the 'and', which is cheaper on some targets. N1C must be the
// zero constant and Cond an [in]equality; both are asserted below.
SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
    EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
    DAGCombinerInfo &DCI, const SDLoc &DL) const {
  assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() &&
         "Should be a comparison with 0.");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Valid only for [in]equality comparisons.");

  unsigned NewShiftOpcode;
  SDValue X, C, Y;

  SelectionDAG &DAG = DCI.DAG;

  // Look for '(C l>>/<< Y)'. On success fills in NewShiftOpcode (the opposite
  // shift direction), C and Y. NOTE: it reads the captured X, which must have
  // been assigned the other 'and' operand before each call.
  auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
    // The shift should be one-use.
    if (!V.hasOneUse())
      return false;
    unsigned OldShiftOpcode = V.getOpcode();
    switch (OldShiftOpcode) {
    case ISD::SHL:
      NewShiftOpcode = ISD::SRL;
      break;
    case ISD::SRL:
      NewShiftOpcode = ISD::SHL;
      break;
    default:
      return false; // must be a logical shift.
    }
    // We should be shifting a constant.
    // FIXME: best to use isConstantOrConstantVector().
    C = V.getOperand(i: 0);
    ConstantSDNode *CC =
        isConstOrConstSplat(N: C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    if (!CC)
      return false;
    Y = V.getOperand(i: 1);

    ConstantSDNode *XC =
        isConstOrConstSplat(N: X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    // Let the target decide whether the hoisted form is profitable.
    return shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
  };

  // LHS of comparison should be an one-use 'and'.
  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
    return SDValue();

  X = N0.getOperand(i: 0);
  SDValue Mask = N0.getOperand(i: 1);

  // 'and' is commutative!
  if (!Match(Mask)) {
    std::swap(a&: X, b&: Mask);
    if (!Match(Mask))
      return SDValue();
  }

  EVT VT = X.getValueType();

  // Produce:
  // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
  SDValue T0 = DAG.getNode(Opcode: NewShiftOpcode, DL, VT, N1: X, N2: Y);
  SDValue T1 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: T0, N2: C);
  SDValue T2 = DAG.getSetCC(DL, VT: SCCVT, LHS: T1, RHS: N1C, Cond);
  return T2;
}
4475
4476/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4477/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4478/// handle the commuted versions of these patterns.
4479SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4480 ISD::CondCode Cond, const SDLoc &DL,
4481 DAGCombinerInfo &DCI) const {
4482 unsigned BOpcode = N0.getOpcode();
4483 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4484 "Unexpected binop");
4485 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4486
4487 // (X + Y) == X --> Y == 0
4488 // (X - Y) == X --> Y == 0
4489 // (X ^ Y) == X --> Y == 0
4490 SelectionDAG &DAG = DCI.DAG;
4491 EVT OpVT = N0.getValueType();
4492 SDValue X = N0.getOperand(i: 0);
4493 SDValue Y = N0.getOperand(i: 1);
4494 if (X == N1)
4495 return DAG.getSetCC(DL, VT, LHS: Y, RHS: DAG.getConstant(Val: 0, DL, VT: OpVT), Cond);
4496
4497 if (Y != N1)
4498 return SDValue();
4499
4500 // (X + Y) == Y --> X == 0
4501 // (X ^ Y) == Y --> X == 0
4502 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4503 return DAG.getSetCC(DL, VT, LHS: X, RHS: DAG.getConstant(Val: 0, DL, VT: OpVT), Cond);
4504
4505 // The shift would not be valid if the operands are boolean (i1).
4506 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4507 return SDValue();
4508
4509 // (X - Y) == Y --> X == Y << 1
4510 SDValue One = DAG.getShiftAmountConstant(Val: 1, VT: OpVT, DL);
4511 SDValue YShl1 = DAG.getNode(Opcode: ISD::SHL, DL, VT: N1.getValueType(), N1: Y, N2: One);
4512 if (!DCI.isCalledByLegalizer())
4513 DCI.AddToWorklist(N: YShl1.getNode());
4514 return DAG.getSetCC(DL, VT, LHS: X, RHS: YShl1, Cond);
4515}
4516
// Try to simplify a setcc whose LHS is a (possibly truncated) CTPOP.
// Handles:
//   (ctpop x) u< K / u> K-1  -> iterated (x & x-1) clearing of K-1 low
//                               set bits, then ==/!= 0
//   (ctpop x) ==/!= 1        -> blsr/xor-based power-of-two tests
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
                                      SDValue N0, const APInt &C1,
                                      ISD::CondCode Cond, const SDLoc &dl,
                                      SelectionDAG &DAG) {
  // Look through truncs that don't change the value of a ctpop.
  // FIXME: Add vector support? Need to be careful with setcc result type below.
  SDValue CTPOP = N0;
  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
      N0.getScalarValueSizeInBits() > Log2_32(Value: N0.getOperand(i: 0).getScalarValueSizeInBits()))
    CTPOP = N0.getOperand(i: 0);

  if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
    return SDValue();

  EVT CTVT = CTPOP.getValueType();
  SDValue CTOp = CTPOP.getOperand(i: 0);

  // Expand a power-of-2-or-zero comparison based on ctpop:
  // (ctpop x) u< 2 -> (x & x-1) == 0
  // (ctpop x) u> 1 -> (x & x-1) != 0
  if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
    // Keep the CTPOP if it is a cheap vector op.
    if (CTVT.isVector() && TLI.isCtpopFast(VT: CTVT))
      return SDValue();

    // Each "clear lowest set bit" pass costs an add+and; bail out if the
    // constant would require more passes than the target tolerates.
    unsigned CostLimit = TLI.getCustomCtpopCost(VT: CTVT, Cond);
    if (C1.ugt(RHS: CostLimit + (Cond == ISD::SETULT)))
      return SDValue();
    if (C1 == 0 && (Cond == ISD::SETULT))
      return SDValue(); // This is handled elsewhere.

    unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);

    // Repeatedly clear the lowest set bit: x &= (x - 1).
    SDValue NegOne = DAG.getAllOnesConstant(DL: dl, VT: CTVT);
    SDValue Result = CTOp;
    for (unsigned i = 0; i < Passes; i++) {
      SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: CTVT, N1: Result, N2: NegOne);
      Result = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: CTVT, N1: Result, N2: Add);
    }
    ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
    return DAG.getSetCC(DL: dl, VT, LHS: Result, RHS: DAG.getConstant(Val: 0, DL: dl, VT: CTVT), Cond: CC);
  }

  // Expand a power-of-2 comparison based on ctpop
  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
    // Keep the CTPOP if it is cheap.
    if (TLI.isCtpopFast(VT: CTVT))
      return SDValue();

    SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: CTVT);
    SDValue NegOne = DAG.getAllOnesConstant(DL: dl, VT: CTVT);
    assert(CTVT.isInteger());
    SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: CTVT, N1: CTOp, N2: NegOne);

    // Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
    // check before emitting a potentially unnecessary op.
    if (DAG.isKnownNeverZero(Op: CTOp)) {
      // (ctpop x) == 1 --> (x & x-1) == 0
      // (ctpop x) != 1 --> (x & x-1) != 0
      SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: CTVT, N1: CTOp, N2: Add);
      SDValue RHS = DAG.getSetCC(DL: dl, VT, LHS: And, RHS: Zero, Cond);
      return RHS;
    }

    // (ctpop x) == 1 --> (x ^ x-1) >  x-1
    // (ctpop x) != 1 --> (x ^ x-1) <= x-1
    SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: CTVT, N1: CTOp, N2: Add);
    ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
    return DAG.getSetCC(DL: dl, VT, LHS: Xor, RHS: Add, Cond: CmpCond);
  }

  return SDValue();
}
4590
4591static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4592 ISD::CondCode Cond, const SDLoc &dl,
4593 SelectionDAG &DAG) {
4594 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4595 return SDValue();
4596
4597 auto *C1 = isConstOrConstSplat(N: N1, /* AllowUndefs */ true);
4598 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4599 return SDValue();
4600
4601 auto getRotateSource = [](SDValue X) {
4602 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4603 return X.getOperand(i: 0);
4604 return SDValue();
4605 };
4606
4607 // Peek through a rotated value compared against 0 or -1:
4608 // (rot X, Y) == 0/-1 --> X == 0/-1
4609 // (rot X, Y) != 0/-1 --> X != 0/-1
4610 if (SDValue R = getRotateSource(N0))
4611 return DAG.getSetCC(DL: dl, VT, LHS: R, RHS: N1, Cond);
4612
4613 // Peek through an 'or' of a rotated value compared against 0:
4614 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4615 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4616 //
4617 // TODO: Add the 'and' with -1 sibling.
4618 // TODO: Recurse through a series of 'or' ops to find the rotate.
4619 EVT OpVT = N0.getValueType();
4620 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4621 if (SDValue R = getRotateSource(N0.getOperand(i: 0))) {
4622 SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: R, N2: N0.getOperand(i: 1));
4623 return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4624 }
4625 if (SDValue R = getRotateSource(N0.getOperand(i: 1))) {
4626 SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: R, N2: N0.getOperand(i: 0));
4627 return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4628 }
4629 }
4630
4631 return SDValue();
4632}
4633
4634static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
4635 ISD::CondCode Cond, const SDLoc &dl,
4636 SelectionDAG &DAG) {
4637 // If we are testing for all-bits-clear, we might be able to do that with
4638 // less shifting since bit-order does not matter.
4639 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4640 return SDValue();
4641
4642 auto *C1 = isConstOrConstSplat(N: N1, /* AllowUndefs */ true);
4643 if (!C1 || !C1->isZero())
4644 return SDValue();
4645
4646 if (!N0.hasOneUse() ||
4647 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4648 return SDValue();
4649
4650 unsigned BitWidth = N0.getScalarValueSizeInBits();
4651 auto *ShAmtC = isConstOrConstSplat(N: N0.getOperand(i: 2));
4652 if (!ShAmtC)
4653 return SDValue();
4654
4655 uint64_t ShAmt = ShAmtC->getAPIntValue().urem(RHS: BitWidth);
4656 if (ShAmt == 0)
4657 return SDValue();
4658
4659 // Canonicalize fshr as fshl to reduce pattern-matching.
4660 if (N0.getOpcode() == ISD::FSHR)
4661 ShAmt = BitWidth - ShAmt;
4662
4663 // Match an 'or' with a specific operand 'Other' in either commuted variant.
4664 SDValue X, Y;
4665 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4666 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4667 return false;
4668 if (Or.getOperand(i: 0) == Other) {
4669 X = Or.getOperand(i: 0);
4670 Y = Or.getOperand(i: 1);
4671 return true;
4672 }
4673 if (Or.getOperand(i: 1) == Other) {
4674 X = Or.getOperand(i: 1);
4675 Y = Or.getOperand(i: 0);
4676 return true;
4677 }
4678 return false;
4679 };
4680
4681 EVT OpVT = N0.getValueType();
4682 EVT ShAmtVT = N0.getOperand(i: 2).getValueType();
4683 SDValue F0 = N0.getOperand(i: 0);
4684 SDValue F1 = N0.getOperand(i: 1);
4685 if (matchOr(F0, F1)) {
4686 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4687 SDValue NewShAmt = DAG.getConstant(Val: ShAmt, DL: dl, VT: ShAmtVT);
4688 SDValue Shift = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: OpVT, N1: Y, N2: NewShAmt);
4689 SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: Shift, N2: X);
4690 return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4691 }
4692 if (matchOr(F1, F0)) {
4693 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4694 SDValue NewShAmt = DAG.getConstant(Val: BitWidth - ShAmt, DL: dl, VT: ShAmtVT);
4695 SDValue Shift = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: OpVT, N1: Y, N2: NewShAmt);
4696 SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: Shift, N2: X);
4697 return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4698 }
4699
4700 return SDValue();
4701}
4702
4703/// Try to simplify a setcc built with the specified operands and cc. If it is
4704/// unable to simplify it, return a null SDValue.
4705SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4706 ISD::CondCode Cond, bool foldBooleans,
4707 DAGCombinerInfo &DCI,
4708 const SDLoc &dl) const {
4709 SelectionDAG &DAG = DCI.DAG;
4710 const DataLayout &Layout = DAG.getDataLayout();
4711 EVT OpVT = N0.getValueType();
4712 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4713
4714 // Constant fold or commute setcc.
4715 if (SDValue Fold = DAG.FoldSetCC(VT, N1: N0, N2: N1, Cond, dl))
4716 return Fold;
4717
4718 bool N0ConstOrSplat =
4719 isConstOrConstSplat(N: N0, /*AllowUndefs*/ false, /*AllowTruncate*/ AllowTruncation: true);
4720 bool N1ConstOrSplat =
4721 isConstOrConstSplat(N: N1, /*AllowUndefs*/ false, /*AllowTruncate*/ AllowTruncation: true);
4722
4723 // Canonicalize toward having the constant on the RHS.
4724 // TODO: Handle non-splat vector constants. All undef causes trouble.
4725 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4726 // infinite loop here when we encounter one.
4727 ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Operation: Cond);
4728 if (N0ConstOrSplat && !N1ConstOrSplat &&
4729 (DCI.isBeforeLegalizeOps() ||
4730 isCondCodeLegal(CC: SwappedCC, VT: N0.getSimpleValueType())))
4731 return DAG.getSetCC(DL: dl, VT, LHS: N1, RHS: N0, Cond: SwappedCC);
4732
4733 // If we have a subtract with the same 2 non-constant operands as this setcc
4734 // -- but in reverse order -- then try to commute the operands of this setcc
4735 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4736 // instruction on some targets.
4737 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4738 (DCI.isBeforeLegalizeOps() ||
4739 isCondCodeLegal(CC: SwappedCC, VT: N0.getSimpleValueType())) &&
4740 DAG.doesNodeExist(Opcode: ISD::SUB, VTList: DAG.getVTList(VT: OpVT), Ops: {N1, N0}) &&
4741 !DAG.doesNodeExist(Opcode: ISD::SUB, VTList: DAG.getVTList(VT: OpVT), Ops: {N0, N1}))
4742 return DAG.getSetCC(DL: dl, VT, LHS: N1, RHS: N0, Cond: SwappedCC);
4743
4744 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4745 return V;
4746
4747 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4748 return V;
4749
4750 if (auto *N1C = isConstOrConstSplat(N: N1)) {
4751 const APInt &C1 = N1C->getAPIntValue();
4752
4753 // Optimize some CTPOP cases.
4754 if (SDValue V = simplifySetCCWithCTPOP(TLI: *this, VT, N0, C1, Cond, dl, DAG))
4755 return V;
4756
4757 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4758 // X * Y == 0 --> (X == 0) || (Y == 0)
4759 // X * Y != 0 --> (X != 0) && (Y != 0)
4760 // TODO: This bails out if minsize is set, but if the target doesn't have a
4761 // single instruction multiply for this type, it would likely be
4762 // smaller to decompose.
4763 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4764 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4765 (N0->getFlags().hasNoUnsignedWrap() ||
4766 N0->getFlags().hasNoSignedWrap()) &&
4767 !Attr.hasFnAttr(Kind: Attribute::MinSize)) {
4768 SDValue IsXZero = DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1, Cond);
4769 SDValue IsYZero = DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 1), RHS: N1, Cond);
4770 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4771 return DAG.getNode(Opcode: LogicOp, DL: dl, VT, N1: IsXZero, N2: IsYZero);
4772 }
4773
4774 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4775 // equality comparison, then we're just comparing whether X itself is
4776 // zero.
4777 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4778 N0.getOperand(i: 0).getOpcode() == ISD::CTLZ &&
4779 llvm::has_single_bit<uint32_t>(Value: N0.getScalarValueSizeInBits())) {
4780 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N: N0.getOperand(i: 1))) {
4781 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4782 ShAmt->getAPIntValue() == Log2_32(Value: N0.getScalarValueSizeInBits())) {
4783 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4784 // (srl (ctlz x), 5) == 0 -> X != 0
4785 // (srl (ctlz x), 5) != 1 -> X != 0
4786 Cond = ISD::SETNE;
4787 } else {
4788 // (srl (ctlz x), 5) != 0 -> X == 0
4789 // (srl (ctlz x), 5) == 1 -> X == 0
4790 Cond = ISD::SETEQ;
4791 }
4792 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: N0.getValueType());
4793 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0).getOperand(i: 0), RHS: Zero,
4794 Cond);
4795 }
4796 }
4797 }
4798 }
4799
4800 // setcc X, 0, setlt --> X (when X is all sign bits)
4801 // setcc X, 0, setne --> X (when X is all sign bits)
4802 //
4803 // When we know that X has 0 or -1 in each element (or scalar), this
4804 // comparison will produce X. This is only true when boolean contents are
4805 // represented via 0s and -1s.
4806 if (VT == OpVT &&
4807 // Check that the result of setcc is 0 and -1.
4808 getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent &&
4809 // Match only for checks X < 0 and X != 0
4810 (Cond == ISD::SETLT || Cond == ISD::SETNE) && isNullOrNullSplat(V: N1) &&
4811 // The identity holds iff we know all sign bits for all lanes.
4812 DAG.ComputeNumSignBits(Op: N0) == N0.getScalarValueSizeInBits())
4813 return N0;
4814
4815 // FIXME: Support vectors.
4816 if (auto *N1C = dyn_cast<ConstantSDNode>(Val: N1.getNode())) {
4817 const APInt &C1 = N1C->getAPIntValue();
4818
4819 // (zext x) == C --> x == (trunc C)
4820 // (sext x) == C --> x == (trunc C)
4821 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4822 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4823 unsigned MinBits = N0.getValueSizeInBits();
4824 SDValue PreExt;
4825 bool Signed = false;
4826 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4827 // ZExt
4828 MinBits = N0->getOperand(Num: 0).getValueSizeInBits();
4829 PreExt = N0->getOperand(Num: 0);
4830 } else if (N0->getOpcode() == ISD::AND) {
4831 // DAGCombine turns costly ZExts into ANDs
4832 if (auto *C = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1)))
4833 if ((C->getAPIntValue()+1).isPowerOf2()) {
4834 MinBits = C->getAPIntValue().countr_one();
4835 PreExt = N0->getOperand(Num: 0);
4836 }
4837 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4838 // SExt
4839 MinBits = N0->getOperand(Num: 0).getValueSizeInBits();
4840 PreExt = N0->getOperand(Num: 0);
4841 Signed = true;
4842 } else if (auto *LN0 = dyn_cast<LoadSDNode>(Val&: N0)) {
4843 // ZEXTLOAD / SEXTLOAD
4844 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4845 MinBits = LN0->getMemoryVT().getSizeInBits();
4846 PreExt = N0;
4847 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4848 Signed = true;
4849 MinBits = LN0->getMemoryVT().getSizeInBits();
4850 PreExt = N0;
4851 }
4852 }
4853
4854 // Figure out how many bits we need to preserve this constant.
4855 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4856
4857 // Make sure we're not losing bits from the constant.
4858 if (MinBits > 0 &&
4859 MinBits < C1.getBitWidth() &&
4860 MinBits >= ReqdBits) {
4861 EVT MinVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: MinBits);
4862 if (isTypeDesirableForOp(ISD::SETCC, VT: MinVT)) {
4863 // Will get folded away.
4864 SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MinVT, Operand: PreExt);
4865 if (MinBits == 1 && C1 == 1)
4866 // Invert the condition.
4867 return DAG.getSetCC(DL: dl, VT, LHS: Trunc, RHS: DAG.getConstant(Val: 0, DL: dl, VT: MVT::i1),
4868 Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4869 SDValue C = DAG.getConstant(Val: C1.trunc(width: MinBits), DL: dl, VT: MinVT);
4870 return DAG.getSetCC(DL: dl, VT, LHS: Trunc, RHS: C, Cond);
4871 }
4872
4873 // If truncating the setcc operands is not desirable, we can still
4874 // simplify the expression in some cases:
4875 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4876 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4877 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4878 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4879 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4880 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4881 SDValue TopSetCC = N0->getOperand(Num: 0);
4882 unsigned N0Opc = N0->getOpcode();
4883 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4884 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4885 TopSetCC.getOpcode() == ISD::SETCC &&
4886 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4887 (isConstFalseVal(N: N1) ||
4888 isExtendedTrueVal(N: N1C, VT: N0->getValueType(ResNo: 0), SExt))) {
4889
4890 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4891 (!N1C->isZero() && Cond == ISD::SETNE);
4892
4893 if (!Inverse)
4894 return TopSetCC;
4895
4896 ISD::CondCode InvCond = ISD::getSetCCInverse(
4897 Operation: cast<CondCodeSDNode>(Val: TopSetCC.getOperand(i: 2))->get(),
4898 Type: TopSetCC.getOperand(i: 0).getValueType());
4899 return DAG.getSetCC(DL: dl, VT, LHS: TopSetCC.getOperand(i: 0),
4900 RHS: TopSetCC.getOperand(i: 1),
4901 Cond: InvCond);
4902 }
4903 }
4904 }
4905
4906 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4907 // equality or unsigned, and all 1 bits of the const are in the same
4908 // partial word, see if we can shorten the load.
4909 if (DCI.isBeforeLegalize() &&
4910 !ISD::isSignedIntSetCC(Code: Cond) &&
4911 N0.getOpcode() == ISD::AND && C1 == 0 &&
4912 N0.getNode()->hasOneUse() &&
4913 isa<LoadSDNode>(Val: N0.getOperand(i: 0)) &&
4914 N0.getOperand(i: 0).getNode()->hasOneUse() &&
4915 isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
4916 auto *Lod = cast<LoadSDNode>(Val: N0.getOperand(i: 0));
4917 APInt bestMask;
4918 unsigned bestWidth = 0, bestOffset = 0;
4919 if (Lod->isSimple() && Lod->isUnindexed() &&
4920 (Lod->getMemoryVT().isByteSized() ||
4921 isPaddedAtMostSignificantBitsWhenStored(VT: Lod->getMemoryVT()))) {
4922 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4923 unsigned origWidth = N0.getValueSizeInBits();
4924 unsigned maskWidth = origWidth;
4925 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4926 // 8 bits, but have to be careful...
4927 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4928 origWidth = Lod->getMemoryVT().getSizeInBits();
4929 const APInt &Mask = N0.getConstantOperandAPInt(i: 1);
        // Only consider power-of-2 widths (and at least one byte) as candidates
        // for the narrowed load.
4932 for (unsigned width = 8; width < origWidth; width *= 2) {
4933 EVT newVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: width);
4934 APInt newMask = APInt::getLowBitsSet(numBits: maskWidth, loBitsSet: width);
4935 // Avoid accessing any padding here for now (we could use memWidth
4936 // instead of origWidth here otherwise).
4937 unsigned maxOffset = origWidth - width;
4938 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4939 if (Mask.isSubsetOf(RHS: newMask)) {
4940 unsigned ptrOffset =
4941 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4942 unsigned IsFast = 0;
4943 assert((ptrOffset % 8) == 0 && "Non-Bytealigned pointer offset");
4944 Align NewAlign = commonAlignment(A: Lod->getAlign(), Offset: ptrOffset / 8);
4945 if (shouldReduceLoadWidth(Load: Lod, ExtTy: ISD::NON_EXTLOAD, NewVT: newVT,
4946 ByteOffset: ptrOffset / 8) &&
4947 allowsMemoryAccess(
4948 Context&: *DAG.getContext(), DL: Layout, VT: newVT, AddrSpace: Lod->getAddressSpace(),
4949 Alignment: NewAlign, Flags: Lod->getMemOperand()->getFlags(), Fast: &IsFast) &&
4950 IsFast) {
4951 bestOffset = ptrOffset / 8;
4952 bestMask = Mask.lshr(shiftAmt: offset);
4953 bestWidth = width;
4954 break;
4955 }
4956 }
4957 newMask <<= 8;
4958 }
4959 if (bestWidth)
4960 break;
4961 }
4962 }
4963 if (bestWidth) {
4964 EVT newVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: bestWidth);
4965 SDValue Ptr = Lod->getBasePtr();
4966 if (bestOffset != 0)
4967 Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: bestOffset));
4968 SDValue NewLoad =
4969 DAG.getLoad(VT: newVT, dl, Chain: Lod->getChain(), Ptr,
4970 PtrInfo: Lod->getPointerInfo().getWithOffset(O: bestOffset),
4971 Alignment: Lod->getBaseAlign());
4972 SDValue And =
4973 DAG.getNode(Opcode: ISD::AND, DL: dl, VT: newVT, N1: NewLoad,
4974 N2: DAG.getConstant(Val: bestMask.trunc(width: bestWidth), DL: dl, VT: newVT));
4975 return DAG.getSetCC(DL: dl, VT, LHS: And, RHS: DAG.getConstant(Val: 0LL, DL: dl, VT: newVT), Cond);
4976 }
4977 }
4978
4979 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4980 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4981 unsigned InSize = N0.getOperand(i: 0).getValueSizeInBits();
4982
4983 // If the comparison constant has bits in the upper part, the
4984 // zero-extended value could never match.
4985 if (C1.intersects(RHS: APInt::getHighBitsSet(numBits: C1.getBitWidth(),
4986 hiBitsSet: C1.getBitWidth() - InSize))) {
4987 switch (Cond) {
4988 case ISD::SETUGT:
4989 case ISD::SETUGE:
4990 case ISD::SETEQ:
4991 return DAG.getConstant(Val: 0, DL: dl, VT);
4992 case ISD::SETULT:
4993 case ISD::SETULE:
4994 case ISD::SETNE:
4995 return DAG.getConstant(Val: 1, DL: dl, VT);
4996 case ISD::SETGT:
4997 case ISD::SETGE:
4998 // True if the sign bit of C1 is set.
4999 return DAG.getConstant(Val: C1.isNegative(), DL: dl, VT);
5000 case ISD::SETLT:
5001 case ISD::SETLE:
5002 // True if the sign bit of C1 isn't set.
5003 return DAG.getConstant(Val: C1.isNonNegative(), DL: dl, VT);
5004 default:
5005 break;
5006 }
5007 }
5008
5009 // Otherwise, we can perform the comparison with the low bits.
5010 switch (Cond) {
5011 case ISD::SETEQ:
5012 case ISD::SETNE:
5013 case ISD::SETUGT:
5014 case ISD::SETUGE:
5015 case ISD::SETULT:
5016 case ISD::SETULE: {
5017 EVT newVT = N0.getOperand(i: 0).getValueType();
5018 // FIXME: Should use isNarrowingProfitable.
5019 if (DCI.isBeforeLegalizeOps() ||
5020 (isOperationLegal(Op: ISD::SETCC, VT: newVT) &&
5021 isCondCodeLegal(CC: Cond, VT: newVT.getSimpleVT()) &&
5022 isTypeDesirableForOp(ISD::SETCC, VT: newVT))) {
5023 EVT NewSetCCVT = getSetCCResultType(DL: Layout, Context&: *DAG.getContext(), VT: newVT);
5024 SDValue NewConst = DAG.getConstant(Val: C1.trunc(width: InSize), DL: dl, VT: newVT);
5025
5026 SDValue NewSetCC = DAG.getSetCC(DL: dl, VT: NewSetCCVT, LHS: N0.getOperand(i: 0),
5027 RHS: NewConst, Cond);
5028 return DAG.getBoolExtOrTrunc(Op: NewSetCC, SL: dl, VT, OpVT: N0.getValueType());
5029 }
5030 break;
5031 }
5032 default:
5033 break; // todo, be more careful with signed comparisons
5034 }
5035 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
5036 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5037 !isSExtCheaperThanZExt(FromTy: cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT(),
5038 ToTy: OpVT)) {
5039 EVT ExtSrcTy = cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT();
5040 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
5041 EVT ExtDstTy = N0.getValueType();
5042 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
5043
5044 // If the constant doesn't fit into the number of bits for the source of
5045 // the sign extension, it is impossible for both sides to be equal.
5046 if (C1.getSignificantBits() > ExtSrcTyBits)
5047 return DAG.getBoolConstant(V: Cond == ISD::SETNE, DL: dl, VT, OpVT);
5048
5049 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
5050 ExtDstTy != ExtSrcTy && "Unexpected types!");
5051 APInt Imm = APInt::getLowBitsSet(numBits: ExtDstTyBits, loBitsSet: ExtSrcTyBits);
5052 SDValue ZextOp = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ExtDstTy, N1: N0.getOperand(i: 0),
5053 N2: DAG.getConstant(Val: Imm, DL: dl, VT: ExtDstTy));
5054 if (!DCI.isCalledByLegalizer())
5055 DCI.AddToWorklist(N: ZextOp.getNode());
5056 // Otherwise, make this a use of a zext.
5057 return DAG.getSetCC(DL: dl, VT, LHS: ZextOp,
5058 RHS: DAG.getConstant(Val: C1 & Imm, DL: dl, VT: ExtDstTy), Cond);
5059 } else if ((N1C->isZero() || N1C->isOne()) &&
5060 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5061 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
5062 // excluded as they are handled below whilst checking for foldBooleans.
5063 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
5064 isTypeLegal(VT) && VT.bitsLE(VT: N0.getValueType()) &&
5065 (N0.getValueType() == MVT::i1 ||
5066 getBooleanContents(Type: N0.getValueType()) == ZeroOrOneBooleanContent) &&
5067 DAG.MaskedValueIsZero(
5068 Op: N0, Mask: APInt::getBitsSetFrom(numBits: N0.getValueSizeInBits(), loBit: 1))) {
5069 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
5070 if (TrueWhenTrue)
5071 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: N0);
5072 // Invert the condition.
5073 if (N0.getOpcode() == ISD::SETCC) {
5074 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N0.getOperand(i: 2))->get();
5075 CC = ISD::getSetCCInverse(Operation: CC, Type: N0.getOperand(i: 0).getValueType());
5076 if (DCI.isBeforeLegalizeOps() ||
5077 isCondCodeLegal(CC, VT: N0.getOperand(i: 0).getSimpleValueType()))
5078 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N0.getOperand(i: 1), Cond: CC);
5079 }
5080 }
5081
5082 if ((N0.getOpcode() == ISD::XOR ||
5083 (N0.getOpcode() == ISD::AND &&
5084 N0.getOperand(i: 0).getOpcode() == ISD::XOR &&
5085 N0.getOperand(i: 1) == N0.getOperand(i: 0).getOperand(i: 1))) &&
5086 isOneConstant(V: N0.getOperand(i: 1))) {
5087 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
5088 // can only do this if the top bits are known zero.
5089 unsigned BitWidth = N0.getValueSizeInBits();
5090 if (DAG.MaskedValueIsZero(Op: N0,
5091 Mask: APInt::getHighBitsSet(numBits: BitWidth,
5092 hiBitsSet: BitWidth-1))) {
5093 // Okay, get the un-inverted input value.
5094 SDValue Val;
5095 if (N0.getOpcode() == ISD::XOR) {
5096 Val = N0.getOperand(i: 0);
5097 } else {
5098 assert(N0.getOpcode() == ISD::AND &&
5099 N0.getOperand(0).getOpcode() == ISD::XOR);
5100 // ((X^1)&1)^1 -> X & 1
5101 Val = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: N0.getValueType(),
5102 N1: N0.getOperand(i: 0).getOperand(i: 0),
5103 N2: N0.getOperand(i: 1));
5104 }
5105
5106 return DAG.getSetCC(DL: dl, VT, LHS: Val, RHS: N1,
5107 Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5108 }
5109 } else if (N1C->isOne()) {
5110 SDValue Op0 = N0;
5111 if (Op0.getOpcode() == ISD::TRUNCATE)
5112 Op0 = Op0.getOperand(i: 0);
5113
5114 if ((Op0.getOpcode() == ISD::XOR) &&
5115 Op0.getOperand(i: 0).getOpcode() == ISD::SETCC &&
5116 Op0.getOperand(i: 1).getOpcode() == ISD::SETCC) {
5117 SDValue XorLHS = Op0.getOperand(i: 0);
5118 SDValue XorRHS = Op0.getOperand(i: 1);
5119 // Ensure that the input setccs return an i1 type or 0/1 value.
5120 if (Op0.getValueType() == MVT::i1 ||
5121 (getBooleanContents(Type: XorLHS.getOperand(i: 0).getValueType()) ==
5122 ZeroOrOneBooleanContent &&
5123 getBooleanContents(Type: XorRHS.getOperand(i: 0).getValueType()) ==
5124 ZeroOrOneBooleanContent)) {
5125 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
5126 Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
5127 return DAG.getSetCC(DL: dl, VT, LHS: XorLHS, RHS: XorRHS, Cond);
5128 }
5129 }
5130 if (Op0.getOpcode() == ISD::AND && isOneConstant(V: Op0.getOperand(i: 1))) {
5131 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
5132 if (Op0.getValueType().bitsGT(VT))
5133 Op0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
5134 N1: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Op0.getOperand(i: 0)),
5135 N2: DAG.getConstant(Val: 1, DL: dl, VT));
5136 else if (Op0.getValueType().bitsLT(VT))
5137 Op0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
5138 N1: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: Op0.getOperand(i: 0)),
5139 N2: DAG.getConstant(Val: 1, DL: dl, VT));
5140
5141 return DAG.getSetCC(DL: dl, VT, LHS: Op0,
5142 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Op0.getValueType()),
5143 Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5144 }
5145 if (Op0.getOpcode() == ISD::AssertZext &&
5146 cast<VTSDNode>(Val: Op0.getOperand(i: 1))->getVT() == MVT::i1)
5147 return DAG.getSetCC(DL: dl, VT, LHS: Op0,
5148 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Op0.getValueType()),
5149 Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5150 }
5151 }
5152
5153 // Given:
5154 // icmp eq/ne (urem %x, %y), 0
5155 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
5156 // icmp eq/ne %x, 0
5157 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
5158 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5159 KnownBits XKnown = DAG.computeKnownBits(Op: N0.getOperand(i: 0));
5160 KnownBits YKnown = DAG.computeKnownBits(Op: N0.getOperand(i: 1));
5161 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
5162 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1, Cond);
5163 }
5164
5165 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
5166 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
5167 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5168 N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) &&
5169 N0.getConstantOperandAPInt(i: 1) == OpVT.getScalarSizeInBits() - 1 &&
5170 N1C->isAllOnes()) {
5171 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0),
5172 RHS: DAG.getConstant(Val: 0, DL: dl, VT: OpVT),
5173 Cond: Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
5174 }
5175
5176 // fold (setcc (trunc x) c) -> (setcc x c)
5177 if (N0.getOpcode() == ISD::TRUNCATE &&
5178 ((N0->getFlags().hasNoUnsignedWrap() && !ISD::isSignedIntSetCC(Code: Cond)) ||
5179 (N0->getFlags().hasNoSignedWrap() &&
5180 !ISD::isUnsignedIntSetCC(Code: Cond))) &&
5181 isTypeDesirableForOp(ISD::SETCC, VT: N0.getOperand(i: 0).getValueType())) {
5182 EVT NewVT = N0.getOperand(i: 0).getValueType();
5183 SDValue NewConst = DAG.getConstant(
5184 Val: (N0->getFlags().hasNoSignedWrap() && !ISD::isUnsignedIntSetCC(Code: Cond))
5185 ? C1.sext(width: NewVT.getSizeInBits())
5186 : C1.zext(width: NewVT.getSizeInBits()),
5187 DL: dl, VT: NewVT);
5188 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: NewConst, Cond);
5189 }
5190
5191 if (SDValue V =
5192 optimizeSetCCOfSignedTruncationCheck(SCCVT: VT, N0, N1, Cond, DCI, DL: dl))
5193 return V;
5194 }
5195
5196 // These simplifications apply to splat vectors as well.
5197 // TODO: Handle more splat vector cases.
5198 if (auto *N1C = isConstOrConstSplat(N: N1)) {
5199 const APInt &C1 = N1C->getAPIntValue();
5200
5201 APInt MinVal, MaxVal;
5202 unsigned OperandBitSize = N1C->getValueType(ResNo: 0).getScalarSizeInBits();
5203 if (ISD::isSignedIntSetCC(Code: Cond)) {
5204 MinVal = APInt::getSignedMinValue(numBits: OperandBitSize);
5205 MaxVal = APInt::getSignedMaxValue(numBits: OperandBitSize);
5206 } else {
5207 MinVal = APInt::getMinValue(numBits: OperandBitSize);
5208 MaxVal = APInt::getMaxValue(numBits: OperandBitSize);
5209 }
5210
5211 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
5212 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
5213 // X >= MIN --> true
5214 if (C1 == MinVal)
5215 return DAG.getBoolConstant(V: true, DL: dl, VT, OpVT);
5216
5217 if (!VT.isVector()) { // TODO: Support this for vectors.
5218 // X >= C0 --> X > (C0 - 1)
5219 APInt C = C1 - 1;
5220 ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
5221 if ((DCI.isBeforeLegalizeOps() ||
5222 isCondCodeLegal(CC: NewCC, VT: OpVT.getSimpleVT())) &&
5223 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5224 isLegalICmpImmediate(C.getSExtValue())))) {
5225 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5226 RHS: DAG.getConstant(Val: C, DL: dl, VT: N1.getValueType()),
5227 Cond: NewCC);
5228 }
5229 }
5230 }
5231
5232 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
5233 // X <= MAX --> true
5234 if (C1 == MaxVal)
5235 return DAG.getBoolConstant(V: true, DL: dl, VT, OpVT);
5236
5237 // X <= C0 --> X < (C0 + 1)
5238 if (!VT.isVector()) { // TODO: Support this for vectors.
5239 APInt C = C1 + 1;
5240 ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
5241 if ((DCI.isBeforeLegalizeOps() ||
5242 isCondCodeLegal(CC: NewCC, VT: OpVT.getSimpleVT())) &&
5243 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5244 isLegalICmpImmediate(C.getSExtValue())))) {
5245 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5246 RHS: DAG.getConstant(Val: C, DL: dl, VT: N1.getValueType()),
5247 Cond: NewCC);
5248 }
5249 }
5250 }
5251
5252 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
5253 if (C1 == MinVal)
5254 return DAG.getBoolConstant(V: false, DL: dl, VT, OpVT); // X < MIN --> false
5255
5256 // TODO: Support this for vectors after legalize ops.
5257 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5258 // Canonicalize setlt X, Max --> setne X, Max
5259 if (C1 == MaxVal)
5260 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: ISD::SETNE);
5261
5262 // If we have setult X, 1, turn it into seteq X, 0
5263 if (C1 == MinVal+1)
5264 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5265 RHS: DAG.getConstant(Val: MinVal, DL: dl, VT: N0.getValueType()),
5266 Cond: ISD::SETEQ);
5267 }
5268 }
5269
5270 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
5271 if (C1 == MaxVal)
5272 return DAG.getBoolConstant(V: false, DL: dl, VT, OpVT); // X > MAX --> false
5273
5274 // TODO: Support this for vectors after legalize ops.
5275 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5276 // Canonicalize setgt X, Min --> setne X, Min
5277 if (C1 == MinVal)
5278 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: ISD::SETNE);
5279
5280 // If we have setugt X, Max-1, turn it into seteq X, Max
5281 if (C1 == MaxVal-1)
5282 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5283 RHS: DAG.getConstant(Val: MaxVal, DL: dl, VT: N0.getValueType()),
5284 Cond: ISD::SETEQ);
5285 }
5286 }
5287
5288 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5289 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5290 if (C1.isZero())
5291 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5292 SCCVT: VT, N0, N1C: N1, Cond, DCI, DL: dl))
5293 return CC;
5294
5295 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5296 // For example, when high 32-bits of i64 X are known clear:
5297 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
5298 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
5299 bool CmpZero = N1C->isZero();
5300 bool CmpNegOne = N1C->isAllOnes();
5301 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5302 // Match or(lo,shl(hi,bw/2)) pattern.
5303 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5304 unsigned EltBits = V.getScalarValueSizeInBits();
5305 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5306 return false;
5307 SDValue LHS = V.getOperand(i: 0);
5308 SDValue RHS = V.getOperand(i: 1);
5309 APInt HiBits = APInt::getHighBitsSet(numBits: EltBits, hiBitsSet: EltBits / 2);
5310 // Unshifted element must have zero upperbits.
5311 if (RHS.getOpcode() == ISD::SHL &&
5312 isa<ConstantSDNode>(Val: RHS.getOperand(i: 1)) &&
5313 RHS.getConstantOperandAPInt(i: 1) == (EltBits / 2) &&
5314 DAG.MaskedValueIsZero(Op: LHS, Mask: HiBits)) {
5315 Lo = LHS;
5316 Hi = RHS.getOperand(i: 0);
5317 return true;
5318 }
5319 if (LHS.getOpcode() == ISD::SHL &&
5320 isa<ConstantSDNode>(Val: LHS.getOperand(i: 1)) &&
5321 LHS.getConstantOperandAPInt(i: 1) == (EltBits / 2) &&
5322 DAG.MaskedValueIsZero(Op: RHS, Mask: HiBits)) {
5323 Lo = RHS;
5324 Hi = LHS.getOperand(i: 0);
5325 return true;
5326 }
5327 return false;
5328 };
5329
5330 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5331 unsigned EltBits = N0.getScalarValueSizeInBits();
5332 unsigned HalfBits = EltBits / 2;
5333 APInt HiBits = APInt::getHighBitsSet(numBits: EltBits, hiBitsSet: HalfBits);
5334 SDValue LoBits = DAG.getConstant(Val: ~HiBits, DL: dl, VT: OpVT);
5335 SDValue HiMask = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1: Hi, N2: LoBits);
5336 SDValue NewN0 =
5337 DAG.getNode(Opcode: CmpZero ? ISD::OR : ISD::AND, DL: dl, VT: OpVT, N1: Lo, N2: HiMask);
5338 SDValue NewN1 = CmpZero ? DAG.getConstant(Val: 0, DL: dl, VT: OpVT) : LoBits;
5339 return DAG.getSetCC(DL: dl, VT, LHS: NewN0, RHS: NewN1, Cond);
5340 };
5341
5342 SDValue Lo, Hi;
5343 if (IsConcat(N0, Lo, Hi))
5344 return MergeConcat(Lo, Hi);
5345
5346 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5347 SDValue Lo0, Lo1, Hi0, Hi1;
5348 if (IsConcat(N0.getOperand(i: 0), Lo0, Hi0) &&
5349 IsConcat(N0.getOperand(i: 1), Lo1, Hi1)) {
5350 return MergeConcat(DAG.getNode(Opcode: N0.getOpcode(), DL: dl, VT: OpVT, N1: Lo0, N2: Lo1),
5351 DAG.getNode(Opcode: N0.getOpcode(), DL: dl, VT: OpVT, N1: Hi0, N2: Hi1));
5352 }
5353 }
5354 }
5355 }
5356
5357 // If we have "setcc X, C0", check to see if we can shrink the immediate
5358 // by changing cc.
5359 // TODO: Support this for vectors after legalize ops.
5360 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5361 // SETUGT X, SINTMAX -> SETLT X, 0
5362 // SETUGE X, SINTMIN -> SETLT X, 0
5363 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5364 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5365 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5366 RHS: DAG.getConstant(Val: 0, DL: dl, VT: N1.getValueType()),
5367 Cond: ISD::SETLT);
5368
5369 // SETULT X, SINTMIN -> SETGT X, -1
5370 // SETULE X, SINTMAX -> SETGT X, -1
5371 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5372 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5373 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5374 RHS: DAG.getAllOnesConstant(DL: dl, VT: N1.getValueType()),
5375 Cond: ISD::SETGT);
5376 }
5377 }
5378
5379 // Back to non-vector simplifications.
5380 // TODO: Can we do these for vector splats?
5381 if (auto *N1C = dyn_cast<ConstantSDNode>(Val: N1.getNode())) {
5382 const APInt &C1 = N1C->getAPIntValue();
5383 EVT ShValTy = N0.getValueType();
5384
5385 // Fold bit comparisons when we can. This will result in an
5386 // incorrect value when boolean false is negative one, unless
5387 // the bitsize is 1 in which case the false value is the same
5388 // in practice regardless of the representation.
5389 if ((VT.getSizeInBits() == 1 ||
5390 getBooleanContents(Type: N0.getValueType()) == ZeroOrOneBooleanContent) &&
5391 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5392 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(VT: ShValTy))) &&
5393 N0.getOpcode() == ISD::AND) {
5394 if (auto *AndRHS = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
5395 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5396 // Perform the xform if the AND RHS is a single bit.
5397 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5398 if (AndRHS->getAPIntValue().isPowerOf2() &&
5399 !shouldAvoidTransformToShift(VT: ShValTy, Amount: ShCt)) {
5400 return DAG.getNode(
5401 Opcode: ISD::TRUNCATE, DL: dl, VT,
5402 Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5403 N2: DAG.getShiftAmountConstant(Val: ShCt, VT: ShValTy, DL: dl)));
5404 }
5405 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5406 // (X & 8) == 8 --> (X & 8) >> 3
5407 // Perform the xform if C1 is a single bit.
5408 unsigned ShCt = C1.logBase2();
5409 if (C1.isPowerOf2() && !shouldAvoidTransformToShift(VT: ShValTy, Amount: ShCt)) {
5410 return DAG.getNode(
5411 Opcode: ISD::TRUNCATE, DL: dl, VT,
5412 Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5413 N2: DAG.getShiftAmountConstant(Val: ShCt, VT: ShValTy, DL: dl)));
5414 }
5415 }
5416 }
5417 }
5418
5419 if (C1.getSignificantBits() <= 64 &&
5420 !isLegalICmpImmediate(C1.getSExtValue())) {
5421 // (X & -256) == 256 -> (X >> 8) == 1
5422 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5423 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5424 if (auto *AndRHS = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
5425 const APInt &AndRHSC = AndRHS->getAPIntValue();
5426 if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(RHS: AndRHSC)) {
5427 unsigned ShiftBits = AndRHSC.countr_zero();
5428 if (!shouldAvoidTransformToShift(VT: ShValTy, Amount: ShiftBits)) {
5429 // If using an unsigned shift doesn't yield a legal compare
5430 // immediate, try using sra instead.
5431 APInt NewC = C1.lshr(shiftAmt: ShiftBits);
5432 if (NewC.getSignificantBits() <= 64 &&
5433 !isLegalICmpImmediate(NewC.getSExtValue())) {
5434 APInt SignedC = C1.ashr(ShiftAmt: ShiftBits);
5435 if (SignedC.getSignificantBits() <= 64 &&
5436 isLegalICmpImmediate(SignedC.getSExtValue())) {
5437 SDValue Shift = DAG.getNode(
5438 Opcode: ISD::SRA, DL: dl, VT: ShValTy, N1: N0.getOperand(i: 0),
5439 N2: DAG.getShiftAmountConstant(Val: ShiftBits, VT: ShValTy, DL: dl));
5440 SDValue CmpRHS = DAG.getConstant(Val: SignedC, DL: dl, VT: ShValTy);
5441 return DAG.getSetCC(DL: dl, VT, LHS: Shift, RHS: CmpRHS, Cond);
5442 }
5443 }
5444 SDValue Shift = DAG.getNode(
5445 Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0.getOperand(i: 0),
5446 N2: DAG.getShiftAmountConstant(Val: ShiftBits, VT: ShValTy, DL: dl));
5447 SDValue CmpRHS = DAG.getConstant(Val: NewC, DL: dl, VT: ShValTy);
5448 return DAG.getSetCC(DL: dl, VT, LHS: Shift, RHS: CmpRHS, Cond);
5449 }
5450 }
5451 }
5452 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5453 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5454 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5455 // X < 0x100000000 -> (X >> 32) < 1
5456 // X >= 0x100000000 -> (X >> 32) >= 1
5457 // X <= 0x0ffffffff -> (X >> 32) < 1
5458 // X > 0x0ffffffff -> (X >> 32) >= 1
5459 unsigned ShiftBits;
5460 APInt NewC = C1;
5461 ISD::CondCode NewCond = Cond;
5462 if (AdjOne) {
5463 ShiftBits = C1.countr_one();
5464 NewC = NewC + 1;
5465 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5466 } else {
5467 ShiftBits = C1.countr_zero();
5468 }
5469 NewC.lshrInPlace(ShiftAmt: ShiftBits);
5470 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5471 isLegalICmpImmediate(NewC.getSExtValue()) &&
5472 !shouldAvoidTransformToShift(VT: ShValTy, Amount: ShiftBits)) {
5473 SDValue Shift =
5474 DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5475 N2: DAG.getShiftAmountConstant(Val: ShiftBits, VT: ShValTy, DL: dl));
5476 SDValue CmpRHS = DAG.getConstant(Val: NewC, DL: dl, VT: ShValTy);
5477 return DAG.getSetCC(DL: dl, VT, LHS: Shift, RHS: CmpRHS, Cond: NewCond);
5478 }
5479 }
5480 }
5481 }
5482
5483 if (!isa<ConstantFPSDNode>(Val: N0) && isa<ConstantFPSDNode>(Val: N1)) {
5484 auto *CFP = cast<ConstantFPSDNode>(Val&: N1);
5485 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5486
5487 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5488 // constant if knowing that the operand is non-nan is enough. We prefer to
5489 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5490 // materialize 0.0.
5491 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5492 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N0, Cond);
5493
5494 // setcc (fneg x), C -> setcc swap(pred) x, -C
5495 if (N0.getOpcode() == ISD::FNEG) {
5496 ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Operation: Cond);
5497 if (DCI.isBeforeLegalizeOps() ||
5498 isCondCodeLegal(CC: SwapCond, VT: N0.getSimpleValueType())) {
5499 SDValue NegN1 = DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT: N0.getValueType(), Operand: N1);
5500 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: NegN1, Cond: SwapCond);
5501 }
5502 }
5503
5504 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5505 if (isOperationLegalOrCustom(Op: ISD::IS_FPCLASS, VT: N0.getValueType()) &&
5506 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(ResNo: 0))) {
5507 bool IsFabs = N0.getOpcode() == ISD::FABS;
5508 SDValue Op = IsFabs ? N0.getOperand(i: 0) : N0;
5509 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5510 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5511 : (IsFabs ? fcInf : fcPosInf);
5512 if (Cond == ISD::SETUEQ)
5513 Flag |= fcNan;
5514 return DAG.getNode(Opcode: ISD::IS_FPCLASS, DL: dl, VT, N1: Op,
5515 N2: DAG.getTargetConstant(Val: Flag, DL: dl, VT: MVT::i32));
5516 }
5517 }
5518
5519 // If the condition is not legal, see if we can find an equivalent one
5520 // which is legal.
5521 if (!isCondCodeLegal(CC: Cond, VT: N0.getSimpleValueType())) {
5522 // If the comparison was an awkward floating-point == or != and one of
5523 // the comparison operands is infinity or negative infinity, convert the
5524 // condition to a less-awkward <= or >=.
5525 if (CFP->getValueAPF().isInfinity()) {
5526 bool IsNegInf = CFP->getValueAPF().isNegative();
5527 ISD::CondCode NewCond = ISD::SETCC_INVALID;
5528 switch (Cond) {
5529 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5530 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5531 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5532 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5533 default: break;
5534 }
5535 if (NewCond != ISD::SETCC_INVALID &&
5536 isCondCodeLegal(CC: NewCond, VT: N0.getSimpleValueType()))
5537 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: NewCond);
5538 }
5539 }
5540 }
5541
5542 if (N0 == N1) {
5543 // The sext(setcc()) => setcc() optimization relies on the appropriate
5544 // constant being emitted.
5545 assert(!N0.getValueType().isInteger() &&
5546 "Integer types should be handled by FoldSetCC");
5547
5548 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5549 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5550 if (UOF == 2) // FP operators that are undefined on NaNs.
5551 return DAG.getBoolConstant(V: EqTrue, DL: dl, VT, OpVT);
5552 if (UOF == unsigned(EqTrue))
5553 return DAG.getBoolConstant(V: EqTrue, DL: dl, VT, OpVT);
5554 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5555 // if it is not already.
5556 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5557 if (NewCond != Cond &&
5558 (DCI.isBeforeLegalizeOps() ||
5559 isCondCodeLegal(CC: NewCond, VT: N0.getSimpleValueType())))
5560 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: NewCond);
5561 }
5562
5563 // ~X > ~Y --> Y > X
5564 // ~X < ~Y --> Y < X
5565 // ~X < C --> X > ~C
5566 // ~X > C --> X < ~C
5567 if ((isSignedIntSetCC(Code: Cond) || isUnsignedIntSetCC(Code: Cond)) &&
5568 N0.getValueType().isInteger()) {
5569 if (isBitwiseNot(V: N0)) {
5570 if (isBitwiseNot(V: N1))
5571 return DAG.getSetCC(DL: dl, VT, LHS: N1.getOperand(i: 0), RHS: N0.getOperand(i: 0), Cond);
5572
5573 if (DAG.isConstantIntBuildVectorOrConstantInt(N: N1) &&
5574 !DAG.isConstantIntBuildVectorOrConstantInt(N: N0.getOperand(i: 0))) {
5575 SDValue Not = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5576 return DAG.getSetCC(DL: dl, VT, LHS: Not, RHS: N0.getOperand(i: 0), Cond);
5577 }
5578 }
5579 }
5580
5581 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5582 N0.getValueType().isInteger()) {
5583 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5584 N0.getOpcode() == ISD::XOR) {
5585 // Simplify (X+Y) == (X+Z) --> Y == Z
5586 if (N0.getOpcode() == N1.getOpcode()) {
5587 if (N0.getOperand(i: 0) == N1.getOperand(i: 0))
5588 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 1), RHS: N1.getOperand(i: 1), Cond);
5589 if (N0.getOperand(i: 1) == N1.getOperand(i: 1))
5590 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1.getOperand(i: 0), Cond);
5591 if (isCommutativeBinOp(Opcode: N0.getOpcode())) {
5592 // If X op Y == Y op X, try other combinations.
5593 if (N0.getOperand(i: 0) == N1.getOperand(i: 1))
5594 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 1), RHS: N1.getOperand(i: 0),
5595 Cond);
5596 if (N0.getOperand(i: 1) == N1.getOperand(i: 0))
5597 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1.getOperand(i: 1),
5598 Cond);
5599 }
5600 }
5601
5602 // If RHS is a legal immediate value for a compare instruction, we need
5603 // to be careful about increasing register pressure needlessly.
5604 bool LegalRHSImm = false;
5605
5606 if (auto *RHSC = dyn_cast<ConstantSDNode>(Val&: N1)) {
5607 if (auto *LHSR = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
5608 // Turn (X+C1) == C2 --> X == C2-C1
5609 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5610 return DAG.getSetCC(
5611 DL: dl, VT, LHS: N0.getOperand(i: 0),
5612 RHS: DAG.getConstant(Val: RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5613 DL: dl, VT: N0.getValueType()),
5614 Cond);
5615
5616 // Turn (X^C1) == C2 --> X == C1^C2
5617 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5618 return DAG.getSetCC(
5619 DL: dl, VT, LHS: N0.getOperand(i: 0),
5620 RHS: DAG.getConstant(Val: LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5621 DL: dl, VT: N0.getValueType()),
5622 Cond);
5623 }
5624
5625 // Turn (C1-X) == C2 --> X == C1-C2
5626 if (auto *SUBC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 0)))
5627 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5628 return DAG.getSetCC(
5629 DL: dl, VT, LHS: N0.getOperand(i: 1),
5630 RHS: DAG.getConstant(Val: SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5631 DL: dl, VT: N0.getValueType()),
5632 Cond);
5633
5634 // Could RHSC fold directly into a compare?
5635 if (RHSC->getValueType(ResNo: 0).getSizeInBits() <= 64)
5636 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5637 }
5638
5639 // (X+Y) == X --> Y == 0 and similar folds.
5640 // Don't do this if X is an immediate that can fold into a cmp
5641 // instruction and X+Y has other uses. It could be an induction variable
5642 // chain, and the transform would increase register pressure.
5643 if (!LegalRHSImm || N0.hasOneUse())
5644 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, DL: dl, DCI))
5645 return V;
5646 }
5647
5648 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5649 N1.getOpcode() == ISD::XOR)
5650 if (SDValue V = foldSetCCWithBinOp(VT, N0: N1, N1: N0, Cond, DL: dl, DCI))
5651 return V;
5652
5653 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, DL: dl, DCI))
5654 return V;
5655
5656 if (SDValue V = foldSetCCWithOr(VT, N0, N1, Cond, DL: dl, DCI))
5657 return V;
5658 }
5659
5660 // Fold remainder of division by a constant.
5661 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5662 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5663 // When division is cheap or optimizing for minimum size,
5664 // fall through to DIVREM creation by skipping this fold.
5665 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Kind: Attribute::MinSize)) {
5666 if (N0.getOpcode() == ISD::UREM) {
5667 if (SDValue Folded = buildUREMEqFold(SETCCVT: VT, REMNode: N0, CompTargetNode: N1, Cond, DCI, DL: dl))
5668 return Folded;
5669 } else if (N0.getOpcode() == ISD::SREM) {
5670 if (SDValue Folded = buildSREMEqFold(SETCCVT: VT, REMNode: N0, CompTargetNode: N1, Cond, DCI, DL: dl))
5671 return Folded;
5672 }
5673 }
5674 }
5675
5676 // Fold away ALL boolean setcc's.
5677 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5678 SDValue Temp;
5679 switch (Cond) {
5680 default: llvm_unreachable("Unknown integer setcc!");
5681 case ISD::SETEQ: // X == Y -> ~(X^Y)
5682 Temp = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OpVT, N1: N0, N2: N1);
5683 N0 = DAG.getNOT(DL: dl, Val: Temp, VT: OpVT);
5684 if (!DCI.isCalledByLegalizer())
5685 DCI.AddToWorklist(N: Temp.getNode());
5686 break;
5687 case ISD::SETNE: // X != Y --> (X^Y)
5688 N0 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OpVT, N1: N0, N2: N1);
5689 break;
5690 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5691 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5692 Temp = DAG.getNOT(DL: dl, Val: N0, VT: OpVT);
5693 N0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1, N2: Temp);
5694 if (!DCI.isCalledByLegalizer())
5695 DCI.AddToWorklist(N: Temp.getNode());
5696 break;
5697 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5698 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5699 Temp = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5700 N0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1: N0, N2: Temp);
5701 if (!DCI.isCalledByLegalizer())
5702 DCI.AddToWorklist(N: Temp.getNode());
5703 break;
5704 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5705 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5706 Temp = DAG.getNOT(DL: dl, Val: N0, VT: OpVT);
5707 N0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1, N2: Temp);
5708 if (!DCI.isCalledByLegalizer())
5709 DCI.AddToWorklist(N: Temp.getNode());
5710 break;
5711 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5712 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5713 Temp = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5714 N0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: N0, N2: Temp);
5715 break;
5716 }
5717 if (VT.getScalarType() != MVT::i1) {
5718 if (!DCI.isCalledByLegalizer())
5719 DCI.AddToWorklist(N: N0.getNode());
5720 // FIXME: If running after legalize, we probably can't do this.
5721 ISD::NodeType ExtendCode = getExtendForContent(Content: getBooleanContents(Type: OpVT));
5722 N0 = DAG.getNode(Opcode: ExtendCode, DL: dl, VT, Operand: N0);
5723 }
5724 return N0;
5725 }
5726
5727 // Fold (setcc (trunc x) (trunc y)) -> (setcc x y)
5728 if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE &&
5729 N0.getOperand(i: 0).getValueType() == N1.getOperand(i: 0).getValueType() &&
5730 ((!ISD::isSignedIntSetCC(Code: Cond) && N0->getFlags().hasNoUnsignedWrap() &&
5731 N1->getFlags().hasNoUnsignedWrap()) ||
5732 (!ISD::isUnsignedIntSetCC(Code: Cond) && N0->getFlags().hasNoSignedWrap() &&
5733 N1->getFlags().hasNoSignedWrap())) &&
5734 isTypeDesirableForOp(ISD::SETCC, VT: N0.getOperand(i: 0).getValueType())) {
5735 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1.getOperand(i: 0), Cond);
5736 }
5737
5738 // Fold (setcc (sub nsw a, b), zero, s??) -> (setcc a, b, s??)
5739 // TODO: Remove that .isVector() check
5740 if (VT.isVector() && isZeroOrZeroSplat(N: N1) && N0.getOpcode() == ISD::SUB &&
5741 N0->getFlags().hasNoSignedWrap() && ISD::isSignedIntSetCC(Code: Cond)) {
5742 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N0.getOperand(i: 1), Cond);
5743 }
5744
5745 // Could not fold it.
5746 return SDValue();
5747}
5748
5749/// Returns true (and the GlobalValue and the offset) if the node is a
5750/// GlobalAddress + offset.
5751bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
5752 int64_t &Offset) const {
5753
5754 SDNode *N = unwrapAddress(N: SDValue(WN, 0)).getNode();
5755
5756 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(Val: N)) {
5757 GA = GASD->getGlobal();
5758 Offset += GASD->getOffset();
5759 return true;
5760 }
5761
5762 if (N->isAnyAdd()) {
5763 SDValue N1 = N->getOperand(Num: 0);
5764 SDValue N2 = N->getOperand(Num: 1);
5765 if (isGAPlusOffset(WN: N1.getNode(), GA, Offset)) {
5766 if (auto *V = dyn_cast<ConstantSDNode>(Val&: N2)) {
5767 Offset += V->getSExtValue();
5768 return true;
5769 }
5770 } else if (isGAPlusOffset(WN: N2.getNode(), GA, Offset)) {
5771 if (auto *V = dyn_cast<ConstantSDNode>(Val&: N1)) {
5772 Offset += V->getSExtValue();
5773 return true;
5774 }
5775 }
5776 }
5777
5778 return false;
5779}
5780
SDValue TargetLowering::PerformDAGCombine(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  // Default implementation: no optimization. Targets override this hook to
  // combine target-specific nodes; returning an empty SDValue signals to the
  // DAG combiner that nothing was changed.
  return SDValue();
}
5786
5787//===----------------------------------------------------------------------===//
5788// Inline Assembler Implementation Methods
5789//===----------------------------------------------------------------------===//
5790
5791TargetLowering::ConstraintType
5792TargetLowering::getConstraintType(StringRef Constraint) const {
5793 unsigned S = Constraint.size();
5794
5795 if (S == 1) {
5796 switch (Constraint[0]) {
5797 default: break;
5798 case 'r':
5799 return C_RegisterClass;
5800 case 'm': // memory
5801 case 'o': // offsetable
5802 case 'V': // not offsetable
5803 return C_Memory;
5804 case 'p': // Address.
5805 return C_Address;
5806 case 'n': // Simple Integer
5807 case 'E': // Floating Point Constant
5808 case 'F': // Floating Point Constant
5809 return C_Immediate;
5810 case 'i': // Simple Integer or Relocatable Constant
5811 case 's': // Relocatable Constant
5812 case 'X': // Allow ANY value.
5813 case 'I': // Target registers.
5814 case 'J':
5815 case 'K':
5816 case 'L':
5817 case 'M':
5818 case 'N':
5819 case 'O':
5820 case 'P':
5821 case '<':
5822 case '>':
5823 return C_Other;
5824 }
5825 }
5826
5827 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5828 if (S == 8 && Constraint.substr(Start: 1, N: 6) == "memory") // "{memory}"
5829 return C_Memory;
5830 return C_Register;
5831 }
5832 return C_Unknown;
5833}
5834
5835/// Try to replace an X constraint, which matches anything, with another that
5836/// has more specific requirements based on the type of the corresponding
5837/// operand.
5838const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5839 if (ConstraintVT.isInteger())
5840 return "r";
5841 if (ConstraintVT.isFloatingPoint())
5842 return "f"; // works for many targets
5843 return nullptr;
5844}
5845
SDValue TargetLowering::LowerAsmOutputForConstraint(
    SDValue &Chain, SDValue &Glue, const SDLoc &DL,
    const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
  // Default: no target-specific lowering of inline-asm outputs. Returning an
  // empty SDValue makes the caller fall back to the generic handling.
  return SDValue();
}
5851
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  StringRef Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {

  // Only single-character constraints are handled here; targets deal with
  // longer ones in their overrides.
  if (Constraint.size() > 1)
    return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'X':    // Allows any operand
  case 'i':    // Simple Integer or Relocatable Constant
  case 'n':    // Simple Integer
  case 's': {  // Relocatable Constant

    ConstantSDNode *C;
    // Constant displacement accumulated while peeling ADD/SUB nodes below.
    uint64_t Offset = 0;

    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
    // etc., since getelementpointer is variadic. We can't use
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
    // while in this case the GA may be furthest from the root node which is
    // likely an ISD::ADD.
    // Each iteration either pushes a final operand and returns, or strips one
    // constant addend off Op and continues.
    while (true) {
      // A plain integer constant satisfies every letter except 's', which
      // requires a relocatable value.
      if ((C = dyn_cast<ConstantSDNode>(Val&: Op)) && ConstraintLetter != 's') {
        // gcc prints these as sign extended. Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
        BooleanContent BCont = getBooleanContents(Type: MVT::i64);
        ISD::NodeType ExtOpc =
            IsBool ? getExtendForContent(Content: BCont) : ISD::SIGN_EXTEND;
        int64_t ExtVal =
            ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
        Ops.push_back(
            x: DAG.getTargetConstant(Val: Offset + ExtVal, DL: SDLoc(C), VT: MVT::i64));
        return;
      }
      // 'n' accepts only integers; symbolic operands are valid for the rest.
      if (ConstraintLetter != 'n') {
        if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Val&: Op)) {
          Ops.push_back(x: DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL: SDLoc(Op),
                                                 VT: GA->getValueType(ResNo: 0),
                                                 offset: Offset + GA->getOffset()));
          return;
        }
        if (const auto *BA = dyn_cast<BlockAddressSDNode>(Val&: Op)) {
          Ops.push_back(x: DAG.getTargetBlockAddress(
              BA: BA->getBlockAddress(), VT: BA->getValueType(ResNo: 0),
              Offset: Offset + BA->getOffset(), TargetFlags: BA->getTargetFlags()));
          return;
        }
        if (isa<BasicBlockSDNode>(Val: Op)) {
          Ops.push_back(x: Op);
          return;
        }
      }
      // Try to strip one constant addend: (Op +/- C) -> Op, folding C into
      // Offset (negated for SUB), then keep searching underneath.
      const unsigned OpCode = Op.getOpcode();
      if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
        if ((C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 0))))
          Op = Op.getOperand(i: 1);
        // Subtraction is not commutative.
        else if (OpCode == ISD::ADD &&
                 (C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1))))
          Op = Op.getOperand(i: 0);
        else
          return;
        Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
        continue;
      }
      // Unrecognized form: leave Ops unchanged to signal an invalid operand.
      return;
    }
    break;
  }
  }
}
5930
// Default: collect no extra operands. Targets override this hook to append
// additional SDValues for their target intrinsics.
void TargetLowering::CollectTargetIntrinsicOperands(
    const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
}
5934
5935std::pair<unsigned, const TargetRegisterClass *>
5936TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
5937 StringRef Constraint,
5938 MVT VT) const {
5939 if (!Constraint.starts_with(Prefix: "{"))
5940 return std::make_pair(x: 0u, y: static_cast<TargetRegisterClass *>(nullptr));
5941 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5942
5943 // Remove the braces from around the name.
5944 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5945
5946 std::pair<unsigned, const TargetRegisterClass *> R =
5947 std::make_pair(x: 0u, y: static_cast<const TargetRegisterClass *>(nullptr));
5948
5949 // Figure out which register class contains this reg.
5950 for (const TargetRegisterClass *RC : RI->regclasses()) {
5951 // If none of the value types for this register class are valid, we
5952 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5953 if (!isLegalRC(TRI: *RI, RC: *RC))
5954 continue;
5955
5956 for (const MCPhysReg &PR : *RC) {
5957 if (RegName.equals_insensitive(RHS: RI->getRegAsmName(Reg: PR))) {
5958 std::pair<unsigned, const TargetRegisterClass *> S =
5959 std::make_pair(x: PR, y&: RC);
5960
5961 // If this register class has the requested value type, return it,
5962 // otherwise keep searching and return the first class found
5963 // if no other is found which explicitly has the requested type.
5964 if (RI->isTypeLegalForClass(RC: *RC, T: VT))
5965 return S;
5966 if (!R.second)
5967 R = S;
5968 }
5969 }
5970 }
5971
5972 return R;
5973}
5974
5975//===----------------------------------------------------------------------===//
5976// Constraint Selection.
5977
5978/// Return true of this is an input operand that is a matching constraint like
5979/// "4".
5980bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
5981 assert(!ConstraintCode.empty() && "No known constraint!");
5982 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5983}
5984
5985/// If this is an input matching constraint, this method returns the output
5986/// operand it matches.
5987unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
5988 assert(!ConstraintCode.empty() && "No known constraint!");
5989 return atoi(nptr: ConstraintCode.c_str());
5990}
5991
/// Split up the constraint string from the inline assembly value into the
/// specific constraints and their prefixes, and also tie in the associated
/// operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
                                 const TargetRegisterInfo *TRI,
                                 const CallBase &Call) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(Val: Call.getCalledOperand());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0; // ResNo - The result number of the next output.
  unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.

  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(args: std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput: {
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = Call.getArgOperand(i: ArgNo);
        break;
      }

      // The return value of the call is this value. As such, there is no
      // corresponding argument.
      assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
      EVT VT;
      if (auto *STy = dyn_cast<StructType>(Val: Call.getType())) {
        // Multiple outputs are returned as a struct; this output takes the
        // next struct element type.
        VT = getAsmOperandValueType(DL, Ty: STy->getElementType(N: ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        VT = getAsmOperandValueType(DL, Ty: Call.getType());
      }
      OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
      ++ResNo;
      break;
    }
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = Call.getArgOperand(i: ArgNo);
      break;
    case InlineAsm::isLabel:
      OpInfo.CallOperandVal = cast<CallBrInst>(Val: &Call)->getIndirectDest(i: LabelNo);
      ++LabelNo;
      // Labels don't consume a call argument; skip the common handling below.
      continue;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        // The pointee type of an indirect operand comes from the elementtype
        // attribute on the corresponding call argument.
        OpTy = Call.getParamElementType(ArgNo);
        assert(OpTy && "Indirect operand must have elementtype attribute");
      }

      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(Val: OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(N: 0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(Ty: OpTy);
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpTy = IntegerType::get(C&: OpTy->getContext(), NumBits: BitSize);
          break;
        }
      }

      EVT VT = getAsmOperandValueType(DL, Ty: OpTy, AllowUnknown: true);
      OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
      ArgNo++;
    }
  }

  // If we have multiple alternative constraints, select the best alternative.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1; // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(info&: OpInfo, maIndex);
          if (weight == -1) {
            // A single invalid operand invalidates the whole alternative.
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (AsmOperandInfo &cInfo : ConstraintOperands)
        if (cInfo.Type != InlineAsm::isClobber)
          cInfo.selectAlternative(index: bestMAIndex);
    }
  }

  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(RI: TRI, Constraint: OpInfo.ConstraintCode,
                                         VT: OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(RI: TRI, Constraint: Input.ConstraintCode,
                                         VT: Input.ConstraintVT);
        const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
                                    OpInfo.ConstraintVT.isFloatingPoint();
        const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
                                   Input.ConstraintVT.isFloatingPoint();
        // Tied operands must agree on int/FP-ness and resolve to the same
        // register class, or they cannot share a register.
        if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error(reason: "Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}
6182
/// Return a number indicating our preference for choosing a type of constraint
/// over another, for the purpose of sorting them. Immediates are almost always
/// preferable (when they can be emitted). A higher return value means a
/// stronger preference for one constraint type relative to another.
/// FIXME: We should prefer registers over memory but doing so may lead to
/// unrecoverable register exhaustion later.
/// https://github.com/llvm/llvm-project/issues/20571
6190static unsigned getConstraintPiority(TargetLowering::ConstraintType CT) {
6191 switch (CT) {
6192 case TargetLowering::C_Immediate:
6193 case TargetLowering::C_Other:
6194 return 4;
6195 case TargetLowering::C_Memory:
6196 case TargetLowering::C_Address:
6197 return 3;
6198 case TargetLowering::C_RegisterClass:
6199 return 2;
6200 case TargetLowering::C_Register:
6201 return 1;
6202 case TargetLowering::C_Unknown:
6203 return 0;
6204 }
6205 llvm_unreachable("Invalid constraint type");
6206}
6207
6208/// Examine constraint type and operand type and determine a weight value.
6209/// This object must already have been set up with the operand type
6210/// and the current alternative constraint selected.
6211TargetLowering::ConstraintWeight
6212 TargetLowering::getMultipleConstraintMatchWeight(
6213 AsmOperandInfo &info, int maIndex) const {
6214 InlineAsm::ConstraintCodeVector *rCodes;
6215 if (maIndex >= (int)info.multipleAlternatives.size())
6216 rCodes = &info.Codes;
6217 else
6218 rCodes = &info.multipleAlternatives[maIndex].Codes;
6219 ConstraintWeight BestWeight = CW_Invalid;
6220
6221 // Loop over the options, keeping track of the most general one.
6222 for (const std::string &rCode : *rCodes) {
6223 ConstraintWeight weight =
6224 getSingleConstraintMatchWeight(info, constraint: rCode.c_str());
6225 if (weight > BestWeight)
6226 BestWeight = weight;
6227 }
6228
6229 return BestWeight;
6230}
6231
6232/// Examine constraint type and operand type and determine a weight value.
6233/// This object must already have been set up with the operand type
6234/// and the current alternative constraint selected.
6235TargetLowering::ConstraintWeight
6236 TargetLowering::getSingleConstraintMatchWeight(
6237 AsmOperandInfo &info, const char *constraint) const {
6238 ConstraintWeight weight = CW_Invalid;
6239 Value *CallOperandVal = info.CallOperandVal;
6240 // If we don't have a value, we can't do a match,
6241 // but allow it at the lowest weight.
6242 if (!CallOperandVal)
6243 return CW_Default;
6244 // Look at the constraint type.
6245 switch (*constraint) {
6246 case 'i': // immediate integer.
6247 case 'n': // immediate integer with a known value.
6248 if (isa<ConstantInt>(Val: CallOperandVal))
6249 weight = CW_Constant;
6250 break;
6251 case 's': // non-explicit intregal immediate.
6252 if (isa<GlobalValue>(Val: CallOperandVal))
6253 weight = CW_Constant;
6254 break;
6255 case 'E': // immediate float if host format.
6256 case 'F': // immediate float.
6257 if (isa<ConstantFP>(Val: CallOperandVal))
6258 weight = CW_Constant;
6259 break;
6260 case '<': // memory operand with autodecrement.
6261 case '>': // memory operand with autoincrement.
6262 case 'm': // memory operand.
6263 case 'o': // offsettable memory operand
6264 case 'V': // non-offsettable memory operand
6265 weight = CW_Memory;
6266 break;
6267 case 'r': // general register.
6268 case 'g': // general register, memory operand or immediate integer.
6269 // note: Clang converts "g" to "imr".
6270 if (CallOperandVal->getType()->isIntegerTy())
6271 weight = CW_Register;
6272 break;
6273 case 'X': // any operand.
6274 default:
6275 weight = CW_Default;
6276 break;
6277 }
6278 return weight;
6279}
6280
6281/// If there are multiple different constraints that we could pick for this
6282/// operand (e.g. "imr") try to pick the 'best' one.
6283/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
6284/// into seven classes:
6285/// Register -> one specific register
6286/// RegisterClass -> a group of regs
6287/// Memory -> memory
6288/// Address -> a symbolic memory reference
6289/// Immediate -> immediate values
6290/// Other -> magic values (such as "Flag Output Operands")
6291/// Unknown -> something we don't recognize yet and can't handle
6292/// Ideally, we would pick the most specific constraint possible: if we have
6293/// something that fits into a register, we would pick it. The problem here
6294/// is that if we have something that could either be in a register or in
6295/// memory that use of the register could cause selection of *other*
6296/// operands to fail: they might only succeed if we pick memory. Because of
6297/// this the heuristic we use is:
6298///
6299/// 1) If there is an 'other' constraint, and if the operand is valid for
6300/// that constraint, use it. This makes us take advantage of 'i'
6301/// constraints when available.
6302/// 2) Otherwise, pick the most general constraint present. This prefers
6303/// 'm' over 'r', for example.
6304///
6305TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
6306 TargetLowering::AsmOperandInfo &OpInfo) const {
6307 ConstraintGroup Ret;
6308
6309 Ret.reserve(N: OpInfo.Codes.size());
6310 for (StringRef Code : OpInfo.Codes) {
6311 TargetLowering::ConstraintType CType = getConstraintType(Constraint: Code);
6312
6313 // Indirect 'other' or 'immediate' constraints are not allowed.
6314 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
6315 CType == TargetLowering::C_Register ||
6316 CType == TargetLowering::C_RegisterClass))
6317 continue;
6318
6319 // Things with matching constraints can only be registers, per gcc
6320 // documentation. This mainly affects "g" constraints.
6321 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6322 continue;
6323
6324 Ret.emplace_back(Args&: Code, Args&: CType);
6325 }
6326
6327 llvm::stable_sort(Range&: Ret, C: [](ConstraintPair a, ConstraintPair b) {
6328 return getConstraintPiority(CT: a.second) > getConstraintPiority(CT: b.second);
6329 });
6330
6331 return Ret;
6332}
6333
6334/// If we have an immediate, see if we can lower it. Return true if we can,
6335/// false otherwise.
6336static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
6337 SDValue Op, SelectionDAG *DAG,
6338 const TargetLowering &TLI) {
6339
6340 assert((P.second == TargetLowering::C_Other ||
6341 P.second == TargetLowering::C_Immediate) &&
6342 "need immediate or other");
6343
6344 if (!Op.getNode())
6345 return false;
6346
6347 std::vector<SDValue> ResultOps;
6348 TLI.LowerAsmOperandForConstraint(Op, Constraint: P.first, Ops&: ResultOps, DAG&: *DAG);
6349 return !ResultOps.empty();
6350}
6351
6352/// Determines the constraint code and constraint type to use for the specific
6353/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
                                            SDValue Op,
                                            SelectionDAG *DAG) const {
  assert(!OpInfo.Codes.empty() && "Must have at least one constraint");

  // Single-letter constraints ('r') are very common.
  if (OpInfo.Codes.size() == 1) {
    OpInfo.ConstraintCode = OpInfo.Codes[0];
    OpInfo.ConstraintType = getConstraintType(Constraint: OpInfo.ConstraintCode);
  } else {
    // Multiple alternatives: rank them by preference (immediate/'other'
    // first, then memory, then register class, then specific register).
    ConstraintGroup G = getConstraintPreferences(OpInfo);
    if (G.empty())
      return;

    unsigned BestIdx = 0;
    // Walk the immediate/'other' prefix of the sorted group, stopping at the
    // first entry that can actually be lowered for this operand.
    for (const unsigned E = G.size();
         BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
                         G[BestIdx].second == TargetLowering::C_Immediate);
         ++BestIdx) {
      if (lowerImmediateIfPossible(P&: G[BestIdx], Op, DAG, TLI: *this))
        break;
      // If we're out of constraints, just pick the first one.
      if (BestIdx + 1 == E) {
        BestIdx = 0;
        break;
      }
    }

    OpInfo.ConstraintCode = G[BestIdx].first;
    OpInfo.ConstraintType = G[BestIdx].second;
  }

  // 'X' matches anything.
  if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
    // Constants are handled elsewhere. For Functions, the type here is the
    // type of the result, which is not what we want to look at; leave them
    // alone.
    Value *v = OpInfo.CallOperandVal;
    if (isa<ConstantInt>(Val: v) || isa<Function>(Val: v)) {
      return;
    }

    // Labels and block addresses can always be materialized as immediates.
    if (isa<BasicBlock>(Val: v) || isa<BlockAddress>(Val: v)) {
      OpInfo.ConstraintCode = "i";
      return;
    }

    // Otherwise, try to resolve it to something we know about by looking at
    // the actual operand type.
    if (const char *Repl = LowerXConstraint(ConstraintVT: OpInfo.ConstraintVT)) {
      OpInfo.ConstraintCode = Repl;
      OpInfo.ConstraintType = getConstraintType(Constraint: OpInfo.ConstraintCode);
    }
  }
}
6409
6410/// Given an exact SDIV by a constant, create a multiplication
6411/// with the multiplicative inverse of the constant.
6412/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6413static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
6414 const SDLoc &dl, SelectionDAG &DAG,
6415 SmallVectorImpl<SDNode *> &Created) {
6416 SDValue Op0 = N->getOperand(Num: 0);
6417 SDValue Op1 = N->getOperand(Num: 1);
6418 EVT VT = N->getValueType(ResNo: 0);
6419 EVT ShVT = TLI.getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6420 EVT ShSVT = ShVT.getScalarType();
6421
6422 bool UseSRA = false;
6423 SmallVector<SDValue, 16> Shifts, Factors;
6424
6425 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6426 if (C->isZero())
6427 return false;
6428
6429 EVT CT = C->getValueType(ResNo: 0);
6430 APInt Divisor = C->getAPIntValue();
6431 unsigned Shift = Divisor.countr_zero();
6432 if (Shift) {
6433 Divisor.ashrInPlace(ShiftAmt: Shift);
6434 UseSRA = true;
6435 }
6436 APInt Factor = Divisor.multiplicativeInverse();
6437 Shifts.push_back(Elt: DAG.getConstant(Val: Shift, DL: dl, VT: ShSVT));
6438 Factors.push_back(Elt: DAG.getConstant(Val: Factor, DL: dl, VT: CT));
6439 return true;
6440 };
6441
6442 // Collect all magic values from the build vector.
6443 if (!ISD::matchUnaryPredicate(Op: Op1, Match: BuildSDIVPattern))
6444 return SDValue();
6445
6446 SDValue Shift, Factor;
6447 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6448 Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
6449 Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
6450 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6451 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6452 "Expected matchUnaryPredicate to return one element for scalable "
6453 "vectors");
6454 Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts[0]);
6455 Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors[0]);
6456 } else {
6457 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6458 Shift = Shifts[0];
6459 Factor = Factors[0];
6460 }
6461
6462 SDValue Res = Op0;
6463 if (UseSRA) {
6464 Res = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Res, N2: Shift, Flags: SDNodeFlags::Exact);
6465 Created.push_back(Elt: Res.getNode());
6466 }
6467
6468 return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Res, N2: Factor);
6469}
6470
6471/// Given an exact UDIV by a constant, create a multiplication
6472/// with the multiplicative inverse of the constant.
6473/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6474static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
6475 const SDLoc &dl, SelectionDAG &DAG,
6476 SmallVectorImpl<SDNode *> &Created) {
6477 EVT VT = N->getValueType(ResNo: 0);
6478 EVT ShVT = TLI.getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6479 EVT ShSVT = ShVT.getScalarType();
6480
6481 bool UseSRL = false;
6482 SmallVector<SDValue, 16> Shifts, Factors;
6483
6484 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6485 if (C->isZero())
6486 return false;
6487
6488 EVT CT = C->getValueType(ResNo: 0);
6489 APInt Divisor = C->getAPIntValue();
6490 unsigned Shift = Divisor.countr_zero();
6491 if (Shift) {
6492 Divisor.lshrInPlace(ShiftAmt: Shift);
6493 UseSRL = true;
6494 }
6495 // Calculate the multiplicative inverse modulo BW.
6496 APInt Factor = Divisor.multiplicativeInverse();
6497 Shifts.push_back(Elt: DAG.getConstant(Val: Shift, DL: dl, VT: ShSVT));
6498 Factors.push_back(Elt: DAG.getConstant(Val: Factor, DL: dl, VT: CT));
6499 return true;
6500 };
6501
6502 SDValue Op1 = N->getOperand(Num: 1);
6503
6504 // Collect all magic values from the build vector.
6505 if (!ISD::matchUnaryPredicate(Op: Op1, Match: BuildUDIVPattern))
6506 return SDValue();
6507
6508 SDValue Shift, Factor;
6509 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6510 Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
6511 Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
6512 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6513 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6514 "Expected matchUnaryPredicate to return one element for scalable "
6515 "vectors");
6516 Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts[0]);
6517 Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors[0]);
6518 } else {
6519 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6520 Shift = Shifts[0];
6521 Factor = Factors[0];
6522 }
6523
6524 SDValue Res = N->getOperand(Num: 0);
6525 if (UseSRL) {
6526 Res = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Res, N2: Shift, Flags: SDNodeFlags::Exact);
6527 Created.push_back(Elt: Res.getNode());
6528 }
6529
6530 return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Res, N2: Factor);
6531}
6532
6533SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
6534 SelectionDAG &DAG,
6535 SmallVectorImpl<SDNode *> &Created) const {
6536 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6537 if (isIntDivCheap(VT: N->getValueType(ResNo: 0), Attr))
6538 return SDValue(N, 0); // Lower SDIV as SDIV
6539 return SDValue();
6540}
6541
6542SDValue
6543TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
6544 SelectionDAG &DAG,
6545 SmallVectorImpl<SDNode *> &Created) const {
6546 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6547 if (isIntDivCheap(VT: N->getValueType(ResNo: 0), Attr))
6548 return SDValue(N, 0); // Lower SREM as SREM
6549 return SDValue();
6550}
6551
6552/// Build sdiv by power-of-2 with conditional move instructions
6553/// Ref: "Hacker's Delight" by Henry Warren 10-1
6554/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6555/// bgez x, label
6556/// add x, x, 2**k-1
6557/// label:
6558/// sra res, x, k
6559/// neg res, res (when the divisor is negative)
6560SDValue TargetLowering::buildSDIVPow2WithCMov(
6561 SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6562 SmallVectorImpl<SDNode *> &Created) const {
6563 unsigned Lg2 = Divisor.countr_zero();
6564 EVT VT = N->getValueType(ResNo: 0);
6565
6566 SDLoc DL(N);
6567 SDValue N0 = N->getOperand(Num: 0);
6568 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
6569 APInt Lg2Mask = APInt::getLowBitsSet(numBits: VT.getSizeInBits(), loBitsSet: Lg2);
6570 SDValue Pow2MinusOne = DAG.getConstant(Val: Lg2Mask, DL, VT);
6571
6572 // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6573 EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
6574 SDValue Cmp = DAG.getSetCC(DL, VT: CCVT, LHS: N0, RHS: Zero, Cond: ISD::SETLT);
6575 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0, N2: Pow2MinusOne);
6576 SDValue CMov = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: Cmp, N2: Add, N3: N0);
6577
6578 Created.push_back(Elt: Cmp.getNode());
6579 Created.push_back(Elt: Add.getNode());
6580 Created.push_back(Elt: CMov.getNode());
6581
6582 // Divide by pow2.
6583 SDValue SRA = DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: CMov,
6584 N2: DAG.getShiftAmountConstant(Val: Lg2, VT, DL));
6585
6586 // If we're dividing by a positive value, we're done. Otherwise, we must
6587 // negate the result.
6588 if (Divisor.isNonNegative())
6589 return SRA;
6590
6591 Created.push_back(Elt: SRA.getNode());
6592 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Zero, N2: SRA);
6593}
6594
6595/// Given an ISD::SDIV node expressing a divide by constant,
6596/// return a DAG expression to select that will generate the same value by
6597/// multiplying by a magic number.
6598/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  bool IsAfterLegalTypes,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);

  // If the sdiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactSDIV(TLI: *this, N, dl, DAG, Created);

  EVT VT = N->getValueType(ResNo: 0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  // MulVT stays the default (invalid) EVT unless we decide to perform the
  // multiply in a wider type.
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  EVT QueryVT = VT;
  if (VT.isVector()) {
    // If the vector type will be legalized to a vector type with the same
    // element type, allow the transform before type legalization if MULHS or
    // SMUL_LOHI are supported.
    QueryVT = getLegalTypeToTransformTo(Context&: *DAG.getContext(), VT);
    if (!QueryVT.isVector() ||
        QueryVT.getVectorElementType() != VT.getVectorElementType())
      return SDValue();
  } else if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (!VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
    if (getTypeAction(VT: VT.getSimpleVT()) != TypePromoteInteger)
      return SDValue();

    MulVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(Op: ISD::MUL, VT: MulVT))
      return SDValue();
  }

  bool HasMULHS =
      isOperationLegalOrCustom(Op: ISD::MULHS, VT: QueryVT, LegalOnly: IsAfterLegalization);
  bool HasSMUL_LOHI =
      isOperationLegalOrCustom(Op: ISD::SMUL_LOHI, VT: QueryVT, LegalOnly: IsAfterLegalization);

  if (isTypeLegal(VT) && !HasMULHS && !HasSMUL_LOHI && MulVT == EVT()) {
    // If type twice as wide legal, widen and use a mul plus a shift.
    EVT WideVT = VT.widenIntegerElementType(Context&: *DAG.getContext());
    // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
    // custom lowered. This is very expensive so avoid it at all costs for
    // constant divisors.
    if ((!IsAfterLegalTypes && isOperationExpand(Op: ISD::SDIV, VT) &&
         isOperationCustom(Op: ISD::SDIVREM, VT: VT.getScalarType())) ||
        isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT))
      MulVT = WideVT;
  }

  // No way to compute the high half of the multiply: give up.
  if (!HasMULHS && !HasSMUL_LOHI && MulVT == EVT())
    return SDValue();

  // If we're after type legalization and SVT is not legal, use the
  // promoted type for creating constants to avoid creating nodes with
  // illegal types.
  if (IsAfterLegalTypes && VT.isVector()) {
    SVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT: SVT);
    if (SVT.bitsLT(VT: VT.getScalarType()))
      return SDValue();
    ShSVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT: ShSVT);
    if (ShSVT.bitsLT(VT: ShVT.getScalarType()))
      return SDValue();
  }
  const unsigned SVTBits = SVT.getSizeInBits();

  // Per-element magic constants gathered from the divisor below.
  SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;

  // Computes the magic-number parameters for one constant divisor element.
  // Returns false (aborting the transform) for a zero divisor.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;
    // Truncate the divisor to the target scalar type in case it was promoted
    // during type legalization.
    APInt Divisor = C->getAPIntValue().trunc(width: EltBits);
    SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(D: Divisor);
    int NumeratorFactor = 0;
    int ShiftMask = -1;

    if (Divisor.isOne() || Divisor.isAllOnes()) {
      // If d is +1/-1, we just multiply the numerator by +1/-1.
      NumeratorFactor = Divisor.getSExtValue();
      magics.Magic = 0;
      magics.ShiftAmount = 0;
      ShiftMask = 0;
    } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
      // If d > 0 and m < 0, add the numerator.
      NumeratorFactor = 1;
    } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
      // If d < 0 and m > 0, subtract the numerator.
      NumeratorFactor = -1;
    }

    MagicFactors.push_back(
        Elt: DAG.getConstant(Val: magics.Magic.zext(width: SVTBits), DL: dl, VT: SVT));
    Factors.push_back(Elt: DAG.getSignedConstant(Val: NumeratorFactor, DL: dl, VT: SVT));
    Shifts.push_back(Elt: DAG.getConstant(Val: magics.ShiftAmount, DL: dl, VT: ShSVT));
    ShiftMasks.push_back(Elt: DAG.getSignedConstant(Val: ShiftMask, DL: dl, VT: SVT));
    return true;
  };

  SDValue N0 = N->getOperand(Num: 0);
  SDValue N1 = N->getOperand(Num: 1);

  // Collect the shifts / magic values from each element.
  if (!ISD::matchUnaryPredicate(Op: N1, Match: BuildSDIVPattern, /*AllowUndefs=*/false,
                                /*AllowTruncation=*/true))
    return SDValue();

  // Re-assemble the per-element constants into operands matching N1's shape
  // (build_vector, splat_vector, or scalar).
  SDValue MagicFactor, Factor, Shift, ShiftMask;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    MagicFactor = DAG.getBuildVector(VT, DL: dl, Ops: MagicFactors);
    Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
    Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
    ShiftMask = DAG.getBuildVector(VT, DL: dl, Ops: ShiftMasks);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
           Shifts.size() == 1 && ShiftMasks.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    MagicFactor = DAG.getSplatVector(VT, DL: dl, Op: MagicFactors[0]);
    Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors[0]);
    Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts[0]);
    ShiftMask = DAG.getSplatVector(VT, DL: dl, Op: ShiftMasks[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    MagicFactor = MagicFactors[0];
    Factor = Factors[0];
    Shift = Shifts[0];
    ShiftMask = ShiftMasks[0];
  }

  // Multiply the numerator (operand 0) by the magic value.
  // Returns the high EltBits of X * Y via MULHS, SMUL_LOHI, or a widened
  // multiply in MulVT, whichever the earlier checks enabled.
  auto GetMULHS = [&](SDValue X, SDValue Y) {
    if (HasMULHS)
      return DAG.getNode(Opcode: ISD::MULHS, DL: dl, VT, N1: X, N2: Y);
    if (HasSMUL_LOHI) {
      SDValue LoHi =
          DAG.getNode(Opcode: ISD::SMUL_LOHI, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: X, N2: Y);
      return LoHi.getValue(R: 1);
    }

    // Fallback: sign-extend to the wide type, multiply, and take the high
    // EltBits of the product.
    X = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MulVT, Operand: X);
    Y = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MulVT, Operand: Y);
    Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MulVT, N1: X, N2: Y);
    Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MulVT, N1: Y,
                    N2: DAG.getShiftAmountConstant(Val: EltBits, VT: MulVT, DL: dl));
    return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
  };

  SDValue Q = GetMULHS(N0, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Elt: Q.getNode());

  // (Optionally) Add/subtract the numerator using Factor.
  Factor = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: N0, N2: Factor);
  Created.push_back(Elt: Factor.getNode());
  Q = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Q, N2: Factor);
  Created.push_back(Elt: Q.getNode());

  // Shift right algebraic by shift value.
  Q = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Q, N2: Shift);
  Created.push_back(Elt: Q.getNode());

  // Extract the sign bit, mask it and add it to the quotient.
  SDValue SignShift = DAG.getConstant(Val: EltBits - 1, DL: dl, VT: ShVT);
  SDValue T = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: SignShift);
  Created.push_back(Elt: T.getNode());
  T = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: T, N2: ShiftMask);
  Created.push_back(Elt: T.getNode());
  return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Q, N2: T);
}
6783
6784/// Given an ISD::UDIV node expressing a divide by constant,
6785/// return a DAG expression to select that will generate the same value by
6786/// multiplying by a magic number.
6787/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6788SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
6789 bool IsAfterLegalization,
6790 bool IsAfterLegalTypes,
6791 SmallVectorImpl<SDNode *> &Created) const {
6792 SDLoc dl(N);
6793
6794 // If the udiv has an 'exact' bit we can use a simpler lowering.
6795 if (N->getFlags().hasExact())
6796 return BuildExactUDIV(TLI: *this, N, dl, DAG, Created);
6797
6798 EVT VT = N->getValueType(ResNo: 0);
6799 EVT SVT = VT.getScalarType();
6800 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6801 EVT ShSVT = ShVT.getScalarType();
6802 unsigned EltBits = VT.getScalarSizeInBits();
6803 EVT MulVT;
6804
6805 // Check to see if we can do this.
6806 // FIXME: We should be more aggressive here.
6807 EVT QueryVT = VT;
6808 if (VT.isVector()) {
6809 // If the vector type will be legalized to a vector type with the same
6810 // element type, allow the transform before type legalization if MULHU or
6811 // UMUL_LOHI are supported.
6812 QueryVT = getLegalTypeToTransformTo(Context&: *DAG.getContext(), VT);
6813 if (!QueryVT.isVector() ||
6814 QueryVT.getVectorElementType() != VT.getVectorElementType())
6815 return SDValue();
6816 } else if (!isTypeLegal(VT)) {
6817 // Limit this to simple scalars for now.
6818 if (!VT.isSimple())
6819 return SDValue();
6820
6821 // If this type will be promoted to a large enough type with a legal
6822 // multiply operation, we can go ahead and do this transform.
6823 if (getTypeAction(VT: VT.getSimpleVT()) != TypePromoteInteger)
6824 return SDValue();
6825
6826 MulVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT);
6827 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6828 !isOperationLegal(Op: ISD::MUL, VT: MulVT))
6829 return SDValue();
6830 }
6831
6832 bool HasMULHU =
6833 isOperationLegalOrCustom(Op: ISD::MULHU, VT: QueryVT, LegalOnly: IsAfterLegalization);
6834 bool HasUMUL_LOHI =
6835 isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT: QueryVT, LegalOnly: IsAfterLegalization);
6836
6837 if (isTypeLegal(VT) && !HasMULHU && !HasUMUL_LOHI && MulVT == EVT()) {
6838 // If type twice as wide legal, widen and use a mul plus a shift.
6839 EVT WideVT = VT.widenIntegerElementType(Context&: *DAG.getContext());
6840 // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
6841 // custom lowered. This is very expensive so avoid it at all costs for
6842 // constant divisors.
6843 if ((!IsAfterLegalTypes && isOperationExpand(Op: ISD::UDIV, VT) &&
6844 isOperationCustom(Op: ISD::UDIVREM, VT: VT.getScalarType())) ||
6845 isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT))
6846 MulVT = WideVT;
6847 }
6848
6849 if (!HasMULHU && !HasUMUL_LOHI && MulVT == EVT())
6850 return SDValue();
6851
6852 SDValue N0 = N->getOperand(Num: 0);
6853 SDValue N1 = N->getOperand(Num: 1);
6854
6855 // Try to use leading zeros of the dividend to reduce the multiplier and
6856 // avoid expensive fixups.
6857 unsigned KnownLeadingZeros = DAG.computeKnownBits(Op: N0).countMinLeadingZeros();
6858
6859 // If we're after type legalization and SVT is not legal, use the
6860 // promoted type for creating constants to avoid creating nodes with
6861 // illegal types.
6862 if (IsAfterLegalTypes && VT.isVector()) {
6863 SVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT: SVT);
6864 if (SVT.bitsLT(VT: VT.getScalarType()))
6865 return SDValue();
6866 ShSVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT: ShSVT);
6867 if (ShSVT.bitsLT(VT: ShVT.getScalarType()))
6868 return SDValue();
6869 }
6870 const unsigned SVTBits = SVT.getSizeInBits();
6871
6872 // Allow i32 to be widened to i64 for uncooperative divisors if i64 MULHU or
6873 // UMUL_LOHI is supported.
6874 const EVT WideSVT = MVT::i64;
6875 const bool HasWideMULHU =
6876 VT == MVT::i32 &&
6877 isOperationLegalOrCustom(Op: ISD::MULHU, VT: WideSVT, LegalOnly: IsAfterLegalization);
6878 const bool HasWideUMUL_LOHI =
6879 VT == MVT::i32 &&
6880 isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT: WideSVT, LegalOnly: IsAfterLegalization);
6881 const bool AllowWiden = (HasWideMULHU || HasWideUMUL_LOHI);
6882
6883 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6884 bool UseWiden = false;
6885 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6886
6887 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6888 if (C->isZero())
6889 return false;
6890 // Truncate the divisor to the target scalar type in case it was promoted
6891 // during type legalization.
6892 APInt Divisor = C->getAPIntValue().trunc(width: EltBits);
6893
6894 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6895
6896 // Magic algorithm doesn't work for division by 1. We need to emit a select
6897 // at the end.
6898 if (Divisor.isOne()) {
6899 PreShift = PostShift = DAG.getUNDEF(VT: ShSVT);
6900 MagicFactor = NPQFactor = DAG.getUNDEF(VT: SVT);
6901 } else {
6902 UnsignedDivisionByConstantInfo magics =
6903 UnsignedDivisionByConstantInfo::get(
6904 D: Divisor, LeadingZeros: std::min(a: KnownLeadingZeros, b: Divisor.countl_zero()),
6905 /*AllowEvenDivisorOptimization=*/true,
6906 /*AllowWidenOptimization=*/AllowWiden);
6907
6908 if (magics.Widen) {
6909 UseWiden = true;
6910 MagicFactor = DAG.getConstant(Val: magics.Magic, DL: dl, VT: WideSVT);
6911 } else {
6912 MagicFactor = DAG.getConstant(Val: magics.Magic.zext(width: SVTBits), DL: dl, VT: SVT);
6913 }
6914
6915 assert(magics.PreShift < Divisor.getBitWidth() &&
6916 "We shouldn't generate an undefined shift!");
6917 assert(magics.PostShift < Divisor.getBitWidth() &&
6918 "We shouldn't generate an undefined shift!");
6919 assert((!magics.IsAdd || magics.PreShift == 0) &&
6920 "Unexpected pre-shift");
6921 PreShift = DAG.getConstant(Val: magics.PreShift, DL: dl, VT: ShSVT);
6922 PostShift = DAG.getConstant(Val: magics.PostShift, DL: dl, VT: ShSVT);
6923 NPQFactor = DAG.getConstant(
6924 Val: magics.IsAdd ? APInt::getOneBitSet(numBits: SVTBits, BitNo: EltBits - 1)
6925 : APInt::getZero(numBits: SVTBits),
6926 DL: dl, VT: SVT);
6927 UseNPQ |= magics.IsAdd;
6928 UsePreShift |= magics.PreShift != 0;
6929 UsePostShift |= magics.PostShift != 0;
6930 }
6931
6932 PreShifts.push_back(Elt: PreShift);
6933 MagicFactors.push_back(Elt: MagicFactor);
6934 NPQFactors.push_back(Elt: NPQFactor);
6935 PostShifts.push_back(Elt: PostShift);
6936 return true;
6937 };
6938
6939 // Collect the shifts/magic values from each element.
6940 if (!ISD::matchUnaryPredicate(Op: N1, Match: BuildUDIVPattern, /*AllowUndefs=*/false,
6941 /*AllowTruncation=*/true))
6942 return SDValue();
6943
6944 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6945 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6946 PreShift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: PreShifts);
6947 MagicFactor = DAG.getBuildVector(VT, DL: dl, Ops: MagicFactors);
6948 NPQFactor = DAG.getBuildVector(VT, DL: dl, Ops: NPQFactors);
6949 PostShift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: PostShifts);
6950 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6951 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6952 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
6953 "Expected matchUnaryPredicate to return one for scalable vectors");
6954 PreShift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: PreShifts[0]);
6955 MagicFactor = DAG.getSplatVector(VT, DL: dl, Op: MagicFactors[0]);
6956 NPQFactor = DAG.getSplatVector(VT, DL: dl, Op: NPQFactors[0]);
6957 PostShift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: PostShifts[0]);
6958 } else {
6959 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6960 PreShift = PreShifts[0];
6961 MagicFactor = MagicFactors[0];
6962 PostShift = PostShifts[0];
6963 }
6964
6965 if (UseWiden) {
6966 // Compute: (WideSVT(x) * MagicFactor) >> WideSVTBits.
6967 SDValue WideN0 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: WideSVT, Operand: N0);
6968
6969 // Perform WideSVTxWideSVT -> 2*WideSVT multiplication and extract high
6970 // WideSVT bits
6971 SDValue High;
6972 if (HasWideMULHU) {
6973 High = DAG.getNode(Opcode: ISD::MULHU, DL: dl, VT: WideSVT, N1: WideN0, N2: MagicFactor);
6974 } else {
6975 assert(HasWideUMUL_LOHI);
6976 SDValue LoHi =
6977 DAG.getNode(Opcode: ISD::UMUL_LOHI, DL: dl, VTList: DAG.getVTList(VT1: WideSVT, VT2: WideSVT),
6978 N1: WideN0, N2: MagicFactor);
6979 High = LoHi.getValue(R: 1);
6980 }
6981
6982 Created.push_back(Elt: High.getNode());
6983 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: High);
6984 }
6985
6986 SDValue Q = N0;
6987 if (UsePreShift) {
6988 Q = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: PreShift);
6989 Created.push_back(Elt: Q.getNode());
6990 }
6991
6992 auto GetMULHU = [&](SDValue X, SDValue Y) {
6993 if (HasMULHU)
6994 return DAG.getNode(Opcode: ISD::MULHU, DL: dl, VT, N1: X, N2: Y);
6995 if (HasUMUL_LOHI) {
6996 SDValue LoHi =
6997 DAG.getNode(Opcode: ISD::UMUL_LOHI, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: X, N2: Y);
6998 return LoHi.getValue(R: 1);
6999 }
7000
7001 X = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MulVT, Operand: X);
7002 Y = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MulVT, Operand: Y);
7003 Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MulVT, N1: X, N2: Y);
7004 Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MulVT, N1: Y,
7005 N2: DAG.getShiftAmountConstant(Val: EltBits, VT: MulVT, DL: dl));
7006 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
7007 };
7008
7009 // Multiply the numerator (operand 0) by the magic value.
7010 Q = GetMULHU(Q, MagicFactor);
7011 if (!Q)
7012 return SDValue();
7013
7014 Created.push_back(Elt: Q.getNode());
7015
7016 if (UseNPQ) {
7017 SDValue NPQ = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: N0, N2: Q);
7018 Created.push_back(Elt: NPQ.getNode());
7019
7020 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
7021 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
7022 if (VT.isVector())
7023 NPQ = GetMULHU(NPQ, NPQFactor);
7024 else
7025 NPQ = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: NPQ, N2: DAG.getConstant(Val: 1, DL: dl, VT: ShVT));
7026
7027 Created.push_back(Elt: NPQ.getNode());
7028
7029 Q = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: NPQ, N2: Q);
7030 Created.push_back(Elt: Q.getNode());
7031 }
7032
7033 if (UsePostShift) {
7034 Q = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: PostShift);
7035 Created.push_back(Elt: Q.getNode());
7036 }
7037
7038 EVT SetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
7039
7040 SDValue One = DAG.getConstant(Val: 1, DL: dl, VT);
7041 SDValue IsOne = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: N1, RHS: One, Cond: ISD::SETEQ);
7042 return DAG.getSelect(DL: dl, VT, Cond: IsOne, LHS: N0, RHS: Q);
7043}
7044
7045/// If all values in Values that *don't* match the predicate are same 'splat'
7046/// value, then replace all values with that splat value.
7047/// Else, if AlternativeReplacement was provided, then replace all values that
7048/// do match predicate with AlternativeReplacement value.
7049static void
7050turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
7051 std::function<bool(SDValue)> Predicate,
7052 SDValue AlternativeReplacement = SDValue()) {
7053 SDValue Replacement;
7054 // Is there a value for which the Predicate does *NOT* match? What is it?
7055 auto SplatValue = llvm::find_if_not(Range&: Values, P: Predicate);
7056 if (SplatValue != Values.end()) {
7057 // Does Values consist only of SplatValue's and values matching Predicate?
7058 if (llvm::all_of(Range&: Values, P: [Predicate, SplatValue](SDValue Value) {
7059 return Value == *SplatValue || Predicate(Value);
7060 })) // Then we shall replace values matching predicate with SplatValue.
7061 Replacement = *SplatValue;
7062 }
7063 if (!Replacement) {
7064 // Oops, we did not find the "baseline" splat value.
7065 if (!AlternativeReplacement)
7066 return; // Nothing to do.
7067 // Let's replace with provided value then.
7068 Replacement = AlternativeReplacement;
7069 }
7070 std::replace_if(first: Values.begin(), last: Values.end(), pred: Predicate, new_value: Replacement);
7071}
7072
7073/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
7074/// where the divisor and comparison target are constants,
7075/// return a DAG expression that will generate the same comparison result
7076/// using only multiplications, additions and shifts/rotations.
7077/// Ref: "Hacker's Delight" 10-17.
7078SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
7079 SDValue CompTargetNode,
7080 ISD::CondCode Cond,
7081 DAGCombinerInfo &DCI,
7082 const SDLoc &DL) const {
7083 SmallVector<SDNode *, 5> Built;
7084 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7085 DCI, DL, Created&: Built)) {
7086 for (SDNode *N : Built)
7087 DCI.AddToWorklist(N);
7088 return Folded;
7089 }
7090
7091 return SDValue();
7092}
7093
/// Worker for buildUREMEqFold: attempt to build the (in)equality-of-urem
/// replacement pattern, recording every newly created node in \p Created.
/// Returns SDValue() when the fold is impossible or deemed unprofitable.
SDValue
TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // fold (seteq/ne (urem N, D), C) ->
  // (setule/ugt (rotr (mul (sub N, C), P), K), Q)
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - Q = floor(((2^W) - 1) / D)
  // where W is the width of the common type of N and D.
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  // If MUL is unavailable, we cannot proceed in any case.
  if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::MUL, VT))
    return SDValue();

  // Per-lane bookkeeping used below to decide profitability and which pieces
  // of the pattern (the SUB, the ROTR, the tautological-lane fixup) we need.
  bool ComparingWithAllZeros = true;
  bool AllComparisonsWithNonZerosAreTautological = true;
  bool HadTautologicalLanes = false;
  bool AllLanesAreTautological = true;
  bool HadEvenDivisor = false;
  bool AllDivisorsArePowerOfTwo = true;
  bool HadTautologicalInvertedLanes = false;
  SmallVector<SDValue, 16> PAmts, KAmts, QAmts;

  // Computes the P/K/Q constants for one lane given its divisor and comparison
  // target; returning false aborts the whole fold.
  auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (CDiv->isZero())
      return false;

    const APInt &D = CDiv->getAPIntValue();
    const APInt &Cmp = CCmp->getAPIntValue();

    ComparingWithAllZeros &= Cmp.isZero();

    // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
    // if C2 is not less than C1, the comparison is always false.
    // But we will only be able to produce the comparison that will give the
    // opposite tautological answer. So this lane would need to be fixed up.
    bool TautologicalInvertedLane = D.ule(RHS: Cmp);
    HadTautologicalInvertedLanes |= TautologicalInvertedLane;

    // If all lanes are tautological (either all divisors are ones, or divisor
    // is not greater than the constant we are comparing with),
    // we will prefer to avoid the fold.
    bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
    HadTautologicalLanes |= TautologicalLane;
    AllLanesAreTautological &= TautologicalLane;

    // If we are comparing with non-zero, we'll need to subtract said
    // comparison value from the LHS. But there is no point in doing that if
    // every lane where we are comparing with non-zero is tautological.
    if (!Cmp.isZero())
      AllComparisonsWithNonZerosAreTautological &= TautologicalLane;

    // Decompose D into D0 * 2^K
    unsigned K = D.countr_zero();
    assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(shiftAmt: K);

    // D is even if it has trailing zeros.
    HadEvenDivisor |= (K != 0);
    // D is a power-of-two if D0 is one.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOne();

    // P = inv(D0, 2^W)
    // D0 is odd, so the multiplicative inverse modulo 2^W always exists.
    unsigned W = D.getBitWidth();
    APInt P = D0.multiplicativeInverse();
    assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");

    // Q = floor((2^W - 1) u/ D)
    // R = ((2^W - 1) u% D)
    APInt Q, R;
    APInt::udivrem(LHS: APInt::getAllOnes(numBits: W), RHS: D, Quotient&: Q, Remainder&: R);

    // If we are comparing with zero, then that comparison constant is okay,
    // else it may need to be one less than that.
    if (Cmp.ugt(RHS: R))
      Q -= 1;

    assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");

    // If the lane is tautological the result can be constant-folded.
    if (TautologicalLane) {
      // Set P and K to bogus values so we can try to splat them.
      P = 0;
      KAmts.push_back(Elt: DAG.getAllOnesConstant(DL, VT: ShSVT));
      // And ensure that comparison constant is tautological,
      // it will always compare true/false.
      Q.setAllBits();
    } else {
      KAmts.push_back(Elt: DAG.getConstant(Val: K, DL, VT: ShSVT));
    }

    PAmts.push_back(Elt: DAG.getConstant(Val: P, DL, VT: SVT));
    QAmts.push_back(Elt: DAG.getConstant(Val: Q, DL, VT: SVT));
    return true;
  };

  SDValue N = REMNode.getOperand(i: 0);
  SDValue D = REMNode.getOperand(i: 1);

  // Collect the values from each element.
  if (!ISD::matchBinaryPredicate(LHS: D, RHS: CompTargetNode, Match: BuildUREMPattern))
    return SDValue();

  // If all lanes are tautological, the result can be constant-folded.
  if (AllLanesAreTautological)
    return SDValue();

  // If this is a urem by a powers-of-two, avoid the fold since it can be
  // best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();

  // Materialize the per-lane constants in the same form (scalar, splat or
  // build_vector) as the original divisor operand.
  SDValue PVal, KVal, QVal;
  if (D.getOpcode() == ISD::BUILD_VECTOR) {
    if (HadTautologicalLanes) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0'`s.
      turnVectorIntoSplatVector(Values: PAmts, Predicate: isNullConstant);
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(Values: KAmts, Predicate: isAllOnesConstant,
                                AlternativeReplacement: DAG.getConstant(Val: 0, DL, VT: ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, Ops: PAmts);
    KVal = DAG.getBuildVector(VT: ShVT, DL, Ops: KAmts);
    QVal = DAG.getBuildVector(VT, DL, Ops: QAmts);
  } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
           "Expected matchBinaryPredicate to return one element for "
           "SPLAT_VECTORs");
    PVal = DAG.getSplatVector(VT, DL, Op: PAmts[0]);
    KVal = DAG.getSplatVector(VT: ShVT, DL, Op: KAmts[0]);
    QVal = DAG.getSplatVector(VT, DL, Op: QAmts[0]);
  } else {
    PVal = PAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }

  if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::SUB, VT))
      return SDValue(); // FIXME: Could/should use `ISD::ADD`?
    assert(CompTargetNode.getValueType() == N.getValueType() &&
           "Expecting that the types on LHS and RHS of comparisons match.");
    N = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: N, N2: CompTargetNode);
  }

  // (mul N, P)
  SDValue Op0 = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N, N2: PVal);
  Created.push_back(Elt: Op0.getNode());

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ROTR, VT))
      return SDValue();
    // UREM: (rotr (mul N, P), K)
    Op0 = DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: Op0, N2: KVal);
    Created.push_back(Elt: Op0.getNode());
  }

  // UREM: (setule/setugt (rotr (mul N, P), K), Q)
  SDValue NewCC =
      DAG.getSetCC(DL, VT: SETCCVT, LHS: Op0, RHS: QVal,
                   Cond: ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
  if (!HadTautologicalInvertedLanes)
    return NewCC;

  // If any lanes previously compared always-false, the NewCC will give
  // always-true result for them, so we need to fixup those lanes.
  // Or the other way around for inequality predicate.
  assert(VT.isVector() && "Can/should only get here for vectors.");
  Created.push_back(Elt: NewCC.getNode());

  // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
  // if C2 is not less than C1, the comparison is always false.
  // But we have produced the comparison that will give the
  // opposite tautological answer. So these lanes would need to be fixed up.
  SDValue TautologicalInvertedChannels =
      DAG.getSetCC(DL, VT: SETCCVT, LHS: D, RHS: CompTargetNode, Cond: ISD::SETULE);
  Created.push_back(Elt: TautologicalInvertedChannels.getNode());

  // NOTE: we avoid letting illegal types through even if we're before legalize
  // ops – legalization has a hard time producing good code for this.
  if (isOperationLegalOrCustom(Op: ISD::VSELECT, VT: SETCCVT)) {
    // If we have a vector select, let's replace the comparison results in the
    // affected lanes with the correct tautological result.
    SDValue Replacement = DAG.getBoolConstant(V: Cond == ISD::SETEQ ? false : true,
                                              DL, VT: SETCCVT, OpVT: SETCCVT);
    return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: SETCCVT, N1: TautologicalInvertedChannels,
                       N2: Replacement, N3: NewCC);
  }

  // Else, we can just invert the comparison result in the appropriate lanes.
  //
  // NOTE: see the note above VSELECT above.
  if (isOperationLegalOrCustom(Op: ISD::XOR, VT: SETCCVT))
    return DAG.getNode(Opcode: ISD::XOR, DL, VT: SETCCVT, N1: NewCC,
                       N2: TautologicalInvertedChannels);

  return SDValue(); // Don't know how to lower.
}
7313
7314/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
7315/// where the divisor is constant and the comparison target is zero,
7316/// return a DAG expression that will generate the same comparison result
7317/// using only multiplications, additions and shifts/rotations.
7318/// Ref: "Hacker's Delight" 10-17.
7319SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
7320 SDValue CompTargetNode,
7321 ISD::CondCode Cond,
7322 DAGCombinerInfo &DCI,
7323 const SDLoc &DL) const {
7324 SmallVector<SDNode *, 7> Built;
7325 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7326 DCI, DL, Created&: Built)) {
7327 assert(Built.size() <= 7 && "Max size prediction failed.");
7328 for (SDNode *N : Built)
7329 DCI.AddToWorklist(N);
7330 return Folded;
7331 }
7332
7333 return SDValue();
7334}
7335
/// Worker for buildSREMEqFold: attempt to build the (in)equality-of-srem
/// replacement pattern, recording every newly created node in \p Created.
/// Returns SDValue() when the fold is impossible or deemed unprofitable.
SDValue
TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
  // Fold:
  //  (seteq/ne (srem N, D), 0)
  // To:
  //  (setule/ugt (rotr (add (mul N, P), A), K), Q)
  //
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^K)))
  // - Q = floor((2 * A) / (2^K))
  // where W is the width of the common type of N and D.
  //
  // When D is a power of two (and thus D0 is 1), the normal
  // formula for A and Q don't apply, because the derivation
  // depends on D not dividing 2^(W-1), and thus theorem ZRS
  // does not apply. This specifically fails when N = INT_MIN.
  //
  // Instead, for power-of-two D, we use:
  // FIXME: Why do we need to add anything?
  // - A = 2^(W-1)
  //  |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
  // - Q = 2^(W-K) - 1
  //  |-> Test that the top K bits are zero after rotation
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  // If we are after ops legalization, and MUL is unavailable, we can not
  // proceed.
  if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::MUL, VT))
    return SDValue();

  // TODO: Could support comparing with non-zero too.
  ConstantSDNode *CompTarget = isConstOrConstSplat(N: CompTargetNode);
  if (!CompTarget || !CompTarget->isZero())
    return SDValue();

  // Per-lane bookkeeping used below to decide profitability and whether the
  // ROTR is actually needed.
  bool HadOneDivisor = false;
  bool AllDivisorsAreOnes = true;
  bool HadEvenDivisor = false;
  bool AllDivisorsArePowerOfTwo = true;
  SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;

  // Computes the P/A/K/Q constants for one lane given its divisor; returning
  // false aborts the whole fold.
  auto BuildSREMPattern = [&](ConstantSDNode *C) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (C->isZero())
      return false;

    // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.

    // WARNING: this fold is only valid for positive divisors!
    // `rem %X, -C` is equivalent to `rem %X, C`
    APInt D = C->getAPIntValue().abs();

    // If all divisors are ones, we will prefer to avoid the fold.
    HadOneDivisor |= D.isOne();
    AllDivisorsAreOnes &= D.isOne();

    // Decompose D into D0 * 2^K
    unsigned K = D.countr_zero();
    assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(shiftAmt: K);

    // D is even if it has trailing zeros.
    HadEvenDivisor |= (K != 0);

    // D is a power-of-two if D0 is one. This includes INT_MIN.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOne();

    // P = inv(D0, 2^W)
    // D0 is odd, so the multiplicative inverse modulo 2^W always exists.
    unsigned W = D.getBitWidth();
    APInt P = D0.multiplicativeInverse();
    assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");

    // A = floor((2^(W - 1) - 1) / D0) & -2^K
    APInt A = APInt::getSignedMaxValue(numBits: W).udiv(RHS: D0);
    A.clearLowBits(loBits: K);

    // Q = floor((2 * A) / (2^K))
    APInt Q = (2 * A).udiv(RHS: APInt::getOneBitSet(numBits: W, BitNo: K));

    assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
           "We are expecting that A is always less than all-ones for SVT");
    assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");

    // If D was a power of two, apply the alternate constant derivation.
    if (D0.isOne()) {
      // A = 2^(W-1)
      A = APInt::getSignedMinValue(numBits: W);
      // - Q = 2^(W-K) - 1
      Q = APInt::getLowBitsSet(numBits: W, loBitsSet: W - K);
    }

    // If the divisor is 1 the result can be constant-folded.
    if (D.isOne()) {
      // Set P, A and K to bogus values so we can try to splat them.
      P = 0;
      A.setAllBits();
      KAmts.push_back(Elt: DAG.getAllOnesConstant(DL, VT: ShSVT));

      // x ?% 1 == 0  <-->  true  <-->  x u<= -1
      Q.setAllBits();
    } else {
      KAmts.push_back(Elt: DAG.getConstant(Val: K, DL, VT: ShSVT));
    }

    PAmts.push_back(Elt: DAG.getConstant(Val: P, DL, VT: SVT));
    AAmts.push_back(Elt: DAG.getConstant(Val: A, DL, VT: SVT));
    QAmts.push_back(Elt: DAG.getConstant(Val: Q, DL, VT: SVT));
    return true;
  };

  SDValue N = REMNode.getOperand(i: 0);
  SDValue D = REMNode.getOperand(i: 1);

  // Collect the values from each element.
  if (!ISD::matchUnaryPredicate(Op: D, Match: BuildSREMPattern))
    return SDValue();

  // If this is a srem by a one, avoid the fold since it can be constant-folded.
  if (AllDivisorsAreOnes)
    return SDValue();

  // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
  // since it can be best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();

  // Materialize the per-lane constants in the same form (scalar, splat or
  // build_vector) as the original divisor operand.
  SDValue PVal, AVal, KVal, QVal;
  if (D.getOpcode() == ISD::BUILD_VECTOR) {
    if (HadOneDivisor) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0'`s.
      turnVectorIntoSplatVector(Values: PAmts, Predicate: isNullConstant);
      // Try to turn AAmts into a splat, since we don't care about the
      // values that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(Values: AAmts, Predicate: isAllOnesConstant,
                                AlternativeReplacement: DAG.getConstant(Val: 0, DL, VT: SVT));
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(Values: KAmts, Predicate: isAllOnesConstant,
                                AlternativeReplacement: DAG.getConstant(Val: 0, DL, VT: ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, Ops: PAmts);
    AVal = DAG.getBuildVector(VT, DL, Ops: AAmts);
    KVal = DAG.getBuildVector(VT: ShVT, DL, Ops: KAmts);
    QVal = DAG.getBuildVector(VT, DL, Ops: QAmts);
  } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
           QAmts.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    PVal = DAG.getSplatVector(VT, DL, Op: PAmts[0]);
    AVal = DAG.getSplatVector(VT, DL, Op: AAmts[0]);
    KVal = DAG.getSplatVector(VT: ShVT, DL, Op: KAmts[0]);
    QVal = DAG.getSplatVector(VT, DL, Op: QAmts[0]);
  } else {
    assert(isa<ConstantSDNode>(D) && "Expected a constant");
    PVal = PAmts[0];
    AVal = AAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }

  // (mul N, P)
  SDValue Op0 = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N, N2: PVal);
  Created.push_back(Elt: Op0.getNode());

  // We need ADD to do this.
  if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ADD, VT))
    return SDValue();

  // (add (mul N, P), A)
  Op0 = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0, N2: AVal);
  Created.push_back(Elt: Op0.getNode());

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ROTR, VT))
      return SDValue();
    // SREM: (rotr (add (mul N, P), A), K)
    Op0 = DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: Op0, N2: KVal);
    Created.push_back(Elt: Op0.getNode());
  }

  // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
  return DAG.getSetCC(DL, VT: SETCCVT, LHS: Op0, RHS: QVal,
                      Cond: (Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT);
}
7542
7543SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
7544 const DenormalMode &Mode,
7545 SDNodeFlags Flags) const {
7546 SDLoc DL(Op);
7547 EVT VT = Op.getValueType();
7548 EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
7549 SDValue FPZero = DAG.getConstantFP(Val: 0.0, DL, VT);
7550
7551 // This is specifically a check for the handling of denormal inputs, not the
7552 // result.
7553 if (Mode.Input == DenormalMode::PreserveSign ||
7554 Mode.Input == DenormalMode::PositiveZero) {
7555 // Test = X == 0.0
7556 return DAG.getSetCC(DL, VT: CCVT, LHS: Op, RHS: FPZero, Cond: ISD::SETEQ, /*Chain=*/{},
7557 /*Signaling=*/IsSignaling: false, Flags);
7558 }
7559
7560 // Testing it with denormal inputs to avoid wrong estimate.
7561 //
7562 // Test = fabs(X) < SmallestNormal
7563 const fltSemantics &FltSem = VT.getFltSemantics();
7564 APFloat SmallestNorm = APFloat::getSmallestNormalized(Sem: FltSem);
7565 SDValue NormC = DAG.getConstantFP(Val: SmallestNorm, DL, VT);
7566 SDValue Fabs = DAG.getNode(Opcode: ISD::FABS, DL, VT, Operand: Op, Flags);
7567 return DAG.getSetCC(DL, VT: CCVT, LHS: Fabs, RHS: NormC, Cond: ISD::SETLT, /*Chain=*/{},
7568 /*Signaling=*/IsSignaling: false, Flags);
7569}
7570
7571SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
7572 bool LegalOps, bool OptForSize,
7573 NegatibleCost &Cost,
7574 unsigned Depth) const {
7575 // fneg is removable even if it has multiple uses.
7576 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
7577 Cost = NegatibleCost::Cheaper;
7578 return Op.getOperand(i: 0);
7579 }
7580
7581 // Don't recurse exponentially.
7582 if (Depth > SelectionDAG::MaxRecursionDepth)
7583 return SDValue();
7584
7585 // Pre-increment recursion depth for use in recursive calls.
7586 ++Depth;
7587 const SDNodeFlags Flags = Op->getFlags();
7588 EVT VT = Op.getValueType();
7589 unsigned Opcode = Op.getOpcode();
7590
7591 // Don't allow anything with multiple uses unless we know it is free.
7592 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7593 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7594 isFPExtFree(DestVT: VT, SrcVT: Op.getOperand(i: 0).getValueType());
7595 if (!IsFreeExtend)
7596 return SDValue();
7597 }
7598
7599 auto RemoveDeadNode = [&](SDValue N) {
7600 if (N && N.getNode()->use_empty())
7601 DAG.RemoveDeadNode(N: N.getNode());
7602 };
7603
7604 SDLoc DL(Op);
7605
7606 // Because getNegatedExpression can delete nodes we need a handle to keep
7607 // temporary nodes alive in case the recursion manages to create an identical
7608 // node.
7609 std::list<HandleSDNode> Handles;
7610
7611 switch (Opcode) {
7612 case ISD::ConstantFP: {
7613 // Don't invert constant FP values after legalization unless the target says
7614 // the negated constant is legal.
7615 bool IsOpLegal =
7616 isOperationLegal(Op: ISD::ConstantFP, VT) ||
7617 isFPImmLegal(neg(X: cast<ConstantFPSDNode>(Val&: Op)->getValueAPF()), VT,
7618 ForCodeSize: OptForSize);
7619
7620 if (LegalOps && !IsOpLegal)
7621 break;
7622
7623 APFloat V = cast<ConstantFPSDNode>(Val&: Op)->getValueAPF();
7624 V.changeSign();
7625 SDValue CFP = DAG.getConstantFP(Val: V, DL, VT);
7626
7627 // If we already have the use of the negated floating constant, it is free
7628 // to negate it even it has multiple uses.
7629 if (!Op.hasOneUse() && CFP.use_empty())
7630 break;
7631 Cost = NegatibleCost::Neutral;
7632 return CFP;
7633 }
7634 case ISD::SPLAT_VECTOR: {
7635 // fold splat_vector(fneg(X)) -> splat_vector(-X)
7636 SDValue X = Op.getOperand(i: 0);
7637 if (!isOperationLegal(Op: ISD::SPLAT_VECTOR, VT))
7638 break;
7639
7640 SDValue NegX = getCheaperNegatedExpression(Op: X, DAG, LegalOps, OptForSize);
7641 if (!NegX)
7642 break;
7643 Cost = NegatibleCost::Cheaper;
7644 return DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL, VT, Operand: NegX);
7645 }
7646 case ISD::BUILD_VECTOR: {
7647 // Only permit BUILD_VECTOR of constants.
7648 if (llvm::any_of(Range: Op->op_values(), P: [&](SDValue N) {
7649 return !N.isUndef() && !isa<ConstantFPSDNode>(Val: N);
7650 }))
7651 break;
7652
7653 bool IsOpLegal =
7654 (isOperationLegal(Op: ISD::ConstantFP, VT) &&
7655 isOperationLegal(Op: ISD::BUILD_VECTOR, VT)) ||
7656 llvm::all_of(Range: Op->op_values(), P: [&](SDValue N) {
7657 return N.isUndef() ||
7658 isFPImmLegal(neg(X: cast<ConstantFPSDNode>(Val&: N)->getValueAPF()), VT,
7659 ForCodeSize: OptForSize);
7660 });
7661
7662 if (LegalOps && !IsOpLegal)
7663 break;
7664
7665 SmallVector<SDValue, 4> Ops;
7666 for (SDValue C : Op->op_values()) {
7667 if (C.isUndef()) {
7668 Ops.push_back(Elt: C);
7669 continue;
7670 }
7671 APFloat V = cast<ConstantFPSDNode>(Val&: C)->getValueAPF();
7672 V.changeSign();
7673 Ops.push_back(Elt: DAG.getConstantFP(Val: V, DL, VT: C.getValueType()));
7674 }
7675 Cost = NegatibleCost::Neutral;
7676 return DAG.getBuildVector(VT, DL, Ops);
7677 }
7678 case ISD::FADD: {
7679 if (!Flags.hasNoSignedZeros())
7680 break;
7681
7682 // After operation legalization, it might not be legal to create new FSUBs.
7683 if (LegalOps && !isOperationLegalOrCustom(Op: ISD::FSUB, VT))
7684 break;
7685 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1);
7686
7687 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7688 NegatibleCost CostX = NegatibleCost::Expensive;
7689 SDValue NegX =
7690 getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7691 // Prevent this node from being deleted by the next call.
7692 if (NegX)
7693 Handles.emplace_back(args&: NegX);
7694
7695 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7696 NegatibleCost CostY = NegatibleCost::Expensive;
7697 SDValue NegY =
7698 getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7699
7700 // We're done with the handles.
7701 Handles.clear();
7702
7703 // Negate the X if its cost is less or equal than Y.
7704 if (NegX && (CostX <= CostY)) {
7705 Cost = CostX;
7706 SDValue N = DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: NegX, N2: Y, Flags);
7707 if (NegY != N)
7708 RemoveDeadNode(NegY);
7709 return N;
7710 }
7711
7712 // Negate the Y if it is not expensive.
7713 if (NegY) {
7714 Cost = CostY;
7715 SDValue N = DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: NegY, N2: X, Flags);
7716 if (NegX != N)
7717 RemoveDeadNode(NegX);
7718 return N;
7719 }
7720 break;
7721 }
7722 case ISD::FSUB: {
7723 // We can't turn -(A-B) into B-A when we honor signed zeros.
7724 if (!Flags.hasNoSignedZeros())
7725 break;
7726
7727 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1);
7728 // fold (fneg (fsub 0, Y)) -> Y
7729 if (ConstantFPSDNode *C = isConstOrConstSplatFP(N: X, /*AllowUndefs*/ true))
7730 if (C->isZero()) {
7731 Cost = NegatibleCost::Cheaper;
7732 return Y;
7733 }
7734
7735 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7736 Cost = NegatibleCost::Neutral;
7737 return DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: Y, N2: X, Flags);
7738 }
7739 case ISD::FMUL:
7740 case ISD::FDIV: {
7741 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1);
7742
7743 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7744 NegatibleCost CostX = NegatibleCost::Expensive;
7745 SDValue NegX =
7746 getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7747 // Prevent this node from being deleted by the next call.
7748 if (NegX)
7749 Handles.emplace_back(args&: NegX);
7750
7751 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7752 NegatibleCost CostY = NegatibleCost::Expensive;
7753 SDValue NegY =
7754 getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7755
7756 // We're done with the handles.
7757 Handles.clear();
7758
7759 // Negate the X if its cost is less or equal than Y.
7760 if (NegX && (CostX <= CostY)) {
7761 Cost = CostX;
7762 SDValue N = DAG.getNode(Opcode, DL, VT, N1: NegX, N2: Y, Flags);
7763 if (NegY != N)
7764 RemoveDeadNode(NegY);
7765 return N;
7766 }
7767
7768 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7769 if (auto *C = isConstOrConstSplatFP(N: Op.getOperand(i: 1)))
7770 if (C->isExactlyValue(V: 2.0) && Op.getOpcode() == ISD::FMUL)
7771 break;
7772
7773 // Negate the Y if it is not expensive.
7774 if (NegY) {
7775 Cost = CostY;
7776 SDValue N = DAG.getNode(Opcode, DL, VT, N1: X, N2: NegY, Flags);
7777 if (NegX != N)
7778 RemoveDeadNode(NegX);
7779 return N;
7780 }
7781 break;
7782 }
7783 case ISD::FMA:
7784 case ISD::FMULADD:
7785 case ISD::FMAD: {
7786 if (!Flags.hasNoSignedZeros())
7787 break;
7788
7789 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1), Z = Op.getOperand(i: 2);
7790 NegatibleCost CostZ = NegatibleCost::Expensive;
7791 SDValue NegZ =
7792 getNegatedExpression(Op: Z, DAG, LegalOps, OptForSize, Cost&: CostZ, Depth);
7793 // Give up if fail to negate the Z.
7794 if (!NegZ)
7795 break;
7796
7797 // Prevent this node from being deleted by the next two calls.
7798 Handles.emplace_back(args&: NegZ);
7799
7800 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7801 NegatibleCost CostX = NegatibleCost::Expensive;
7802 SDValue NegX =
7803 getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7804 // Prevent this node from being deleted by the next call.
7805 if (NegX)
7806 Handles.emplace_back(args&: NegX);
7807
7808 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7809 NegatibleCost CostY = NegatibleCost::Expensive;
7810 SDValue NegY =
7811 getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7812
7813 // We're done with the handles.
7814 Handles.clear();
7815
7816 // Negate the X if its cost is less or equal than Y.
7817 if (NegX && (CostX <= CostY)) {
7818 Cost = std::min(a: CostX, b: CostZ);
7819 SDValue N = DAG.getNode(Opcode, DL, VT, N1: NegX, N2: Y, N3: NegZ, Flags);
7820 if (NegY != N)
7821 RemoveDeadNode(NegY);
7822 return N;
7823 }
7824
7825 // Negate the Y if it is not expensive.
7826 if (NegY) {
7827 Cost = std::min(a: CostY, b: CostZ);
7828 SDValue N = DAG.getNode(Opcode, DL, VT, N1: X, N2: NegY, N3: NegZ, Flags);
7829 if (NegX != N)
7830 RemoveDeadNode(NegX);
7831 return N;
7832 }
7833 break;
7834 }
7835
7836 case ISD::FP_EXTEND:
7837 case ISD::FSIN:
7838 if (SDValue NegV = getNegatedExpression(Op: Op.getOperand(i: 0), DAG, LegalOps,
7839 OptForSize, Cost, Depth))
7840 return DAG.getNode(Opcode, DL, VT, Operand: NegV);
7841 break;
7842 case ISD::FP_ROUND:
7843 if (SDValue NegV = getNegatedExpression(Op: Op.getOperand(i: 0), DAG, LegalOps,
7844 OptForSize, Cost, Depth))
7845 return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT, N1: NegV, N2: Op.getOperand(i: 1));
7846 break;
7847 case ISD::SELECT:
7848 case ISD::VSELECT: {
7849 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7850 // iff at least one cost is cheaper and the other is neutral/cheaper
7851 SDValue LHS = Op.getOperand(i: 1);
7852 NegatibleCost CostLHS = NegatibleCost::Expensive;
7853 SDValue NegLHS =
7854 getNegatedExpression(Op: LHS, DAG, LegalOps, OptForSize, Cost&: CostLHS, Depth);
7855 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7856 RemoveDeadNode(NegLHS);
7857 break;
7858 }
7859
7860 // Prevent this node from being deleted by the next call.
7861 Handles.emplace_back(args&: NegLHS);
7862
7863 SDValue RHS = Op.getOperand(i: 2);
7864 NegatibleCost CostRHS = NegatibleCost::Expensive;
7865 SDValue NegRHS =
7866 getNegatedExpression(Op: RHS, DAG, LegalOps, OptForSize, Cost&: CostRHS, Depth);
7867
7868 // We're done with the handles.
7869 Handles.clear();
7870
7871 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7872 (CostLHS != NegatibleCost::Cheaper &&
7873 CostRHS != NegatibleCost::Cheaper)) {
7874 RemoveDeadNode(NegLHS);
7875 RemoveDeadNode(NegRHS);
7876 break;
7877 }
7878
7879 Cost = std::min(a: CostLHS, b: CostRHS);
7880 return DAG.getSelect(DL, VT, Cond: Op.getOperand(i: 0), LHS: NegLHS, RHS: NegRHS);
7881 }
7882 }
7883
7884 return SDValue();
7885}
7886
7887//===----------------------------------------------------------------------===//
7888// Legalization Utilities
7889//===----------------------------------------------------------------------===//
7890
/// Expand a multiply of type VT (MUL, UMUL_LOHI or SMUL_LOHI) into operations
/// on the half-width type HiLoVT.
///
/// On success, pieces of the result are appended to Result as HiLoVT values:
/// two halves (lo, hi) for ISD::MUL, or four quarters (low-word lo/hi then
/// high-word lo/hi) for the *MUL_LOHI opcodes. LL/LH/RL/RH may pre-supply the
/// split halves of LHS/RHS; either all four are set or none are.
bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    SmallVectorImpl<SDValue> &Result,
                                    EVT HiLoVT, SelectionDAG &DAG,
                                    MulExpansionKind Kind, SDValue LL,
                                    SDValue LH, SDValue RL, SDValue RH) const {
  assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
         Opcode == ISD::SMUL_LOHI);

  // Determine which half-width high-part multiplies are available (all are
  // treated as available when Kind == Always).
  bool HasMULHS = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
  bool HasMULHU = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
  bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
  bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);

  // Without any way to form a double-width product of halves, give up.
  if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
    return false;

  unsigned OuterBitSize = VT.getScalarSizeInBits();
  unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();

  // LL, LH, RL, and RH must be either all NULL or all set to a value.
  assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
         (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));

  // Emit a full (Lo, Hi) product of two HiLoVT values, preferring the fused
  // *MUL_LOHI node and falling back to a separate MUL + MULH[SU] pair.
  auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
                          bool Signed) -> bool {
    if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
      SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
      Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
      Hi = Lo.getValue(1);
      return true;
    }
    if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
      Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
      Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
      return true;
    }
    return false;
  };

  SDValue Lo, Hi;

  // If the caller didn't pre-split the operands, form the low halves with a
  // truncate.
  if (!LL.getNode() && !RL.getNode() &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
    RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
  }

  if (!LL.getNode())
    return false;

  // Fast path: if both operands fit in the low half (high bits known zero),
  // a single unsigned half-width product is the entire answer.
  APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
  if (DAG.MaskedValueIsZero(LHS, HighMask) &&
      DAG.MaskedValueIsZero(RHS, HighMask)) {
    // The inputs are both zero-extended.
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      if (Opcode != ISD::MUL) {
        // The high word of the double-width result is all zero.
        SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
        Result.push_back(Zero);
        Result.push_back(Zero);
      }
      return true;
    }
  }

  // Similar fast path when both operands are sign-extensions of half-width
  // values: one signed half-width product suffices.
  if (!VT.isVector() && Opcode == ISD::MUL &&
      DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
      DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
    // The input values are both sign-extended.
    // TODO non-MUL case?
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      return true;
    }
  }

  unsigned ShiftAmount = OuterBitSize - InnerBitSize;
  SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);

  // Form the high halves with shift + truncate if the caller didn't supply
  // them.
  if (!LH.getNode() && !RH.getNode() &&
      isOperationLegalOrCustom(ISD::SRL, VT) &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
    LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
    RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
    RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
  }

  if (!LH.getNode())
    return false;

  // Schoolbook multiplication: start with the LL*RL partial product, whose
  // low half is the low quarter of the final result.
  if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
    return false;

  Result.push_back(Lo);

  if (Opcode == ISD::MUL) {
    // Only the low VT bits are needed: hi = hi(LL*RL) + lo(LL*RH) + lo(LH*RL).
    RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
    LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
    Result.push_back(Hi);
    return true;
  }

  // Compute the full width result.
  // Merge re-assembles a (Lo, Hi) HiLoVT pair into a single VT value.
  auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
    Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
    Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
    Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
    return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
  };

  // Accumulate the middle partial products (LL*RH and LH*RL) at full width.
  SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
  if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
    return false;

  // This is effectively the add part of a multiply-add of half-sized operands,
  // so it cannot overflow.
  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
    return false;

  SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
  EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // Adding the second middle product can carry out of VT; propagate it with
  // the glued ADDC/ADDE pair when the target supports it, otherwise with
  // UADDO_CARRY.
  bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
                  isOperationLegalOrCustom(ISD::ADDE, VT));
  if (UseGlue)
    Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
                       Merge(Lo, Hi));
  else
    Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
                       Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));

  SDValue Carry = Next.getValue(1);
  // Second quarter of the result (high half of the low word).
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);

  // LH*RH contributes to the high word; it is a signed product only for
  // SMUL_LOHI.
  if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
    return false;

  // Fold the carry from the middle sum into the high product.
  if (UseGlue)
    Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
                     Carry);
  else
    Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
                     Zero, Carry);

  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  // Signed correction: the middle products were computed unsigned. If an
  // operand's high half is negative (tested with SETLT against zero), the
  // other operand's low half was over-counted and is subtracted back out.
  if (Opcode == ISD::SMUL_LOHI) {
    SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                                  DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
    Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);

    NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                          DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
    Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
  }

  // Emit the last two quarters of the result (the high word).
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  return true;
}
8065
8066bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
8067 SelectionDAG &DAG, MulExpansionKind Kind,
8068 SDValue LL, SDValue LH, SDValue RL,
8069 SDValue RH) const {
8070 SmallVector<SDValue, 2> Result;
8071 bool Ok = expandMUL_LOHI(Opcode: N->getOpcode(), VT: N->getValueType(ResNo: 0), dl: SDLoc(N),
8072 LHS: N->getOperand(Num: 0), RHS: N->getOperand(Num: 1), Result, HiLoVT,
8073 DAG, Kind, LL, LH, RL, RH);
8074 if (Ok) {
8075 assert(Result.size() == 2);
8076 Lo = Result[0];
8077 Hi = Result[1];
8078 }
8079 return Ok;
8080}
8081
8082// Optimize unsigned division or remainder by constants for types twice as large
8083// as a legal VT.
8084//
8085// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
8086// can be computed
8087// as:
8088// Sum = __builtin_uadd_overflow(Lo, High, &Sum);
8089// Remainder = Sum % Constant;
8090//
8091// If (1 << (BitWidth / 2)) % Constant != 1, we can search for a smaller value
8092// W such that W != (BitWidth / 2) and (1 << W) % Constant == 1. We can break
8093// High:Low into 3 chunks of W bits and compute remainder as
8094// Sum = Chunk0 + Chunk1 + Chunk2;
8095// Remainder = Sum % Constant;
8096//
8097// This is based on "Remainder by Summing Digits" from Hacker's Delight.
8098//
// For division, we can compute the remainder using the algorithm described
// above, subtract it from the dividend to get an exact multiple of Constant.
// Then multiply that exact multiple by the multiplicative inverse modulo
// (1 << BitWidth) to get the quotient.
8103
8104// If Constant is even, we can shift right the dividend and the divisor by the
8105// number of trailing zeros in Constant before applying the remainder algorithm.
8106// If we're after the quotient, we can subtract this value from the shifted
8107// dividend and multiply by the multiplicative inverse of the shifted divisor.
8108// If we want the remainder, we shift the value left by the number of trailing
8109// zeros and add the bits that were shifted out of the dividend.
bool TargetLowering::expandDIVREMByConstant(SDNode *N,
                                            SmallVectorImpl<SDValue> &Result,
                                            EVT HiLoVT, SelectionDAG &DAG,
                                            SDValue LL, SDValue LH) const {
  unsigned Opcode = N->getOpcode();
  EVT VT = N->getValueType(0);

  // TODO: Support signed division/remainder.
  if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
    return false;
  assert(
      (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
      "Unexpected opcode");

  // Only a constant divisor can be expanded this way.
  auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!CN)
    return false;

  APInt Divisor = CN->getAPIntValue();
  unsigned BitWidth = Divisor.getBitWidth();
  unsigned HBitWidth = BitWidth / 2;
  assert(VT.getScalarSizeInBits() == BitWidth &&
         HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");

  // Divisor needs to be less than (1 << HBitWidth) so that its truncation to
  // HiLoVT below is lossless.
  APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
  if (Divisor.uge(HalfMaxPlus1))
    return false;

  // We depend on the UREM by constant optimization in DAGCombiner that requires
  // high multiply.
  if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
      !isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT))
    return false;

  // Don't expand if optimizing for size.
  if (DAG.shouldOptForSize())
    return false;

  // Early out for 0 or 1 divisors.
  if (Divisor.ule(1))
    return false;

  // If the divisor is even, shift it until it becomes odd; the shifted-out
  // dividend bits are reconciled at the end.
  unsigned TrailingZeros = 0;
  if (!Divisor[0]) {
    TrailingZeros = Divisor.countr_zero();
    Divisor.lshrInPlace(TrailingZeros);
  }

  // Look for the largest chunk width W such that (1 << W) % Divisor == 1.
  unsigned BestChunkWidth = 0;
  for (unsigned I = HBitWidth, E = HBitWidth / 2; I > E; --I) {
    APInt Mod = APInt::getOneBitSet(Divisor.getBitWidth(), I).urem(Divisor);

    if (!Mod.isOne())
      continue;

    // If best chunk is HBitWidth, we can use it and handle the carry out.
    // Otherwise, ensure the sum won't overflow HiLoVT (HBitWidth).
    // Summing N chunks adds ceil(log2(N)) extra carry bits to the width.
    // Safety check: Base Chunk Width (I) + Carry Bits <= Register Width.
    unsigned NumChunks = divideCeil(BitWidth, I);
    if (I == HBitWidth || I + llvm::bit_width(NumChunks - 1) <= HBitWidth) {
      BestChunkWidth = I;
      break;
    }
  }

  // If we didn't find a chunk size, exit.
  if (!BestChunkWidth)
    return false;

  SDLoc dl(N);

  // Split the dividend into HiLoVT halves if the caller didn't already.
  assert(!LL == !LH && "Expected both input halves or no input halves!");
  if (!LL)
    std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);

  bool HasFSHR = isOperationLegal(ISD::FSHR, HiLoVT);

  // Extract HBitWidth bits starting ShiftAmt bits into the Hi:Lo pair, using
  // a funnel shift when legal and an SRL/SHL/OR sequence otherwise.
  auto GetFSHR = [&](SDValue Lo, SDValue Hi, unsigned ShiftAmt) {
    assert(ShiftAmt > 0 && ShiftAmt < HBitWidth);
    if (HasFSHR)
      return DAG.getNode(ISD::FSHR, dl, HiLoVT, Hi, Lo,
                         DAG.getShiftAmountConstant(ShiftAmt, HiLoVT, dl));
    return DAG.getNode(
        ISD::OR, dl, HiLoVT,
        DAG.getNode(ISD::SRL, dl, HiLoVT, Lo,
                    DAG.getShiftAmountConstant(ShiftAmt, HiLoVT, dl)),
        DAG.getNode(
            ISD::SHL, dl, HiLoVT, Hi,
            DAG.getShiftAmountConstant(HBitWidth - ShiftAmt, HiLoVT, dl)));
  };

  // Shift the input by the number of TrailingZeros in the divisor. The
  // shifted out bits will be added to the remainder later.
  SDValue PartialRem;
  if (TrailingZeros && Opcode != ISD::UDIV) {
    // Save the shifted off bits if we need the remainder.
    APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
    PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
                             DAG.getConstant(Mask, dl, HiLoVT));
  }

  SDValue Sum;
  // If BestChunkWidth is HBitWidth add low and high half. If there is a carry
  // out, add that to the final sum.
  if (BestChunkWidth == HBitWidth) {
    // Shift LH:LL right if there were trailing zeros in the divisor.
    if (TrailingZeros) {
      LL = GetFSHR(LL, LH, TrailingZeros);
      LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
                       DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
    }

    // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
    EVT SetCCType =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
    if (isOperationLegalOrCustom(ISD::UADDO_CARRY, HiLoVT)) {
      SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
      Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
      // Fold the carry-out back into the sum (adds 0 + carry).
      Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
                        DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
    } else {
      Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
      // Unsigned overflow occurred iff the sum wrapped below an addend.
      SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
      // If the boolean for the target is 0 or 1, we can add the setcc result
      // directly.
      if (getBooleanContents(HiLoVT) ==
          TargetLoweringBase::ZeroOrOneBooleanContent)
        Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
      else
        Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
                              DAG.getConstant(0, dl, HiLoVT));
      Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
    }
  } else {
    // Otherwise split into multiple chunks and add them together. We chose
    // BestChunkWidth so that the sum will not overflow.
    SDValue Mask = DAG.getConstant(
        APInt::getLowBitsSet(HBitWidth, BestChunkWidth), dl, HiLoVT);

    for (unsigned I = 0; I < BitWidth - TrailingZeros; I += BestChunkWidth) {
      // If there were trailing zeros in the divisor, increase the shift amount.
      unsigned Shift = I + TrailingZeros;
      SDValue Chunk;
      if (Shift == 0)
        Chunk = LL;
      else if (Shift >= HBitWidth)
        // Chunk lies entirely in the high half.
        Chunk = DAG.getNode(
            ISD::SRL, dl, HiLoVT, LH,
            DAG.getShiftAmountConstant(Shift - HBitWidth, HiLoVT, dl));
      else
        // Chunk straddles the halves.
        Chunk = GetFSHR(LL, LH, Shift);
      // If we're on the last chunk, we don't need an AND.
      if (I + BestChunkWidth < BitWidth - TrailingZeros)
        Chunk = DAG.getNode(ISD::AND, dl, HiLoVT, Chunk, Mask);
      if (!Sum)
        Sum = Chunk;
      else
        Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Chunk);
    }
  }

  // Perform a HiLoVT urem on the Sum using truncated divisor.
  SDValue RemL =
      DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
                  DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
  SDValue RemH = DAG.getConstant(0, dl, HiLoVT);

  if (Opcode != ISD::UREM) {
    // If we didn't shift LL/LH earlier, do it now.
    if (BestChunkWidth != HBitWidth && TrailingZeros) {
      LL = GetFSHR(LL, LH, TrailingZeros);
      LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
                       DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
    }

    // Subtract the remainder from the shifted dividend.
    SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
    SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);

    Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);

    // Multiply by the multiplicative inverse of the divisor modulo
    // (1 << BitWidth).
    APInt MulFactor = Divisor.multiplicativeInverse();

    SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
                                   DAG.getConstant(MulFactor, dl, VT));

    // Split the quotient into low and high parts.
    SDValue QuotL, QuotH;
    std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
    Result.push_back(QuotL);
    Result.push_back(QuotH);
  }

  if (Opcode != ISD::UDIV) {
    // If we shifted the input, shift the remainder left and add the bits we
    // shifted off the input.
    if (TrailingZeros) {
      RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
                         DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));

      // The low TrailingZeros bits of RemL are zero here, so the OR is
      // disjoint.
      RemL = DAG.getNode(ISD::OR, dl, HiLoVT, RemL, PartialRem,
                         SDNodeFlags::Disjoint);
    }
    Result.push_back(RemL);
    Result.push_back(RemH);
  }

  return true;
}
8325
8326// Check that (every element of) Z is undef or not an exact multiple of BW.
8327static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
8328 return ISD::matchUnaryPredicate(
8329 Op: Z,
8330 Match: [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(RHS: BW) != 0; },
8331 /*AllowUndefs=*/true, /*AllowTruncation=*/true);
8332}
8333
// Expand a vector-predicated funnel shift (VP_FSHL/VP_FSHR) into VP shift,
// arithmetic and logic nodes, threading the mask and EVL through every node.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
  EVT VT = Node->getValueType(0);
  SDValue ShX, ShY;        // Shifted contributions from X and Y.
  SDValue ShAmt, InvShAmt; // Shift amount and its complement.
  SDValue X = Node->getOperand(0);
  SDValue Y = Node->getOperand(1);
  SDValue Z = Node->getOperand(2);    // Shift amount.
  SDValue Mask = Node->getOperand(3); // VP mask.
  SDValue VL = Node->getOperand(4);   // VP explicit vector length.

  unsigned BW = VT.getScalarSizeInBits();
  bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Z.getValueType();
  // When the shift amount is known non-zero mod BW, neither single shift can
  // be by the full bit width, so the simple two-shift form is safe.
  if (isNonZeroModBitWidthOrUndef(Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
    ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
    InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
    ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
                      VL);
    ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
                      VL);
  } else {
    // C may be zero, so keep every shift amount strictly below BW by using
    // an extra constant shift by 1:
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
                                 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
      InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
    } else {
      SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
      ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
      InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
    }

    SDValue One = DAG.getConstant(1, DL, ShVT);
    if (IsFSHL) {
      ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
      SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
      ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
    } else {
      SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
      ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
      ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
    }
  }
  return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
}
8390
/// Expand FSHL/FSHR into shift/or sequences, or into the opposite-direction
/// funnel shift when that opcode is better supported by the target. Returns
/// an empty SDValue if a vector expansion would need illegal operations.
SDValue TargetLowering::expandFunnelShift(SDNode *Node,
                                          SelectionDAG &DAG) const {
  // VP (vector-predicated) funnel shifts have their own expansion.
  if (Node->isVPOpcode())
    return expandVPFunnelShift(Node, DAG);

  EVT VT = Node->getValueType(0);

  // For vectors, only expand if all the building-block operations are
  // available; otherwise leave the node for other legalization strategies.
  if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
    return SDValue();

  SDValue X = Node->getOperand(0);
  SDValue Y = Node->getOperand(1);
  SDValue Z = Node->getOperand(2); // Shift amount.

  unsigned BW = VT.getScalarSizeInBits();
  bool IsFSHL = Node->getOpcode() == ISD::FSHL;
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Z.getValueType();

  // If a funnel shift in the other direction is more supported, use it.
  unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
  if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
      isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
    if (isNonZeroModBitWidthOrUndef(Z, BW)) {
      // fshl X, Y, Z -> fshr X, Y, -Z
      // fshr X, Y, Z -> fshl X, Y, -Z
      Z = DAG.getNegative(Z, DL, ShVT);
    } else {
      // The amount may be zero mod BW; pre-rotate the operands by 1 so the
      // reversed opcode with a complemented amount gives the same result.
      // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
      // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
      SDValue One = DAG.getConstant(1, DL, ShVT);
      if (IsFSHL) {
        Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
        X = DAG.getNode(ISD::SRL, DL, VT, X, One);
      } else {
        X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
        Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
      }
      Z = DAG.getNOT(DL, Z, ShVT);
    }
    return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
  }

  SDValue ShX, ShY;
  SDValue ShAmt, InvShAmt;
  // When the amount is known non-zero mod BW, neither shift can be by the
  // full bit width, so the direct two-shift form is safe.
  if (isNonZeroModBitWidthOrUndef(Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
    ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
    InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
    ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
    ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
  } else {
    // C may be zero; keep every variable shift strictly below BW by using an
    // extra constant shift by 1:
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
    } else {
      SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
      ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
      InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
    }

    SDValue One = DAG.getConstant(1, DL, ShVT);
    if (IsFSHL) {
      ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
      SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
      ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
    } else {
      SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
      ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
      ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
    }
  }
  return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
}
8477
8478// TODO: Merge with expandFunnelShift.
8479SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
8480 SelectionDAG &DAG) const {
8481 EVT VT = Node->getValueType(ResNo: 0);
8482 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8483 bool IsLeft = Node->getOpcode() == ISD::ROTL;
8484 SDValue Op0 = Node->getOperand(Num: 0);
8485 SDValue Op1 = Node->getOperand(Num: 1);
8486 SDLoc DL(SDValue(Node, 0));
8487
8488 EVT ShVT = Op1.getValueType();
8489 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: ShVT);
8490
8491 // If a rotate in the other direction is more supported, use it.
8492 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8493 if (!isOperationLegalOrCustom(Op: Node->getOpcode(), VT) &&
8494 isOperationLegalOrCustom(Op: RevRot, VT) && isPowerOf2_32(Value: EltSizeInBits)) {
8495 SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Zero, N2: Op1);
8496 return DAG.getNode(Opcode: RevRot, DL, VT, N1: Op0, N2: Sub);
8497 }
8498
8499 if (!AllowVectorOps && VT.isVector() &&
8500 (!isOperationLegalOrCustom(Op: ISD::SHL, VT) ||
8501 !isOperationLegalOrCustom(Op: ISD::SRL, VT) ||
8502 !isOperationLegalOrCustom(Op: ISD::SUB, VT) ||
8503 !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT) ||
8504 !isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT)))
8505 return SDValue();
8506
8507 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8508 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8509 SDValue BitWidthMinusOneC = DAG.getConstant(Val: EltSizeInBits - 1, DL, VT: ShVT);
8510 SDValue ShVal;
8511 SDValue HsVal;
8512 if (isPowerOf2_32(Value: EltSizeInBits)) {
8513 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8514 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8515 SDValue NegOp1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Zero, N2: Op1);
8516 SDValue ShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: Op1, N2: BitWidthMinusOneC);
8517 ShVal = DAG.getNode(Opcode: ShOpc, DL, VT, N1: Op0, N2: ShAmt);
8518 SDValue HsAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: NegOp1, N2: BitWidthMinusOneC);
8519 HsVal = DAG.getNode(Opcode: HsOpc, DL, VT, N1: Op0, N2: HsAmt);
8520 } else {
8521 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8522 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8523 SDValue BitWidthC = DAG.getConstant(Val: EltSizeInBits, DL, VT: ShVT);
8524 SDValue ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Op1, N2: BitWidthC);
8525 ShVal = DAG.getNode(Opcode: ShOpc, DL, VT, N1: Op0, N2: ShAmt);
8526 SDValue HsAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: BitWidthMinusOneC, N2: ShAmt);
8527 SDValue One = DAG.getConstant(Val: 1, DL, VT: ShVT);
8528 HsVal =
8529 DAG.getNode(Opcode: HsOpc, DL, VT, N1: DAG.getNode(Opcode: HsOpc, DL, VT, N1: Op0, N2: One), N2: HsAmt);
8530 }
8531 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShVal, N2: HsVal);
8532}
8533
8534/// Check if CLMUL on VT can eventually reach a type with legal CLMUL through
8535/// a chain of halving decompositions (halving element width) and/or vector
8536/// widening (doubling element count). This guides expansion strategy selection:
8537/// if true, the halving/widening path produces better code than bit-by-bit.
8538///
8539/// HalveDepth tracks halving steps only (each creates ~4x more operations).
8540/// Widening steps are cheap (O(1) pad/extract) and don't count.
8541/// Limiting halvings to 2 prevents exponential blowup:
8542/// 1 halving: ~4 sub-CLMULs (good, e.g. v8i16 -> v8i8)
8543/// 2 halvings: ~16 sub-CLMULs (acceptable, e.g. v4i32 -> v4i16 -> v8i8)
8544/// 3 halvings: ~64 sub-CLMULs (worse than bit-by-bit expansion)
8545static bool canNarrowCLMULToLegal(const TargetLowering &TLI, LLVMContext &Ctx,
8546 EVT VT, unsigned HalveDepth = 0,
8547 unsigned TotalDepth = 0) {
8548 if (HalveDepth > 2 || TotalDepth > 8 || !VT.isFixedLengthVector())
8549 return false;
8550 if (TLI.isOperationLegalOrCustom(Op: ISD::CLMUL, VT))
8551 return true;
8552 if (!TLI.isTypeLegal(VT))
8553 return false;
8554
8555 unsigned BW = VT.getScalarSizeInBits();
8556
8557 // Halve: halve element width, same element count.
8558 // This is the expensive step -- each halving creates ~4x more operations.
8559 if (BW % 2 == 0) {
8560 EVT HalfEltVT = EVT::getIntegerVT(Context&: Ctx, BitWidth: BW / 2);
8561 EVT HalfVT = VT.changeVectorElementType(Context&: Ctx, EltVT: HalfEltVT);
8562 if (TLI.isTypeLegal(VT: HalfVT) &&
8563 canNarrowCLMULToLegal(TLI, Ctx, VT: HalfVT, HalveDepth: HalveDepth + 1, TotalDepth: TotalDepth + 1))
8564 return true;
8565 }
8566
8567 // Widen: double element count (fixed-width vectors only).
8568 // This is cheap -- just INSERT_SUBVECTOR + EXTRACT_SUBVECTOR.
8569 EVT WideVT = VT.getDoubleNumVectorElementsVT(Context&: Ctx);
8570 if (TLI.isTypeLegal(VT: WideVT) &&
8571 canNarrowCLMULToLegal(TLI, Ctx, VT: WideVT, HalveDepth, TotalDepth: TotalDepth + 1))
8572 return true;
8573
8574 return false;
8575}
8576
/// Expand carry-less multiply nodes (ISD::CLMUL, ISD::CLMULR, ISD::CLMULH)
/// into operations the target supports.
///
/// CLMUL: for vectors, tries (1) halving decomposition to a narrower legal
/// CLMUL, (2) promotion to a double-element-width CLMUL, (3) element-count
/// widening; otherwise falls back to a bit-by-bit XOR-accumulation loop.
/// CLMULR/CLMULH: built from CLMUL/CLMULH pairs, a bitreverse identity, or a
/// zext -> CLMUL -> shift -> trunc sequence, depending on what is legal.
SDValue TargetLowering::expandCLMUL(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc DL(Node);
  EVT VT = Node->getValueType(ResNo: 0);
  SDValue X = Node->getOperand(Num: 0);
  SDValue Y = Node->getOperand(Num: 1);
  unsigned BW = VT.getScalarSizeInBits();
  unsigned Opcode = Node->getOpcode();
  LLVMContext &Ctx = *DAG.getContext();

  switch (Opcode) {
  case ISD::CLMUL: {
    // For vector types, try decomposition strategies that leverage legal
    // CLMUL on narrower or wider element types, avoiding the expensive
    // bit-by-bit expansion.
    if (VT.isVector()) {
      // Strategy 1: Halving decomposition to half-element-width CLMUL.
      // Applies ExpandIntRes_CLMUL's identity element-wise:
      //   CLMUL(X, Y) = (Hi << HalfBW) | Lo
      // where:
      //   Lo = CLMUL(XLo, YLo)
      //   Hi = CLMULH(XLo, YLo) ^ CLMUL(XLo, YHi) ^ CLMUL(XHi, YLo)
      unsigned HalfBW = BW / 2;
      if (BW % 2 == 0) {
        EVT HalfEltVT = EVT::getIntegerVT(Context&: Ctx, BitWidth: HalfBW);
        EVT HalfVT =
            EVT::getVectorVT(Context&: Ctx, VT: HalfEltVT, EC: VT.getVectorElementCount());
        if (isTypeLegal(VT: HalfVT) && canNarrowCLMULToLegal(TLI: *this, Ctx, VT: HalfVT,
                                                          /*HalveDepth=*/1)) {
          SDValue ShAmt = DAG.getShiftAmountConstant(Val: HalfBW, VT, DL);

          // Extract low and high halves of each element.
          SDValue XLo = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: HalfVT, Operand: X);
          SDValue XHi = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: HalfVT,
                                    Operand: DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: X, N2: ShAmt));
          SDValue YLo = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: HalfVT, Operand: Y);
          SDValue YHi = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: HalfVT,
                                    Operand: DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: ShAmt));

          // Lo = CLMUL(XLo, YLo)
          SDValue Lo = DAG.getNode(Opcode: ISD::CLMUL, DL, VT: HalfVT, N1: XLo, N2: YLo);

          // Hi = CLMULH(XLo, YLo) ^ CLMUL(XLo, YHi) ^ CLMUL(XHi, YLo)
          SDValue LoH = DAG.getNode(Opcode: ISD::CLMULH, DL, VT: HalfVT, N1: XLo, N2: YLo);
          SDValue Cross1 = DAG.getNode(Opcode: ISD::CLMUL, DL, VT: HalfVT, N1: XLo, N2: YHi);
          SDValue Cross2 = DAG.getNode(Opcode: ISD::CLMUL, DL, VT: HalfVT, N1: XHi, N2: YLo);
          SDValue Cross = DAG.getNode(Opcode: ISD::XOR, DL, VT: HalfVT, N1: Cross1, N2: Cross2);
          SDValue Hi = DAG.getNode(Opcode: ISD::XOR, DL, VT: HalfVT, N1: LoH, N2: Cross);

          // Reassemble: Result = ZExt(Lo) | (AnyExt(Hi) << HalfBW)
          // The high half of HiExt is shifted out, so any-extend suffices.
          SDValue LoExt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT, Operand: Lo);
          SDValue HiExt = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT, Operand: Hi);
          SDValue HiShifted = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: HiExt, N2: ShAmt);
          return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: LoExt, N2: HiShifted);
        }
      }

      // Strategy 2: Promote to double-element-width CLMUL.
      //   CLMUL(X, Y) = Trunc(CLMUL(AnyExt(X), AnyExt(Y)))
      {
        EVT ExtVT = VT.widenIntegerElementType(Context&: Ctx);
        if (isTypeLegal(VT: ExtVT) && isOperationLegalOrCustom(Op: ISD::CLMUL, VT: ExtVT)) {
          // If CLMUL on ExtVT is Custom (not Legal), the target may
          // scalarize it, costing O(NumElements) scalar ops. The bit-by-bit
          // fallback costs O(BW) vectorized iterations. Only widen when
          // element count is small enough that scalarization is cheaper.
          unsigned NumElts = VT.getVectorMinNumElements();
          if (isOperationLegal(Op: ISD::CLMUL, VT: ExtVT) || NumElts < BW) {
            SDValue XExt = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: ExtVT, Operand: X);
            SDValue YExt = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: ExtVT, Operand: Y);
            SDValue Mul = DAG.getNode(Opcode: ISD::CLMUL, DL, VT: ExtVT, N1: XExt, N2: YExt);
            return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Mul);
          }
        }
      }

      // Strategy 3: Widen element count (pad with undef, do CLMUL on wider
      // vector, extract lower result). CLMUL is element-wise, so upper
      // (undef) lanes don't affect the lower results.
      // e.g. v4i16 => pad to v8i16 => halve to v8i8 PMUL => extract v4i16.
      if (auto EC = VT.getVectorElementCount(); EC.isFixed()) {
        EVT WideVT = EVT::getVectorVT(Context&: Ctx, VT: VT.getVectorElementType(), EC: EC * 2);
        if (isTypeLegal(VT: WideVT) && canNarrowCLMULToLegal(TLI: *this, Ctx, VT: WideVT)) {
          SDValue Undef = DAG.getUNDEF(VT: WideVT);
          SDValue XWide = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: WideVT, N1: Undef,
                                      N2: X, N3: DAG.getVectorIdxConstant(Val: 0, DL));
          SDValue YWide = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: WideVT, N1: Undef,
                                      N2: Y, N3: DAG.getVectorIdxConstant(Val: 0, DL));
          SDValue WideRes = DAG.getNode(Opcode: ISD::CLMUL, DL, VT: WideVT, N1: XWide, N2: YWide);
          return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, N1: WideRes,
                             N2: DAG.getVectorIdxConstant(Val: 0, DL));
        }
      }
    }

    // NOTE: If you change this expansion, please update the cost model
    // calculation in BasicTTIImpl::getTypeBasedIntrinsicInstrCost for
    // Intrinsic::clmul.

    EVT SetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: Ctx, VT);

    // Bit-by-bit fallback: accumulate XOR of (X << I) for every set bit I
    // of Y. One iteration per bit of the element width.
    SDValue Res = DAG.getConstant(Val: 0, DL, VT);
    for (unsigned I = 0; I < BW; ++I) {
      SDValue ShiftAmt = DAG.getShiftAmountConstant(Val: I, VT, DL);
      SDValue Mask = DAG.getConstant(Val: APInt::getOneBitSet(numBits: BW, BitNo: I), DL, VT);
      SDValue YMasked = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Y, N2: Mask);

      // For targets with a fast bit test instruction (e.g., x86 BT) or without
      // multiply, use a shift-based expansion to avoid expensive MUL
      // instructions.
      SDValue Part;
      if (!hasBitTest(X: Y, Y: ShiftAmt) &&
          isOperationLegalOrCustom(
              Op: ISD::MUL, VT: getTypeToTransformTo(Context&: *DAG.getContext(), VT))) {
        // X * (Y & (1 << I)) == X << I when bit I of Y is set, else 0.
        Part = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: X, N2: YMasked);
      } else {
        // Canonical bit test: (Y & (1 << I)) != 0
        SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
        SDValue Cond = DAG.getSetCC(DL, VT: SetCCVT, LHS: YMasked, RHS: Zero, Cond: ISD::SETEQ);
        SDValue XShifted = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: ShiftAmt);
        Part = DAG.getSelect(DL, VT, Cond, LHS: Zero, RHS: XShifted);
      }
      Res = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Res, N2: Part);
    }
    return Res;
  }
  case ISD::CLMULR:
    // If we have CLMUL/CLMULH, merge the shifted results to form CLMULR.
    if (isOperationLegalOrCustom(Op: ISD::CLMUL, VT) &&
        isOperationLegalOrCustom(Op: ISD::CLMULH, VT)) {
      SDValue Lo = DAG.getNode(Opcode: ISD::CLMUL, DL, VT, N1: X, N2: Y);
      SDValue Hi = DAG.getNode(Opcode: ISD::CLMULH, DL, VT, N1: X, N2: Y);
      Lo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo,
                       N2: DAG.getShiftAmountConstant(Val: BW - 1, VT, DL));
      Hi = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi,
                       N2: DAG.getShiftAmountConstant(Val: 1, VT, DL));
      return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Lo, N2: Hi);
    }
    [[fallthrough]];
  case ISD::CLMULH: {
    EVT ExtVT = VT.widenIntegerElementType(Context&: Ctx);
    // Use bitreverse-based lowering (CLMULR/H = rev(CLMUL(rev,rev)) >> S)
    // when any of these hold:
    // (a) ZERO_EXTEND to ExtVT or SRL on ExtVT isn't legal.
    // (b) CLMUL is legal on VT but not on ExtVT (e.g. v8i8 on AArch64).
    // (c) CLMUL on ExtVT isn't legal, but CLMUL on VT can be efficiently
    //     expanded via halving/widening to reach legal CLMUL. The bitreverse
    //     path creates CLMUL(VT) which will be expanded efficiently. The
    //     promote path would create CLMUL(ExtVT) => halving => CLMULH(VT),
    //     causing a cycle.
    // Note: when CLMUL is legal on ExtVT, the zext => CLMUL(ExtVT) => shift
    // => trunc path is preferred over the bitreverse path, as it avoids the
    // cost of 3 bitreverse operations.
    if (!isOperationLegalOrCustom(Op: ISD::ZERO_EXTEND, VT: ExtVT) ||
        !isOperationLegalOrCustom(Op: ISD::SRL, VT: ExtVT) ||
        (!isOperationLegalOrCustom(Op: ISD::CLMUL, VT: ExtVT) &&
         (isOperationLegalOrCustom(Op: ISD::CLMUL, VT) ||
          canNarrowCLMULToLegal(TLI: *this, Ctx, VT)))) {
      SDValue XRev = DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT, Operand: X);
      SDValue YRev = DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT, Operand: Y);
      SDValue ClMul = DAG.getNode(Opcode: ISD::CLMUL, DL, VT, N1: XRev, N2: YRev);
      SDValue Res = DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT, Operand: ClMul);
      if (Opcode == ISD::CLMULH)
        Res = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Res,
                          N2: DAG.getShiftAmountConstant(Val: 1, VT, DL));
      return Res;
    }
    // Promote path: zero-extend, multiply at double width, and take the high
    // bits. CLMULR uses shift BW-1, CLMULH uses shift BW.
    SDValue XExt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: ExtVT, Operand: X);
    SDValue YExt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: ExtVT, Operand: Y);
    SDValue ClMul = DAG.getNode(Opcode: ISD::CLMUL, DL, VT: ExtVT, N1: XExt, N2: YExt);
    unsigned ShAmt = Opcode == ISD::CLMULR ? BW - 1 : BW;
    SDValue HiBits = DAG.getNode(Opcode: ISD::SRL, DL, VT: ExtVT, N1: ClMul,
                                 N2: DAG.getShiftAmountConstant(Val: ShAmt, VT: ExtVT, DL));
    return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: HiBits);
  }
  }
  llvm_unreachable("Expected CLMUL, CLMULR, or CLMULH");
}
8754
/// Expand SHL_PARTS/SRL_PARTS/SRA_PARTS: shift the double-wide value formed
/// by (Hi:Lo) parts by an arbitrary amount, producing the two result parts in
/// \p Lo and \p Hi. Built on FSHL/FSHR (which have defined behavior for all
/// amounts) plus a select for amounts >= the part width.
void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
                                      SelectionDAG &DAG) const {
  assert(Node->getNumOperands() == 3 && "Not a double-shift!");
  EVT VT = Node->getValueType(ResNo: 0);
  unsigned VTBits = VT.getScalarSizeInBits();
  assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");

  bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
  bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
  SDValue ShOpLo = Node->getOperand(Num: 0);
  SDValue ShOpHi = Node->getOperand(Num: 1);
  SDValue ShAmt = Node->getOperand(Num: 2);
  EVT ShAmtVT = ShAmt.getValueType();
  EVT ShAmtCCVT =
      getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: ShAmtVT);
  SDLoc dl(Node);

  // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
  // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
  // away during isel.
  SDValue SafeShAmt = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ShAmtVT, N1: ShAmt,
                                  N2: DAG.getConstant(Val: VTBits - 1, DL: dl, VT: ShAmtVT));
  // Tmp1 is the "overflow" part value: sign-fill for SRA, zero otherwise.
  SDValue Tmp1 = IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: ShOpHi,
                                     N2: DAG.getConstant(Val: VTBits - 1, DL: dl, VT: ShAmtVT))
                       : DAG.getConstant(Val: 0, DL: dl, VT);

  // Tmp2 is the cross-part result (bits funneled between Hi and Lo); Tmp3 is
  // the plain single-part shift.
  SDValue Tmp2, Tmp3;
  if (IsSHL) {
    Tmp2 = DAG.getNode(Opcode: ISD::FSHL, DL: dl, VT, N1: ShOpHi, N2: ShOpLo, N3: ShAmt);
    Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: ShOpLo, N2: SafeShAmt);
  } else {
    Tmp2 = DAG.getNode(Opcode: ISD::FSHR, DL: dl, VT, N1: ShOpHi, N2: ShOpLo, N3: ShAmt);
    Tmp3 = DAG.getNode(Opcode: IsSRA ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: ShOpHi, N2: SafeShAmt);
  }

  // If the shift amount is larger or equal than the width of a part we don't
  // use the result from the FSHL/FSHR. Insert a test and select the appropriate
  // values for large shift amounts.
  SDValue AndNode = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ShAmtVT, N1: ShAmt,
                                N2: DAG.getConstant(Val: VTBits, DL: dl, VT: ShAmtVT))
  SDValue Cond = DAG.getSetCC(DL: dl, VT: ShAmtCCVT, LHS: AndNode,
                              RHS: DAG.getConstant(Val: 0, DL: dl, VT: ShAmtVT), Cond: ISD::SETNE);

  if (IsSHL) {
    Hi = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp3, N3: Tmp2);
    Lo = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp1, N3: Tmp3);
  } else {
    Lo = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp3, N3: Tmp2);
    Hi = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp1, N3: Tmp3);
  }
}
8806
8807SDValue TargetLowering::expandFCANONICALIZE(SDNode *Node,
8808 SelectionDAG &DAG) const {
8809 // This implements llvm.canonicalize.f* by multiplication with 1.0, as
8810 // suggested in
8811 // https://llvm.org/docs/LangRef.html#llvm-canonicalize-intrinsic.
8812 // It uses strict_fp operations even outside a strict_fp context in order
8813 // to guarantee that the canonicalization is not optimized away by later
8814 // passes. The result chain introduced by that is intentionally ignored
8815 // since no ordering requirement is intended here.
8816 EVT VT = Node->getValueType(ResNo: 0);
8817 SDLoc DL(Node);
8818 SDNodeFlags Flags = Node->getFlags();
8819 Flags.setNoFPExcept(true);
8820 SDValue One = DAG.getConstantFP(Val: 1.0, DL, VT);
8821 SDValue Mul =
8822 DAG.getNode(Opcode: ISD::STRICT_FMUL, DL, ResultTys: {VT, MVT::Other},
8823 Ops: {DAG.getEntryNode(), Node->getOperand(Num: 0), One}, Flags);
8824 return Mul;
8825}
8826
/// Expand FP_TO_SINT by decomposing the float's bit pattern into sign,
/// exponent and mantissa. Currently limited to f32 -> i64 (non-strict only);
/// returns false when the expansion does not apply, leaving \p Result unset.
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(Num: OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(ResNo: 0);
  SDLoc dl(SDValue(Node, 0));

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcVT != MVT::f32 || DstVT != MVT::i64)
    return false;

  if (Node->isStrictFPOpcode())
    // When a NaN is converted to an integer a trap is allowed. We can't
    // use this expansion here because it would eliminate that trap. Other
    // traps are also allowed and cannot be eliminated. See
    // IEEE 754-2008 sec 5.8.
    return false;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
  unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
  EVT IntVT = SrcVT.changeTypeToInteger();
  EVT IntShVT = getShiftAmountTy(LHSTy: IntVT, DL: DAG.getDataLayout());

  // IEEE-754 single-precision field constants: 8 exponent bits at bit 23,
  // bias 127, 23 mantissa bits.
  SDValue ExponentMask = DAG.getConstant(Val: 0x7F800000, DL: dl, VT: IntVT);
  SDValue ExponentLoBit = DAG.getConstant(Val: 23, DL: dl, VT: IntVT);
  SDValue Bias = DAG.getConstant(Val: 127, DL: dl, VT: IntVT);
  SDValue SignMask = DAG.getConstant(Val: APInt::getSignMask(BitWidth: SrcEltBits), DL: dl, VT: IntVT);
  SDValue SignLowBit = DAG.getConstant(Val: SrcEltBits - 1, DL: dl, VT: IntVT);
  SDValue MantissaMask = DAG.getConstant(Val: 0x007FFFFF, DL: dl, VT: IntVT);

  SDValue Bits = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: IntVT, Operand: Src);

  // Unbiased exponent = ((bits & ExponentMask) >> 23) - 127.
  SDValue ExponentBits = DAG.getNode(
      Opcode: ISD::SRL, DL: dl, VT: IntVT, N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: ExponentMask),
      N2: DAG.getZExtOrTrunc(Op: ExponentLoBit, DL: dl, VT: IntShVT));
  SDValue Exponent = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: ExponentBits, N2: Bias);

  // Arithmetic shift of the sign bit yields all-ones (negative) or all-zeros.
  SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: IntVT,
                             N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: SignMask),
                             N2: DAG.getZExtOrTrunc(Op: SignLowBit, DL: dl, VT: IntShVT));
  Sign = DAG.getSExtOrTrunc(Op: Sign, DL: dl, VT: DstVT);

  // R = mantissa with the implicit leading 1 restored.
  SDValue R = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: IntVT,
                          N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: MantissaMask),
                          N2: DAG.getConstant(Val: 0x00800000, DL: dl, VT: IntVT));

  R = DAG.getZExtOrTrunc(Op: R, DL: dl, VT: DstVT);

  // Scale the mantissa by 2^(Exponent - 23): shift left when the exponent
  // exceeds the mantissa width, right otherwise.
  R = DAG.getSelectCC(
      DL: dl, LHS: Exponent, RHS: ExponentLoBit,
      True: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: DstVT, N1: R,
                  N2: DAG.getZExtOrTrunc(
                      Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: Exponent, N2: ExponentLoBit),
                      DL: dl, VT: IntShVT)),
      False: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: DstVT, N1: R,
                  N2: DAG.getZExtOrTrunc(
                      Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: ExponentLoBit, N2: Exponent),
                      DL: dl, VT: IntShVT)),
      Cond: ISD::SETGT);

  // Apply the sign: (R ^ Sign) - Sign negates R when Sign is all-ones.
  SDValue Ret = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: DstVT,
                            N1: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: R, N2: Sign), N2: Sign);

  // Values with a negative unbiased exponent have magnitude < 1 => 0.
  Result = DAG.getSelectCC(DL: dl, LHS: Exponent, RHS: DAG.getConstant(Val: 0, DL: dl, VT: IntVT),
                           True: DAG.getConstant(Val: 0, DL: dl, VT: DstVT), False: Ret, Cond: ISD::SETLT);
  return true;
}
8897
/// Expand [STRICT_]FP_TO_UINT in terms of [STRICT_]FP_TO_SINT, offsetting by
/// the destination's sign mask when the source may exceed the signed range.
/// Returns false (leaving \p Result/\p Chain unset) when the required
/// operations are unavailable for vector types or FSUB is not cheap.
bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  SDLoc dl(SDValue(Node, 0));
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(Num: OpNo);

  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(ResNo: 0);
  EVT SetCCVT =
      getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: SrcVT);
  EVT DstSetCCVT =
      getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: DstVT);

  // Only expand vector types if we have the appropriate vector bit operations.
  unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
                                                   ISD::FP_TO_SINT;
  if (DstVT.isVector() && (!isOperationLegalOrCustom(Op: SIntOpcode, VT: DstVT) ||
                           !isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT: SrcVT)))
    return false;

  // If the maximum float value is smaller then the signed integer range,
  // the destination signmask can't be represented by the float, so we can
  // just use FP_TO_SINT directly.
  const fltSemantics &APFSem = SrcVT.getFltSemantics();
  APFloat APF(APFSem, APInt::getZero(numBits: SrcVT.getScalarSizeInBits()));
  APInt SignMask = APInt::getSignMask(BitWidth: DstVT.getScalarSizeInBits());
  if (APFloat::opOverflow &
      APF.convertFromAPInt(Input: SignMask, IsSigned: false, RM: APFloat::rmNearestTiesToEven)) {
    if (Node->isStrictFPOpcode()) {
      Result = DAG.getNode(Opcode: ISD::STRICT_FP_TO_SINT, DL: dl, ResultTys: { DstVT, MVT::Other },
                           Ops: { Node->getOperand(Num: 0), Src });
      Chain = Result.getValue(R: 1);
    } else
      Result = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Src);
    return true;
  }

  // Don't expand it if there isn't cheap fsub instruction.
  if (!isOperationLegalOrCustom(
          Op: Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, VT: SrcVT))
    return false;

  // Cst holds the sign-mask value converted to the source float type; Sel is
  // true when Src is safely within the signed range.
  SDValue Cst = DAG.getConstantFP(Val: APF, DL: dl, VT: SrcVT);
  SDValue Sel;

  if (Node->isStrictFPOpcode()) {
    // Signaling compare: raises invalid on quiet NaNs, as FP_TO_UINT may.
    Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT,
                       Chain: Node->getOperand(Num: 0), /*IsSignaling*/ true);
    Chain = Sel.getValue(R: 1);
  } else {
    Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT);
  }

  bool Strict = Node->isStrictFPOpcode() ||
                shouldUseStrictFP_TO_INT(FpVT: SrcVT, IntVT: DstVT, /*IsSigned*/ false);

  if (Strict) {
    // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
    // signmask then offset (the result of which should be fully representable).
    // Sel = Src < 0x8000000000000000
    // FltOfs = select Sel, 0, 0x8000000000000000
    // IntOfs = select Sel, 0, 0x8000000000000000
    // Result = fp_to_sint(Src - FltOfs) ^ IntOfs

    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue FltOfs = DAG.getSelect(DL: dl, VT: SrcVT, Cond: Sel,
                                   LHS: DAG.getConstantFP(Val: 0.0, DL: dl, VT: SrcVT), RHS: Cst);
    Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT);
    SDValue IntOfs = DAG.getSelect(DL: dl, VT: DstVT, Cond: Sel,
                                   LHS: DAG.getConstant(Val: 0, DL: dl, VT: DstVT),
                                   RHS: DAG.getConstant(Val: SignMask, DL: dl, VT: DstVT));
    SDValue SInt;
    if (Node->isStrictFPOpcode()) {
      SDValue Val = DAG.getNode(Opcode: ISD::STRICT_FSUB, DL: dl, ResultTys: { SrcVT, MVT::Other },
                                Ops: { Chain, Src, FltOfs });
      SInt = DAG.getNode(Opcode: ISD::STRICT_FP_TO_SINT, DL: dl, ResultTys: { DstVT, MVT::Other },
                         Ops: { Val.getValue(R: 1), Val });
      Chain = SInt.getValue(R: 1);
    } else {
      SDValue Val = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: SrcVT, N1: Src, N2: FltOfs);
      SInt = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Val);
    }
    Result = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: SInt, N2: IntOfs);
  } else {
    // Expand based on maximum range of FP_TO_SINT:
    // True = fp_to_sint(Src)
    // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
    // Result = select (Src < 0x8000000000000000), True, False

    SDValue True = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Src);
    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue False = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT,
                                Operand: DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: SrcVT, N1: Src, N2: Cst));
    False = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: False,
                        N2: DAG.getConstant(Val: SignMask, DL: dl, VT: DstVT));
    Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT);
    Result = DAG.getSelect(DL: dl, VT: DstVT, Cond: Sel, LHS: True, RHS: False);
  }
  return true;
}
8999
/// Expand UINT_TO_FP using either a SINT_TO_FP (when the input is known
/// non-negative) or, for i64 -> f64, the magic-constant algorithm from
/// compiler-rt's __floatundidf. Returns false (leaving \p Result unset) when
/// the expansion does not apply.
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
                                      SDValue &Chain, SelectionDAG &DAG) const {
  // This transform is not correct for converting 0 when rounding mode is set
  // to round toward negative infinity which will produce -0.0. So disable
  // under strictfp.
  if (Node->isStrictFPOpcode())
    return false;

  SDValue Src = Node->getOperand(Num: 0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(ResNo: 0);

  // If the input is known to be non-negative and SINT_TO_FP is legal then use
  // it.
  if (Node->getFlags().hasNonNeg() &&
      isOperationLegalOrCustom(Op: ISD::SINT_TO_FP, VT: SrcVT)) {
    Result =
        DAG.getNode(Opcode: ISD::SINT_TO_FP, DL: SDLoc(Node), VT: DstVT, Operand: Node->getOperand(Num: 0));
    return true;
  }

  // The magic-constant algorithm below is only valid for i64 -> f64.
  if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
    return false;

  // Only expand vector types if we have the appropriate vector bit
  // operations.
  if (SrcVT.isVector() && (!isOperationLegalOrCustom(Op: ISD::SRL, VT: SrcVT) ||
                           !isOperationLegalOrCustom(Op: ISD::FADD, VT: DstVT) ||
                           !isOperationLegalOrCustom(Op: ISD::FSUB, VT: DstVT) ||
                           !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT: SrcVT) ||
                           !isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT: SrcVT)))
    return false;

  SDLoc dl(SDValue(Node, 0));

  // Implementation of unsigned i64 to f64 following the algorithm in
  // __floatundidf in compiler_rt. This implementation performs rounding
  // correctly in all rounding modes with the exception of converting 0
  // when rounding toward negative infinity. In that case the fsub will
  // produce -0.0. This will be added to +0.0 and produce -0.0 which is
  // incorrect.
  // The magic constants are f64 bit patterns: 2^52, 2^84, and 2^84 + 2^52,
  // used to place the 32-bit halves directly into a double's mantissa.
  SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), DL: dl, VT: SrcVT);
  SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
      Val: llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), DL: dl, VT: DstVT);
  SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), DL: dl, VT: SrcVT);
  SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), DL: dl, VT: SrcVT);
  SDValue HiShift = DAG.getShiftAmountConstant(Val: 32, VT: SrcVT, DL: dl);

  // Result = (Lo | 2^52 as bits) - 0 merged with (Hi | 2^84 as bits),
  // subtracting the combined bias then re-adding the low contribution.
  SDValue Lo = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: SrcVT, N1: Src, N2: LoMask);
  SDValue Hi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: SrcVT, N1: Src, N2: HiShift);
  SDValue LoOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: SrcVT, N1: Lo, N2: TwoP52);
  SDValue HiOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: SrcVT, N1: Hi, N2: TwoP84);
  SDValue LoFlt = DAG.getBitcast(VT: DstVT, V: LoOr);
  SDValue HiFlt = DAG.getBitcast(VT: DstVT, V: HiOr);
  SDValue HiSub = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: DstVT, N1: HiFlt, N2: TwoP84PlusTwoP52);
  Result = DAG.getNode(Opcode: ISD::FADD, DL: dl, VT: DstVT, N1: LoFlt, N2: HiSub);
  return true;
}
9058
9059SDValue
9060TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
9061 SelectionDAG &DAG) const {
9062 unsigned Opcode = Node->getOpcode();
9063 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
9064 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
9065 "Wrong opcode");
9066
9067 if (Node->getFlags().hasNoNaNs()) {
9068 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
9069 EVT VT = Node->getValueType(ResNo: 0);
9070 if ((!isCondCodeLegal(CC: Pred, VT: VT.getSimpleVT()) ||
9071 !isOperationLegalOrCustom(Op: ISD::VSELECT, VT)) &&
9072 VT.isVector())
9073 return SDValue();
9074 SDValue Op1 = Node->getOperand(Num: 0);
9075 SDValue Op2 = Node->getOperand(Num: 1);
9076 return DAG.getSelectCC(DL: SDLoc(Node), LHS: Op1, RHS: Op2, True: Op1, False: Op2, Cond: Pred,
9077 Flags: Node->getFlags());
9078 }
9079
9080 return SDValue();
9081}
9082
/// Expand ISD::FMINNUM/FMAXNUM by trying, in order: vector splitting, the
/// IEEE variants (with operand quieting), FMINIMUM/FMAXIMUM when NaNs are
/// impossible, and finally a compare+select. Returns SDValue() when no
/// strategy applies.
SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
                                              SelectionDAG &DAG) const {
  if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
    return Expanded;

  EVT VT = Node->getValueType(ResNo: 0);
  if (VT.isScalableVector())
    report_fatal_error(
        reason: "Expanding fminnum/fmaxnum for scalable vectors is undefined.");

  SDLoc dl(Node);
  unsigned NewOp =
      Node->getOpcode() == ISD::FMINNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;

  if (isOperationLegalOrCustom(Op: NewOp, VT)) {
    SDValue Quiet0 = Node->getOperand(Num: 0);
    SDValue Quiet1 = Node->getOperand(Num: 1);

    if (!Node->getFlags().hasNoNaNs()) {
      // Insert canonicalizes if it's possible we need to quiet to get correct
      // sNaN behavior.
      if (!DAG.isKnownNeverSNaN(Op: Quiet0)) {
        Quiet0 = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL: dl, VT, Operand: Quiet0,
                             Flags: Node->getFlags());
      }
      if (!DAG.isKnownNeverSNaN(Op: Quiet1)) {
        Quiet1 = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL: dl, VT, Operand: Quiet1,
                             Flags: Node->getFlags());
      }
    }

    return DAG.getNode(Opcode: NewOp, DL: dl, VT, N1: Quiet0, N2: Quiet1, Flags: Node->getFlags());
  }

  // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
  // instead if there are no NaNs.
  if (Node->getFlags().hasNoNaNs() ||
      (DAG.isKnownNeverNaN(Op: Node->getOperand(Num: 0)) &&
       DAG.isKnownNeverNaN(Op: Node->getOperand(Num: 1)))) {
    unsigned IEEE2018Op =
        Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
    if (isOperationLegalOrCustom(Op: IEEE2018Op, VT))
      return DAG.getNode(Opcode: IEEE2018Op, DL: dl, VT, N1: Node->getOperand(Num: 0),
                         N2: Node->getOperand(Num: 1), Flags: Node->getFlags());
  }

  // Last resort: compare+select (only valid under the no-NaNs flag).
  if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
    return SelCC;

  return SDValue();
}
9134
/// Expand ISD::FMINIMUM/FMAXIMUM (IEEE-754-2019 semantics: NaN propagates,
/// -0.0 < +0.0) from an available fmin/fmax flavor plus fixups for NaN
/// propagation and signed-zero ordering.
SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
                                                SelectionDAG &DAG) const {
  if (SDValue Expanded = expandVectorNaryOpBySplitting(Node: N, DAG))
    return Expanded;

  SDLoc DL(N);
  SDValue LHS = N->getOperand(Num: 0);
  SDValue RHS = N->getOperand(Num: 1);
  unsigned Opc = N->getOpcode();
  EVT VT = N->getValueType(ResNo: 0);
  EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
  bool IsMax = Opc == ISD::FMAXIMUM;
  SDNodeFlags Flags = N->getFlags();

  // First, implement comparison not propagating NaN. If no native fmin or fmax
  // available, use plain select with setcc instead.
  SDValue MinMax;
  unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
  unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;

  // FIXME: We should probably define fminnum/fmaxnum variants with correct
  // signed zero behavior.
  bool MinMaxMustRespectOrderedZero = false;

  if (isOperationLegalOrCustom(Op: CompOpcIeee, VT)) {
    MinMax = DAG.getNode(Opcode: CompOpcIeee, DL, VT, N1: LHS, N2: RHS, Flags);
    // The IEEE variant already orders -0.0 before +0.0, so the signed-zero
    // fixup below can be skipped.
    MinMaxMustRespectOrderedZero = true;
  } else if (isOperationLegalOrCustom(Op: CompOpc, VT)) {
    MinMax = DAG.getNode(Opcode: CompOpc, DL, VT, N1: LHS, N2: RHS, Flags);
  } else {
    if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
      return DAG.UnrollVectorOp(N);

    // NaN (if exists) will be propagated later, so orderness doesn't matter.
    SDValue Compare =
        DAG.getSetCC(DL, VT: CCVT, LHS, RHS, Cond: IsMax ? ISD::SETOGT : ISD::SETOLT);
    MinMax = DAG.getSelect(DL, VT, Cond: Compare, LHS, RHS, Flags);
  }

  // Propagate any NaN of both operands
  if (!N->getFlags().hasNoNaNs() &&
      (!DAG.isKnownNeverNaN(Op: RHS) || !DAG.isKnownNeverNaN(Op: LHS))) {
    ConstantFP *FPNaN = ConstantFP::get(Context&: *DAG.getContext(),
                                        V: APFloat::getNaN(Sem: VT.getFltSemantics()));
    // SETUO is true iff either operand is a NaN.
    MinMax = DAG.getSelect(DL, VT, Cond: DAG.getSetCC(DL, VT: CCVT, LHS, RHS, Cond: ISD::SETUO),
                           LHS: DAG.getConstantFP(V: *FPNaN, DL, VT), RHS: MinMax, Flags);
  }

  // fminimum/fmaximum requires -0.0 less than +0.0
  if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
      !DAG.isKnownNeverZeroFloat(Op: RHS) && !DAG.isKnownNeverZeroFloat(Op: LHS)) {
    // When the tentative result is zero, prefer whichever operand is the
    // "winning" zero (+0.0 for max, -0.0 for min), checked via IS_FPCLASS.
    SDValue IsZero = DAG.getSetCC(DL, VT: CCVT, LHS: MinMax,
                                  RHS: DAG.getConstantFP(Val: 0.0, DL, VT), Cond: ISD::SETOEQ);
    SDValue TestZero =
        DAG.getTargetConstant(Val: IsMax ? fcPosZero : fcNegZero, DL, VT: MVT::i32);
    SDValue LCmp = DAG.getSelect(
        DL, VT, Cond: DAG.getNode(Opcode: ISD::IS_FPCLASS, DL, VT: CCVT, N1: LHS, N2: TestZero), LHS,
        RHS: MinMax, Flags);
    SDValue RCmp = DAG.getSelect(
        DL, VT, Cond: DAG.getNode(Opcode: ISD::IS_FPCLASS, DL, VT: CCVT, N1: RHS, N2: TestZero), LHS: RHS,
        RHS: LCmp, Flags);
    MinMax = DAG.getSelect(DL, VT, Cond: IsZero, LHS: RCmp, RHS: MinMax, Flags);
  }

  return MinMax;
}
9201
/// Expand ISD::FMINIMUMNUM/FMAXIMUMNUM (IEEE-754 2019 minimumNumber/
/// maximumNumber: a NaN operand is ignored in favor of the other operand,
/// sNaN operands are quieted, and -0.0 orders below +0.0) using whichever
/// related min/max operations the target supports, falling back to explicit
/// compare+select sequences.
SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Node);
  SDValue LHS = Node->getOperand(Num: 0);
  SDValue RHS = Node->getOperand(Num: 1);
  unsigned Opc = Node->getOpcode();
  EVT VT = Node->getValueType(ResNo: 0);
  EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
  bool IsMax = Opc == ISD::FMAXIMUMNUM;
  SDNodeFlags Flags = Node->getFlags();

  // FMINNUM_IEEE/FMAXNUM_IEEE match FMINIMUMNUM/FMAXIMUMNUM except for
  // signaling-NaN handling, which is fixed up below by canonicalizing.
  unsigned NewOp =
      Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;

  if (isOperationLegalOrCustom(Op: NewOp, VT)) {
    if (!Flags.hasNoNaNs()) {
      // Insert canonicalizes if it's possible we need to quiet to get correct
      // sNaN behavior.
      if (!DAG.isKnownNeverSNaN(Op: LHS)) {
        LHS = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL, VT, Operand: LHS, Flags);
      }
      if (!DAG.isKnownNeverSNaN(Op: RHS)) {
        RHS = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL, VT, Operand: RHS, Flags);
      }
    }

    return DAG.getNode(Opcode: NewOp, DL, VT, N1: LHS, N2: RHS, Flags);
  }

  // We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has
  // same behaviors for all of other cases: +0.0 vs -0.0 included.
  if (Flags.hasNoNaNs() ||
      (DAG.isKnownNeverNaN(Op: LHS) && DAG.isKnownNeverNaN(Op: RHS))) {
    unsigned IEEE2019Op =
        Opc == ISD::FMINIMUMNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
    if (isOperationLegalOrCustom(Op: IEEE2019Op, VT))
      return DAG.getNode(Opcode: IEEE2019Op, DL, VT, N1: LHS, N2: RHS, Flags);
  }

  // FMINNUM/FMAXNUM returns qNaN if either operand is sNaN, and it may return
  // either one for +0.0 vs -0.0.
  if ((Flags.hasNoNaNs() ||
       (DAG.isKnownNeverSNaN(Op: LHS) && DAG.isKnownNeverSNaN(Op: RHS))) &&
      (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(Op: LHS) ||
       DAG.isKnownNeverZeroFloat(Op: RHS))) {
    unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
    if (isOperationLegalOrCustom(Op: IEEE2008Op, VT))
      return DAG.getNode(Opcode: IEEE2008Op, DL, VT, N1: LHS, N2: RHS, Flags);
  }

  // The select-based fallback below needs a vector select; otherwise expand
  // per-element (also preferred when the scalar op itself is available).
  if (VT.isVector() &&
      (isOperationLegalOrCustomOrPromote(Op: Opc, VT: VT.getVectorElementType()) ||
       !isOperationLegalOrCustom(Op: ISD::VSELECT, VT)))
    return DAG.UnrollVectorOp(N: Node);

  // If only one operand is NaN, override it with another operand.
  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(Op: LHS)) {
    LHS = DAG.getSelectCC(DL, LHS, RHS: LHS, True: RHS, False: LHS, Cond: ISD::SETUO);
  }
  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(Op: RHS)) {
    RHS = DAG.getSelectCC(DL, LHS: RHS, RHS, True: LHS, False: RHS, Cond: ISD::SETUO);
  }

  // Always prefer RHS if equal.
  SDValue MinMax =
      DAG.getSelectCC(DL, LHS, RHS, True: LHS, False: RHS, Cond: IsMax ? ISD::SETGT : ISD::SETLT);

  // TODO: We need quiet sNaN if strictfp.

  // Fixup signed zero behavior.
  if (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(Op: LHS) ||
      DAG.isKnownNeverZeroFloat(Op: RHS)) {
    return MinMax;
  }
  SDValue TestZero =
      DAG.getTargetConstant(Val: IsMax ? fcPosZero : fcNegZero, DL, VT: MVT::i32);
  SDValue IsZero = DAG.getSetCC(DL, VT: CCVT, LHS: MinMax,
                                RHS: DAG.getConstantFP(Val: 0.0, DL, VT), Cond: ISD::SETEQ);
  EVT IntVT = VT.changeTypeToInteger();
  EVT FloatVT = VT.changeElementType(Context&: *DAG.getContext(), EltVT: MVT::f32);
  SDValue LHSTrunc = LHS;
  // If IS_FPCLASS would have to be expanded through an illegal integer type,
  // classify a narrowed f32 copy instead (sign of zero survives FP_ROUND).
  if (!isTypeLegal(VT: IntVT) && !isOperationLegalOrCustom(Op: ISD::IS_FPCLASS, VT)) {
    LHSTrunc = DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: FloatVT, N1: LHS,
                           N2: DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true));
  }
  // It's OK to select from LHS and MinMax, with only one ISD::IS_FPCLASS, as
  // we preferred RHS when generate MinMax, if the operands are equal.
  SDValue RetZero = DAG.getSelect(
      DL, VT, Cond: DAG.getNode(Opcode: ISD::IS_FPCLASS, DL, VT: CCVT, N1: LHSTrunc, N2: TestZero), LHS,
      RHS: MinMax, Flags);
  return DAG.getSelect(DL, VT, Cond: IsZero, LHS: RetZero, RHS: MinMax, Flags);
}
9294
/// Returns a true value if this FPClassTest can be performed with an ordered
/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
/// std::nullopt if it cannot be performed as a compare with 0.
static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
                                           const fltSemantics &Semantics,
                                           const MachineFunction &MF) {
  FPClassTest OrderedMask = Test & ~fcNan;
  FPClassTest NanTest = Test & fcNan;
  bool IsOrdered = NanTest == fcNone;
  bool IsUnordered = NanTest == fcNan;

  // Skip cases that are testing for only a qnan or snan.
  if (!IsOrdered && !IsUnordered)
    return std::nullopt;

  // A compare-to-zero matches exactly fcZero only when denormal inputs are
  // kept distinct from zero (IEEE input denormal mode).
  if (OrderedMask == fcZero &&
      MF.getDenormalMode(FPType: Semantics).Input == DenormalMode::IEEE)
    return IsOrdered;
  // If the input mode flushes denormals to zero, the compare also matches
  // subnormals, so fcZero|fcSubnormal is representable as a compare with 0.
  if (OrderedMask == (fcZero | fcSubnormal) &&
      MF.getDenormalMode(FPType: Semantics).inputsAreZero())
    return IsOrdered;
  return std::nullopt;
}
9318
/// Expand an ISD::IS_FPCLASS test of \p Op against \p OrigTestMask into either
/// floating-point compares (when FP exceptions may be ignored and a profitable
/// compare form exists) or, in the general case, integer bit tests on the
/// bitcast value. Handles ppc_fp128 (classify the high double) and x86 f80
/// (explicit integer bit) specially.
SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
                                         const FPClassTest OrigTestMask,
                                         SDNodeFlags Flags, const SDLoc &DL,
                                         SelectionDAG &DAG) const {
  EVT OperandVT = Op.getValueType();
  assert(OperandVT.isFloatingPoint());
  FPClassTest Test = OrigTestMask;

  // Degenerated cases.
  if (Test == fcNone)
    return DAG.getBoolConstant(V: false, DL, VT: ResultVT, OpVT: OperandVT);
  if (Test == fcAllFlags)
    return DAG.getBoolConstant(V: true, DL, VT: ResultVT, OpVT: OperandVT);

  // PPC double double is a pair of doubles, of which the higher part determines
  // the value class.
  if (OperandVT == MVT::ppcf128) {
    Op = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL, VT: MVT::f64, N1: Op,
                     N2: DAG.getConstant(Val: 1, DL, VT: MVT::i32));
    OperandVT = MVT::f64;
  }

  // Floating-point type properties.
  EVT ScalarFloatVT = OperandVT.getScalarType();
  const Type *FloatTy = ScalarFloatVT.getTypeForEVT(Context&: *DAG.getContext());
  const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
  bool IsF80 = (ScalarFloatVT == MVT::f80);

  // Some checks can be implemented using float comparisons, if floating point
  // exceptions are ignored.
  if (Flags.hasNoFPExcept() &&
      isOperationLegalOrCustom(Op: ISD::SETCC, VT: OperandVT.getScalarType())) {
    FPClassTest FPTestMask = Test;
    bool IsInvertedFP = false;

    // Testing the complement may need fewer compares; remember to invert
    // the condition codes used below if so.
    if (FPClassTest InvertedFPCheck =
            invertFPClassTestIfSimpler(Test: FPTestMask, UseFCmp: true)) {
      FPTestMask = InvertedFPCheck;
      IsInvertedFP = true;
    }

    ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
    ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;

    // See if we can fold an | fcNan into an unordered compare.
    FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;

    // Can't fold the ordered check if we're only testing for snan or qnan
    // individually.
    if ((FPTestMask & fcNan) != fcNan)
      OrderedFPTestMask = FPTestMask;

    const bool IsOrdered = FPTestMask == OrderedFPTestMask;

    // iszero(x) (possibly including subnormals) --> x == 0.0, ordered or
    // unordered depending on whether fcNan is in the mask.
    if (std::optional<bool> IsCmp0 =
            isFCmpEqualZero(Test: FPTestMask, Semantics, MF: DAG.getMachineFunction());
        IsCmp0 && (isCondCodeLegalOrCustom(
                      CC: *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
                      VT: OperandVT.getScalarType().getSimpleVT()))) {

      // If denormals could be implicitly treated as 0, this is not equivalent
      // to a compare with 0 since it will also be true for denormals.
      return DAG.getSetCC(DL, VT: ResultVT, LHS: Op,
                          RHS: DAG.getConstantFP(Val: 0.0, DL, VT: OperandVT),
                          Cond: *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    // isnan(x) --> x != x (unordered self-compare).
    if (FPTestMask == fcNan &&
        isCondCodeLegalOrCustom(CC: IsInvertedFP ? ISD::SETO : ISD::SETUO,
                                VT: OperandVT.getScalarType().getSimpleVT()))
      return DAG.getSetCC(DL, VT: ResultVT, LHS: Op, RHS: Op,
                          Cond: IsInvertedFP ? ISD::SETO : ISD::SETUO);

    bool IsOrderedInf = FPTestMask == fcInf;
    if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
        isCondCodeLegalOrCustom(CC: IsOrderedInf ? OrderedCmpOpcode
                                               : UnorderedCmpOpcode,
                                VT: OperandVT.getScalarType().getSimpleVT()) &&
        isOperationLegalOrCustom(Op: ISD::FABS, VT: OperandVT.getScalarType()) &&
        (isOperationLegal(Op: ISD::ConstantFP, VT: OperandVT.getScalarType()) ||
         (OperandVT.isVector() &&
          isOperationLegalOrCustom(Op: ISD::BUILD_VECTOR, VT: OperandVT)))) {
      // isinf(x) --> fabs(x) == inf
      SDValue Abs = DAG.getNode(Opcode: ISD::FABS, DL, VT: OperandVT, Operand: Op);
      SDValue Inf =
          DAG.getConstantFP(Val: APFloat::getInf(Sem: Semantics), DL, VT: OperandVT);
      return DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: Inf,
                          Cond: IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
        isCondCodeLegalOrCustom(CC: IsOrdered ? OrderedCmpOpcode
                                            : UnorderedCmpOpcode,
                                VT: OperandVT.getSimpleVT())) {
      // isposinf(x) --> x == inf
      // isneginf(x) --> x == -inf
      // isposinf(x) || nan --> x u== inf
      // isneginf(x) || nan --> x u== -inf

      SDValue Inf = DAG.getConstantFP(
          Val: APFloat::getInf(Sem: Semantics, Negative: OrderedFPTestMask == fcNegInf), DL,
          VT: OperandVT);
      return DAG.getSetCC(DL, VT: ResultVT, LHS: Op, RHS: Inf,
                          Cond: IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
      // TODO: Could handle ordered case, but it produces worse code for
      // x86. Maybe handle ordered if fabs is free?

      ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
      ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;

      if (isCondCodeLegalOrCustom(CC: IsOrdered ? OrderedOp : UnorderedOp,
                                  VT: OperandVT.getScalarType().getSimpleVT())) {
        // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal

        // TODO: Maybe only makes sense if fabs is free. Integer test of
        // exponent bits seems better for x86.
        SDValue Abs = DAG.getNode(Opcode: ISD::FABS, DL, VT: OperandVT, Operand: Op);
        SDValue SmallestNormal = DAG.getConstantFP(
            Val: APFloat::getSmallestNormalized(Sem: Semantics), DL, VT: OperandVT);
        return DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: SmallestNormal,
                            Cond: IsOrdered ? OrderedOp : UnorderedOp);
      }
    }

    if (FPTestMask == fcNormal) {
      // TODO: Handle unordered
      ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
      ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;

      if (isCondCodeLegalOrCustom(CC: IsFiniteOp,
                                  VT: OperandVT.getScalarType().getSimpleVT()) &&
          isCondCodeLegalOrCustom(CC: IsNormalOp,
                                  VT: OperandVT.getScalarType().getSimpleVT()) &&
          isFAbsFree(VT: OperandVT)) {
        // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
        SDValue Inf =
            DAG.getConstantFP(Val: APFloat::getInf(Sem: Semantics), DL, VT: OperandVT);
        SDValue SmallestNormal = DAG.getConstantFP(
            Val: APFloat::getSmallestNormalized(Sem: Semantics), DL, VT: OperandVT);

        SDValue Abs = DAG.getNode(Opcode: ISD::FABS, DL, VT: OperandVT, Operand: Op);
        SDValue IsFinite = DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: Inf, Cond: IsFiniteOp);
        SDValue IsNormal =
            DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: SmallestNormal, Cond: IsNormalOp);
        // De Morgan: the inverted test ORs the inverted sub-tests.
        unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
        return DAG.getNode(Opcode: LogicOp, DL, VT: ResultVT, N1: IsFinite, N2: IsNormal);
      }
    }
  }

  // Some checks may be represented as inversion of simpler check, for example
  // "inf|normal|subnormal|zero" => !"nan".
  bool IsInverted = false;

  if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, UseFCmp: false)) {
    Test = InvertedCheck;
    IsInverted = true;
  }

  // In the general case use integer operations.
  unsigned BitSize = OperandVT.getScalarSizeInBits();
  EVT IntVT = OperandVT.changeElementType(
      Context&: *DAG.getContext(), EltVT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: BitSize));
  SDValue OpAsInt = DAG.getBitcast(VT: IntVT, V: Op);

  // Various masks.
  APInt SignBit = APInt::getSignMask(BitWidth: BitSize);
  APInt ValueMask = APInt::getSignedMaxValue(numBits: BitSize);     // All bits but sign.
  APInt Inf = APFloat::getInf(Sem: Semantics).bitcastToAPInt();  // Exp and int bit.
  const unsigned ExplicitIntBitInF80 = 63;
  APInt ExpMask = Inf;
  if (IsF80)
    ExpMask.clearBit(BitPosition: ExplicitIntBitInF80);
  APInt AllOneMantissa = APFloat::getLargest(Sem: Semantics).bitcastToAPInt() & ~Inf;
  APInt QNaNBitMask =
      APInt::getOneBitSet(numBits: BitSize, BitNo: AllOneMantissa.getActiveBits() - 1);
  APInt InversionMask = APInt::getAllOnes(numBits: ResultVT.getScalarSizeInBits());

  SDValue ValueMaskV = DAG.getConstant(Val: ValueMask, DL, VT: IntVT);
  SDValue SignBitV = DAG.getConstant(Val: SignBit, DL, VT: IntVT);
  SDValue ExpMaskV = DAG.getConstant(Val: ExpMask, DL, VT: IntVT);
  SDValue ZeroV = DAG.getConstant(Val: 0, DL, VT: IntVT);
  SDValue InfV = DAG.getConstant(Val: Inf, DL, VT: IntVT);
  SDValue ResultInversionMask = DAG.getConstant(Val: InversionMask, DL, VT: ResultVT);

  // Accumulates the OR of the partial per-class test results.
  SDValue Res;
  const auto appendResult = [&](SDValue PartialRes) {
    if (PartialRes) {
      if (Res)
        Res = DAG.getNode(Opcode: ISD::OR, DL, VT: ResultVT, N1: Res, N2: PartialRes);
      else
        Res = PartialRes;
    }
  };

  SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
  const auto getIntBitIsSet = [&]() -> SDValue {
    // Lazily computed and cached: several f80 class checks need it.
    if (!IntBitIsSetV) {
      APInt IntBitMask(BitSize, 0);
      IntBitMask.setBit(ExplicitIntBitInF80);
      SDValue IntBitMaskV = DAG.getConstant(Val: IntBitMask, DL, VT: IntVT);
      SDValue IntBitV = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: IntBitMaskV);
      IntBitIsSetV = DAG.getSetCC(DL, VT: ResultVT, LHS: IntBitV, RHS: ZeroV, Cond: ISD::SETNE);
    }
    return IntBitIsSetV;
  };

  // Split the value into sign bit and absolute value.
  SDValue AbsV = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: ValueMaskV);
  SDValue SignV = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt,
                               RHS: DAG.getConstant(Val: 0, DL, VT: IntVT), Cond: ISD::SETLT);

  // Tests that involve more than one class should be processed first.
  SDValue PartialRes;

  if (IsF80)
    ; // Detect finite numbers of f80 by checking individual classes because
      // they have different settings of the explicit integer bit.
  else if ((Test & fcFinite) == fcFinite) {
    // finite(V) ==> (a << 1) < (inf << 1)
    //
    // See https://github.com/llvm/llvm-project/issues/169270, this is slightly
    // shorter than the `finite(V) ==> abs(V) < exp_mask` formula used before.

    assert(APFloat::isIEEELikeFP(OperandVT.getFltSemantics()) &&
           "finite check requires IEEE-like FP");

    SDValue One = DAG.getShiftAmountConstant(Val: 1, VT: IntVT, DL);
    SDValue TwiceOp = DAG.getNode(Opcode: ISD::SHL, DL, VT: IntVT, N1: OpAsInt, N2: One);
    SDValue TwiceInf = DAG.getNode(Opcode: ISD::SHL, DL, VT: IntVT, N1: ExpMaskV, N2: One);

    PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: TwiceOp, RHS: TwiceInf, Cond: ISD::SETULT);
    Test &= ~fcFinite;
  } else if ((Test & fcFinite) == fcPosFinite) {
    // finite(V) && V > 0 ==> V < exp_mask
    PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: ExpMaskV, Cond: ISD::SETULT);
    Test &= ~fcPosFinite;
  } else if ((Test & fcFinite) == fcNegFinite) {
    // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
    PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ExpMaskV, Cond: ISD::SETLT);
    PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
    Test &= ~fcNegFinite;
  }
  appendResult(PartialRes);

  if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
    // fcZero | fcSubnormal => test all exponent bits are 0
    // TODO: Handle sign bit specific cases
    if (PartialCheck == (fcZero | fcSubnormal)) {
      SDValue ExpBits = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: ExpMaskV);
      SDValue ExpIsZero =
          DAG.getSetCC(DL, VT: ResultVT, LHS: ExpBits, RHS: ZeroV, Cond: ISD::SETEQ);
      appendResult(ExpIsZero);
      Test &= ~PartialCheck & fcAllFlags;
    }
  }

  // Check for individual classes.

  if (unsigned PartialCheck = Test & fcZero) {
    if (PartialCheck == fcPosZero)
      PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: ZeroV, Cond: ISD::SETEQ);
    else if (PartialCheck == fcZero)
      PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ZeroV, Cond: ISD::SETEQ);
    else // ISD::fcNegZero
      PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: SignBitV, Cond: ISD::SETEQ);
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcSubnormal) {
    // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
    // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
    SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
    SDValue MantissaV = DAG.getConstant(Val: AllOneMantissa, DL, VT: IntVT);
    SDValue VMinusOneV =
        DAG.getNode(Opcode: ISD::SUB, DL, VT: IntVT, N1: V, N2: DAG.getConstant(Val: 1, DL, VT: IntVT));
    PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: VMinusOneV, RHS: MantissaV, Cond: ISD::SETULT);
    if (PartialCheck == fcNegSubnormal)
      PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcInf) {
    if (PartialCheck == fcPosInf)
      PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: InfV, Cond: ISD::SETEQ);
    else if (PartialCheck == fcInf)
      PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETEQ);
    else { // ISD::fcNegInf
      APInt NegInf = APFloat::getInf(Sem: Semantics, Negative: true).bitcastToAPInt();
      SDValue NegInfV = DAG.getConstant(Val: NegInf, DL, VT: IntVT);
      PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: NegInfV, Cond: ISD::SETEQ);
    }
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcNan) {
    APInt InfWithQnanBit = Inf | QNaNBitMask;
    SDValue InfWithQnanBitV = DAG.getConstant(Val: InfWithQnanBit, DL, VT: IntVT);
    if (PartialCheck == fcNan) {
      // isnan(V) ==> abs(V) > int(inf)
      PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETGT);
      if (IsF80) {
        // Recognize unsupported values as NaNs for compatibility with glibc.
        // In them (exp(V)==0) == int_bit.
        SDValue ExpBits = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: AbsV, N2: ExpMaskV);
        SDValue ExpIsZero =
            DAG.getSetCC(DL, VT: ResultVT, LHS: ExpBits, RHS: ZeroV, Cond: ISD::SETEQ);
        SDValue IsPseudo =
            DAG.getSetCC(DL, VT: ResultVT, LHS: getIntBitIsSet(), RHS: ExpIsZero, Cond: ISD::SETEQ);
        PartialRes = DAG.getNode(Opcode: ISD::OR, DL, VT: ResultVT, N1: PartialRes, N2: IsPseudo);
      }
    } else if (PartialCheck == fcQNan) {
      // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
      PartialRes =
          DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfWithQnanBitV, Cond: ISD::SETGE);
    } else { // ISD::fcSNan
      // issignaling(V) ==> abs(V) > unsigned(Inf) &&
      //                    abs(V) < (unsigned(Inf) | quiet_bit)
      SDValue IsNan = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETGT);
      SDValue IsNotQnan =
          DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfWithQnanBitV, Cond: ISD::SETLT);
      PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: IsNan, N2: IsNotQnan);
    }
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcNormal) {
    // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
    APInt ExpLSB = ExpMask & ~(ExpMask.shl(shiftAmt: 1));
    SDValue ExpLSBV = DAG.getConstant(Val: ExpLSB, DL, VT: IntVT);
    SDValue ExpMinus1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: IntVT, N1: AbsV, N2: ExpLSBV);
    APInt ExpLimit = ExpMask - ExpLSB;
    SDValue ExpLimitV = DAG.getConstant(Val: ExpLimit, DL, VT: IntVT);
    PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: ExpMinus1, RHS: ExpLimitV, Cond: ISD::SETULT);
    if (PartialCheck == fcNegNormal)
      PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
    else if (PartialCheck == fcPosNormal) {
      // XOR with the all-ones mask computes the boolean complement of SignV.
      SDValue PosSignV =
          DAG.getNode(Opcode: ISD::XOR, DL, VT: ResultVT, N1: SignV, N2: ResultInversionMask);
      PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: PosSignV);
    }
    if (IsF80)
      PartialRes =
          DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: getIntBitIsSet());
    appendResult(PartialRes);
  }

  // If no class contributed a test, the result is constant; if we computed
  // the complement of the requested test, invert the accumulated result.
  if (!Res)
    return DAG.getConstant(Val: IsInverted, DL, VT: ResultVT);
  if (IsInverted)
    Res = DAG.getNode(Opcode: ISD::XOR, DL, VT: ResultVT, N1: Res, N2: ResultInversionMask);
  return Res;
}
9675
9676// Only expand vector types if we have the appropriate vector bit operations.
9677static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
9678 assert(VT.isVector() && "Expected vector type");
9679 unsigned Len = VT.getScalarSizeInBits();
9680 return TLI.isOperationLegalOrCustom(Op: ISD::ADD, VT) &&
9681 TLI.isOperationLegalOrCustom(Op: ISD::SUB, VT) &&
9682 TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
9683 (Len == 8 || TLI.isOperationLegalOrCustom(Op: ISD::MUL, VT)) &&
9684 TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT);
9685}
9686
/// Expand ISD::CTPOP for integer (scalar or vector) types whose element width
/// is a multiple of 8 and at most 128 bits, using the classic parallel
/// bit-counting ("SWAR") algorithm. Returns an empty SDValue if the type is
/// unsupported or the required vector operations are unavailable.
SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(ResNo: 0);
  EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  SDValue Op = Node->getOperand(Num: 0);
  unsigned Len = VT.getScalarSizeInBits();
  assert(VT.isInteger() && "CTPOP not implemented for this type.");

  // TODO: Add support for irregular type lengths.
  if (!(Len <= 128 && Len % 8 == 0))
    return SDValue();

  // Only expand vector types if we have the appropriate vector bit operations.
  if (VT.isVector() && !canExpandVectorCTPOP(TLI: *this, VT))
    return SDValue();

  // This is the "best" algorithm from
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  SDValue Mask55 =
      DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x55)), DL: dl, VT);
  SDValue Mask33 =
      DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x33)), DL: dl, VT);
  SDValue Mask0F =
      DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x0F)), DL: dl, VT);

  // v = v - ((v >> 1) & 0x55555555...)
  Op = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Op,
                   N2: DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
                                N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
                                             N2: DAG.getConstant(Val: 1, DL: dl, VT: ShVT)),
                                N2: Mask55));
  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
  Op = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op, N2: Mask33),
                   N2: DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
                                N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
                                             N2: DAG.getConstant(Val: 2, DL: dl, VT: ShVT)),
                                N2: Mask33));
  // v = (v + (v >> 4)) & 0x0F0F0F0F...
  Op = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
                   N1: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Op,
                                N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
                                             N2: DAG.getConstant(Val: 4, DL: dl, VT: ShVT))),
                   N2: Mask0F);

  // For i8 elements, each byte already holds its final population count.
  if (Len <= 8)
    return Op;

  // Avoid the multiply if we only have 2 bytes to add.
  // TODO: Only doing this for scalars because vectors weren't as obviously
  // improved.
  if (Len == 16 && !VT.isVector()) {
    // v = (v + (v >> 8)) & 0x00FF;
    return DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
                       N1: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Op,
                                    N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
                                                 N2: DAG.getConstant(Val: 8, DL: dl, VT: ShVT))),
                       N2: DAG.getConstant(Val: 0xFF, DL: dl, VT));
  }

  // v = (v * 0x01010101...) >> (Len - 8)
  SDValue V;
  if (isOperationLegalOrCustomOrPromote(
          Op: ISD::MUL, VT: getTypeToTransformTo(Context&: *DAG.getContext(), VT))) {
    SDValue Mask01 =
        DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x01)), DL: dl, VT);
    V = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Op, N2: Mask01);
  } else {
    // No usable multiply: emulate v * 0x0101... by shift-and-add doubling.
    V = Op;
    for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
      SDValue ShiftC = DAG.getShiftAmountConstant(Val: Shift, VT, DL: dl);
      V = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: V,
                      N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: V, N2: ShiftC));
    }
  }
  return DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: V, N2: DAG.getConstant(Val: Len - 8, DL: dl, VT: ShVT));
}
9763
9764SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
9765 SDLoc dl(Node);
9766 EVT VT = Node->getValueType(ResNo: 0);
9767 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9768 SDValue Op = Node->getOperand(Num: 0);
9769 SDValue Mask = Node->getOperand(Num: 1);
9770 SDValue VL = Node->getOperand(Num: 2);
9771 unsigned Len = VT.getScalarSizeInBits();
9772 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
9773
9774 // TODO: Add support for irregular type lengths.
9775 if (!(Len <= 128 && Len % 8 == 0))
9776 return SDValue();
9777
9778 // This is same algorithm of expandCTPOP from
9779 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9780 SDValue Mask55 =
9781 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x55)), DL: dl, VT);
9782 SDValue Mask33 =
9783 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x33)), DL: dl, VT);
9784 SDValue Mask0F =
9785 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x0F)), DL: dl, VT);
9786
9787 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
9788
9789 // v = v - ((v >> 1) & 0x55555555...)
9790 Tmp1 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT,
9791 N1: DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op,
9792 N2: DAG.getConstant(Val: 1, DL: dl, VT: ShVT), N3: Mask, N4: VL),
9793 N2: Mask55, N3: Mask, N4: VL);
9794 Op = DAG.getNode(Opcode: ISD::VP_SUB, DL: dl, VT, N1: Op, N2: Tmp1, N3: Mask, N4: VL);
9795
9796 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9797 Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op, N2: Mask33, N3: Mask, N4: VL);
9798 Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT,
9799 N1: DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op,
9800 N2: DAG.getConstant(Val: 2, DL: dl, VT: ShVT), N3: Mask, N4: VL),
9801 N2: Mask33, N3: Mask, N4: VL);
9802 Op = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: VL);
9803
9804 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9805 Tmp4 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 4, DL: dl, VT: ShVT),
9806 N3: Mask, N4: VL),
9807 Tmp5 = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: Op, N2: Tmp4, N3: Mask, N4: VL);
9808 Op = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp5, N2: Mask0F, N3: Mask, N4: VL);
9809
9810 if (Len <= 8)
9811 return Op;
9812
9813 // v = (v * 0x01010101...) >> (Len - 8)
9814 SDValue V;
9815 if (isOperationLegalOrCustomOrPromote(
9816 Op: ISD::VP_MUL, VT: getTypeToTransformTo(Context&: *DAG.getContext(), VT))) {
9817 SDValue Mask01 =
9818 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x01)), DL: dl, VT);
9819 V = DAG.getNode(Opcode: ISD::VP_MUL, DL: dl, VT, N1: Op, N2: Mask01, N3: Mask, N4: VL);
9820 } else {
9821 V = Op;
9822 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9823 SDValue ShiftC = DAG.getShiftAmountConstant(Val: Shift, VT, DL: dl);
9824 V = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: V,
9825 N2: DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: V, N2: ShiftC, N3: Mask, N4: VL),
9826 N3: Mask, N4: VL);
9827 }
9828 }
9829 return DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: V, N2: DAG.getConstant(Val: Len - 8, DL: dl, VT: ShVT),
9830 N3: Mask, N4: VL);
9831}
9832
/// Expand ISD::CTLZ/CTLZ_ZERO_UNDEF by preferring the other variant when it is
/// legal (adding a zero-input select as needed), and otherwise lowering to a
/// bit-smearing sequence followed by CTPOP of the complement. Returns an empty
/// SDValue if a vector type lacks the required operations.
SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(ResNo: 0);
  EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  SDValue Op = Node->getOperand(Num: 0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(Op: ISD::CTLZ, VT))
    return DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Op);

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  if (isOperationLegalOrCustom(Op: ISD::CTLZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
    SDValue CTLZ = DAG.getNode(Opcode: ISD::CTLZ_ZERO_UNDEF, DL: dl, VT, Operand: Op);
    SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
    // ctlz(0) is defined as the bit width of the element.
    return DAG.getSelect(DL: dl, VT, Cond: SrcIsZero,
                         LHS: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT), RHS: CTLZ);
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // This includes the operations needed to expand CTPOP if it isn't supported.
  if (VT.isVector() && (!isPowerOf2_32(Value: NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(Op: ISD::CTPOP, VT) &&
                         !canExpandVectorCTPOP(TLI: *this, VT)) ||
                        !isOperationLegalOrCustom(Op: ISD::SRL, VT) ||
                        !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)))
    return SDValue();

  // for now, we do this:
  // x = x | (x >> 1);
  // x = x | (x >> 2);
  // ...
  // x = x | (x >>16);
  // x = x | (x >>32); // for 64-bit input
  // return popcount(~x);
  //
  // Ref: "Hacker's Delight" by Henry Warren
  for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
    SDValue Tmp = DAG.getConstant(Val: 1ULL << i, DL: dl, VT: ShVT);
    Op = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Op,
                     N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: Tmp));
  }
  Op = DAG.getNOT(DL: dl, Val: Op, VT);
  return DAG.getNode(Opcode: ISD::CTPOP, DL: dl, VT, Operand: Op);
}
9882
9883SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
9884 SDLoc dl(Node);
9885 EVT VT = Node->getValueType(ResNo: 0);
9886 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9887 SDValue Op = Node->getOperand(Num: 0);
9888 SDValue Mask = Node->getOperand(Num: 1);
9889 SDValue VL = Node->getOperand(Num: 2);
9890 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9891
9892 // do this:
9893 // x = x | (x >> 1);
9894 // x = x | (x >> 2);
9895 // ...
9896 // x = x | (x >>16);
9897 // x = x | (x >>32); // for 64-bit input
9898 // return popcount(~x);
9899 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9900 SDValue Tmp = DAG.getConstant(Val: 1ULL << i, DL: dl, VT: ShVT);
9901 Op = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Op,
9902 N2: DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: Tmp, N3: Mask, N4: VL), N3: Mask,
9903 N4: VL);
9904 }
9905 Op = DAG.getNode(Opcode: ISD::VP_XOR, DL: dl, VT, N1: Op, N2: DAG.getAllOnesConstant(DL: dl, VT),
9906 N3: Mask, N4: VL);
9907 return DAG.getNode(Opcode: ISD::VP_CTPOP, DL: dl, VT, N1: Op, N2: Mask, N3: VL);
9908}
9909
9910SDValue TargetLowering::expandCTLS(SDNode *Node, SelectionDAG &DAG) const {
9911 SDLoc dl(Node);
9912 EVT VT = Node->getValueType(ResNo: 0);
9913 SDValue Op = DAG.getFreeze(V: Node->getOperand(Num: 0));
9914 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9915
9916 // CTLS(x) = CTLZ(OR(SHL(XOR(x, SRA(x, BW-1)), 1), 1))
9917 // This transforms the sign bits into leading zeros that can be counted.
9918 SDValue ShiftAmt = DAG.getShiftAmountConstant(Val: NumBitsPerElt - 1, VT, DL: dl);
9919 SDValue SignBit = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Op, N2: ShiftAmt);
9920 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Op, N2: SignBit);
9921 SDValue Shl =
9922 DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Xor, N2: DAG.getShiftAmountConstant(Val: 1, VT, DL: dl));
9923 SDValue Or = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Shl, N2: DAG.getConstant(Val: 1, DL: dl, VT));
9924 return DAG.getNode(Opcode: ISD::CTLZ_ZERO_UNDEF, DL: dl, VT, Operand: Or);
9925}
9926
/// Lower CTTZ as a De Bruijn multiply plus a byte-table lookup in the
/// constant pool. Only handles 32/64-bit widths, and only when the target
/// customizes ISD::ConstantPool lowering (the heuristic gate below).
/// Index = ((Op & -Op) * DeBruijn) >> (BitWidth - log2(BitWidth)); the
/// table maps each possible index back to the trailing-zero count.
SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
                                        const SDLoc &DL, EVT VT, SDValue Op,
                                        unsigned BitWidth) const {
  if (BitWidth != 32 && BitWidth != 64)
    return SDValue();

  const DataLayout &TD = DAG.getDataLayout();
  if (!isOperationCustom(Op: ISD::ConstantPool, VT: getPointerTy(DL: TD)))
    return SDValue();

  APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
                                  : APInt(64, 0x0218A392CD3D5DBFULL);
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getConstantPool(MF&: DAG.getMachineFunction());
  unsigned ShiftAmt = BitWidth - Log2_32(Value: BitWidth);
  // Op & (0 - Op) isolates the lowest set bit of Op.
  SDValue Neg = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT), N2: Op);
  SDValue Lookup = DAG.getNode(
      Opcode: ISD::SRL, DL, VT,
      N1: DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op, N2: Neg),
                  N2: DAG.getConstant(Val: DeBruijn, DL, VT)),
      N2: DAG.getShiftAmountConstant(Val: ShiftAmt, VT, DL));
  Lookup = DAG.getSExtOrTrunc(Op: Lookup, DL, VT: getPointerTy(DL: TD));

  // Build the inverse table: shifting the De Bruijn constant left by i and
  // extracting the top log2(BitWidth) bits yields a unique index for each i.
  SmallVector<uint8_t> Table(BitWidth, 0);
  for (unsigned i = 0; i < BitWidth; i++) {
    APInt Shl = DeBruijn.shl(shiftAmt: i);
    APInt Lshr = Shl.lshr(shiftAmt: ShiftAmt);
    Table[Lshr.getZExtValue()] = i;
  }

  // Create a ConstantArray in Constant Pool
  auto *CA = ConstantDataArray::get(Context&: *DAG.getContext(), Elts&: Table);
  SDValue CPIdx = DAG.getConstantPool(C: CA, VT: getPointerTy(DL: TD),
                                      Align: TD.getPrefTypeAlign(Ty: CA->getType()));
  // Zero-extending i8 load of the table entry at CPIdx + Lookup.
  SDValue ExtLoad = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl: DL, VT, Chain: DAG.getEntryNode(),
                                   Ptr: DAG.getMemBasePlusOffset(Base: CPIdx, Offset: Lookup, DL),
                                   PtrInfo, MemVT: MVT::i8);
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
    return ExtLoad;

  // For plain CTTZ a zero source must return BitWidth, not the table entry.
  EVT SetCCVT =
      getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
  SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
  SDValue SrcIsZero = DAG.getSetCC(DL, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
  return DAG.getSelect(DL, VT, Cond: SrcIsZero,
                       LHS: DAG.getConstant(Val: BitWidth, DL, VT), RHS: ExtLoad);
}
9974
/// Expand ISD::CTTZ / ISD::CTTZ_ZERO_UNDEF for targets without native
/// support. Tries, in order: the other CTTZ flavour, a select-guarded
/// CTTZ_ZERO_UNDEF, a De Bruijn table lookup, and finally the
/// popcount(~x & (x - 1)) expansion (or its CTLZ-based variant).
SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(ResNo: 0);
  SDValue Op = Node->getOperand(Num: 0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(Op: ISD::CTTZ, VT))
    return DAG.getNode(Opcode: ISD::CTTZ, DL: dl, VT, Operand: Op);

  // If the ZERO_UNDEF version is supported use that and handle the zero case:
  // CTTZ(x) == (x == 0) ? NumBitsPerElt : CTTZ_ZERO_UNDEF(x).
  if (isOperationLegalOrCustom(Op: ISD::CTTZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
    SDValue CTTZ = DAG.getNode(Opcode: ISD::CTTZ_ZERO_UNDEF, DL: dl, VT, Operand: Op);
    SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
    return DAG.getSelect(DL: dl, VT, Cond: SrcIsZero,
                         LHS: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT), RHS: CTTZ);
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // This includes the operations needed to expand CTPOP if it isn't supported.
  if (VT.isVector() && (!isPowerOf2_32(Value: NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(Op: ISD::CTPOP, VT) &&
                         !isOperationLegalOrCustom(Op: ISD::CTLZ, VT) &&
                         !canExpandVectorCTPOP(TLI: *this, VT)) ||
                        !isOperationLegalOrCustom(Op: ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) ||
                        !isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT)))
    return SDValue();

  // Emit Table Lookup if ISD::CTPOP used in the fallback path below is going
  // to be expanded or converted to a libcall.
  if (!VT.isVector() && !isOperationLegalOrCustomOrPromote(Op: ISD::CTPOP, VT) &&
      !isOperationLegal(Op: ISD::CTLZ, VT))
    if (SDValue V = CTTZTableLookup(Node, DAG, DL: dl, VT, Op, BitWidth: NumBitsPerElt))
      return V;

  // for now, we use: { return popcount(~x & (x - 1)); }
  // unless the target has ctlz but not ctpop, in which case we use:
  // { return 32 - nlz(~x & (x-1)); }
  // Ref: "Hacker's Delight" by Henry Warren
  // ~x & (x - 1) is a mask of exactly the cttz(x) bits below the lowest set
  // bit (all-ones when x == 0, giving the correct NumBitsPerElt result).
  SDValue Tmp = DAG.getNode(
      Opcode: ISD::AND, DL: dl, VT, N1: DAG.getNOT(DL: dl, Val: Op, VT),
      N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 1, DL: dl, VT)));

  // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
  if (isOperationLegal(Op: ISD::CTLZ, VT) && !isOperationLegal(Op: ISD::CTPOP, VT)) {
    return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT),
                       N2: DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Tmp));
  }

  return DAG.getNode(Opcode: ISD::CTPOP, DL: dl, VT, Operand: Tmp);
}
10031
10032SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
10033 SDValue Op = Node->getOperand(Num: 0);
10034 SDValue Mask = Node->getOperand(Num: 1);
10035 SDValue VL = Node->getOperand(Num: 2);
10036 SDLoc dl(Node);
10037 EVT VT = Node->getValueType(ResNo: 0);
10038
10039 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
10040 SDValue Not = DAG.getNode(Opcode: ISD::VP_XOR, DL: dl, VT, N1: Op,
10041 N2: DAG.getAllOnesConstant(DL: dl, VT), N3: Mask, N4: VL);
10042 SDValue MinusOne = DAG.getNode(Opcode: ISD::VP_SUB, DL: dl, VT, N1: Op,
10043 N2: DAG.getConstant(Val: 1, DL: dl, VT), N3: Mask, N4: VL);
10044 SDValue Tmp = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Not, N2: MinusOne, N3: Mask, N4: VL);
10045 return DAG.getNode(Opcode: ISD::VP_CTPOP, DL: dl, VT, N1: Tmp, N2: Mask, N3: VL);
10046}
10047
/// Expand ISD::VP_CTTZ_ELTS: find the index of the first true element of a
/// predicate vector (or EVL if none) via a masked UMIN reduction over a
/// step vector.
SDValue TargetLowering::expandVPCTTZElements(SDNode *N,
                                             SelectionDAG &DAG) const {
  // %cond = to_bool_vec %source
  // %splat = splat /*val=*/VL
  // %tz = step_vector
  // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
  // %r = vp.reduce.umin %v
  SDLoc DL(N);
  SDValue Source = N->getOperand(Num: 0);
  SDValue Mask = N->getOperand(Num: 1);
  SDValue EVL = N->getOperand(Num: 2);
  EVT SrcVT = Source.getValueType();
  EVT ResVT = N->getValueType(ResNo: 0);
  // Vector of result-typed elements, one per source lane.
  EVT ResVecVT =
      EVT::getVectorVT(Context&: *DAG.getContext(), VT: ResVT, EC: SrcVT.getVectorElementCount());

  // Convert to boolean vector: lane is active iff the source lane != 0.
  if (SrcVT.getScalarType() != MVT::i1) {
    SDValue AllZero = DAG.getConstant(Val: 0, DL, VT: SrcVT);
    SrcVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i1,
                             EC: SrcVT.getVectorElementCount());
    Source = DAG.getNode(Opcode: ISD::VP_SETCC, DL, VT: SrcVT, N1: Source, N2: AllZero,
                         N3: DAG.getCondCode(Cond: ISD::SETNE), N4: Mask, N5: EVL);
  }

  // Inactive lanes hold EVL, which also serves as the reduction's start
  // value, so an all-false vector yields EVL.
  SDValue ExtEVL = DAG.getZExtOrTrunc(Op: EVL, DL, VT: ResVT);
  SDValue Splat = DAG.getSplat(VT: ResVecVT, DL, Op: ExtEVL);
  SDValue StepVec = DAG.getStepVector(DL, ResVT: ResVecVT);
  SDValue Select =
      DAG.getNode(Opcode: ISD::VP_SELECT, DL, VT: ResVecVT, N1: Source, N2: StepVec, N3: Splat, N4: EVL);
  return DAG.getNode(Opcode: ISD::VP_REDUCE_UMIN, DL, VT: ResVT, N1: ExtEVL, N2: Select, N3: Mask, N4: EVL);
}
10080
/// Returns a type-legalized version of \p Mask as the first item in the
/// pair. The second item contains a type-legalized step vector that's
/// guaranteed to fit the number of elements in \p Mask.
static std::pair<SDValue, SDValue>
getLegalMaskAndStepVector(SDValue Mask, bool ZeroIsPoison, SDLoc DL,
                          SelectionDAG &DAG) {
  EVT MaskVT = Mask.getValueType();
  EVT BoolVT = MaskVT.getScalarType();

  // Find a suitable type for a stepvector.
  // If zero is poison, we can assume the upper limit of the result is VF-1.
  ConstantRange VScaleRange(1, /*isFullSet=*/true); // Fixed length default.
  if (MaskVT.isScalableVector())
    VScaleRange = getVScaleRange(F: &DAG.getMachineFunction().getFunction(), BitWidth: 64);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // Smallest integer width that can hold every possible element index.
  uint64_t EltWidth = TLI.getBitWidthForCttzElements(
      RetVT: EVT(TLI.getVectorIdxTy(DL: DAG.getDataLayout())),
      EC: MaskVT.getVectorElementCount(), ZeroIsPoison, VScaleRange: &VScaleRange);
  // If the step vector element type is smaller than the mask element type,
  // use the mask type directly to avoid widening issues.
  EltWidth = std::max(a: EltWidth, b: BoolVT.getFixedSizeInBits());
  EVT StepVT = MVT::getIntegerVT(BitWidth: EltWidth);
  EVT StepVecVT = MaskVT.changeVectorElementType(Context&: *DAG.getContext(), EltVT: StepVT);

  // If promotion or widening is required to make the type legal, do it here.
  // Promotion of integers within LegalizeVectorOps is looking for types of
  // the same size but with a smaller number of larger elements, not the usual
  // larger size with the same number of larger elements.
  TargetLowering::LegalizeTypeAction TypeAction =
      TLI.getTypeAction(VT: StepVecVT.getSimpleVT());
  SDValue StepVec;
  if (TypeAction == TargetLowering::TypePromoteInteger) {
    // Promote the element type; element count is unchanged, so the mask
    // needs no adjustment.
    StepVecVT = TLI.getTypeToTransformTo(Context&: *DAG.getContext(), VT: StepVecVT);
    StepVec = DAG.getStepVector(DL, ResVT: StepVecVT);
  } else if (TypeAction == TargetLowering::TypeWidenVector) {
    // For widening, the element count changes. Create a step vector with only
    // the original elements valid and zeros for padding. Also widen the mask.
    EVT WideVecVT = TLI.getTypeToTransformTo(Context&: *DAG.getContext(), VT: StepVecVT);
    unsigned WideNumElts = WideVecVT.getVectorNumElements();

    // Build widened step vector: <0, 1, ..., OrigNumElts-1, poison, poison, ..>
    SDValue OrigStepVec = DAG.getStepVector(DL, ResVT: StepVecVT);
    SDValue UndefStep = DAG.getPOISON(VT: WideVecVT);
    StepVec = DAG.getInsertSubvector(DL, Vec: UndefStep, SubVec: OrigStepVec, Idx: 0);

    // Widen mask: pad with zeros so padded lanes never count as active.
    EVT WideMaskVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: BoolVT, NumElements: WideNumElts);
    SDValue ZeroMask = DAG.getConstant(Val: 0, DL, VT: WideMaskVT);
    Mask = DAG.getInsertSubvector(DL, Vec: ZeroMask, SubVec: Mask, Idx: 0);
  } else {
    // Type is already legal (or handled elsewhere); use it as-is.
    StepVec = DAG.getStepVector(DL, ResVT: StepVecVT);
  }

  return {Mask, StepVec};
}
10136
10137SDValue TargetLowering::expandVectorFindLastActive(SDNode *N,
10138 SelectionDAG &DAG) const {
10139 SDLoc DL(N);
10140 auto [Mask, StepVec] = getLegalMaskAndStepVector(
10141 Mask: N->getOperand(Num: 0), /*ZeroIsPoison=*/true, DL, DAG);
10142 EVT StepVecVT = StepVec.getValueType();
10143 EVT StepVT = StepVec.getValueType().getVectorElementType();
10144
10145 // Zero out lanes with inactive elements, then find the highest remaining
10146 // value from the stepvector.
10147 SDValue Zeroes = DAG.getConstant(Val: 0, DL, VT: StepVecVT);
10148 SDValue ActiveElts = DAG.getSelect(DL, VT: StepVecVT, Cond: Mask, LHS: StepVec, RHS: Zeroes);
10149 SDValue HighestIdx = DAG.getNode(Opcode: ISD::VECREDUCE_UMAX, DL, VT: StepVT, Operand: ActiveElts);
10150 return DAG.getZExtOrTrunc(Op: HighestIdx, DL, VT: N->getValueType(ResNo: 0));
10151}
10152
10153SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
10154 bool IsNegative) const {
10155 SDLoc dl(N);
10156 EVT VT = N->getValueType(ResNo: 0);
10157 SDValue Op = N->getOperand(Num: 0);
10158
10159 // abs(x) -> smax(x,sub(0,x))
10160 if (!IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
10161 isOperationLegal(Op: ISD::SMAX, VT)) {
10162 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
10163 Op = DAG.getFreeze(V: Op);
10164 return DAG.getNode(Opcode: ISD::SMAX, DL: dl, VT, N1: Op,
10165 N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
10166 }
10167
10168 // abs(x) -> umin(x,sub(0,x))
10169 if (!IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
10170 isOperationLegal(Op: ISD::UMIN, VT)) {
10171 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
10172 Op = DAG.getFreeze(V: Op);
10173 return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT, N1: Op,
10174 N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
10175 }
10176
10177 // 0 - abs(x) -> smin(x, sub(0,x))
10178 if (IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
10179 isOperationLegal(Op: ISD::SMIN, VT)) {
10180 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
10181 Op = DAG.getFreeze(V: Op);
10182 return DAG.getNode(Opcode: ISD::SMIN, DL: dl, VT, N1: Op,
10183 N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
10184 }
10185
10186 // Only expand vector types if we have the appropriate vector operations.
10187 if (VT.isVector() &&
10188 (!isOperationLegalOrCustom(Op: ISD::SRA, VT) ||
10189 (!IsNegative && !isOperationLegalOrCustom(Op: ISD::ADD, VT)) ||
10190 (IsNegative && !isOperationLegalOrCustom(Op: ISD::SUB, VT)) ||
10191 !isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT)))
10192 return SDValue();
10193
10194 Op = DAG.getFreeze(V: Op);
10195 SDValue Shift = DAG.getNode(
10196 Opcode: ISD::SRA, DL: dl, VT, N1: Op,
10197 N2: DAG.getShiftAmountConstant(Val: VT.getScalarSizeInBits() - 1, VT, DL: dl));
10198 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Op, N2: Shift);
10199
10200 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
10201 if (!IsNegative)
10202 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Xor, N2: Shift);
10203
10204 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
10205 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Shift, N2: Xor);
10206}
10207
/// Expand ISD::ABDS / ISD::ABDU (absolute difference). Tries, in order:
/// min/max, unsigned saturating-sub, abs(sub) when the subtraction provably
/// cannot overflow, a branchless setcc-based form, a usubo-based form, and
/// finally a compare + select (or scalarization for vectors without
/// VSELECT).
SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue LHS = N->getOperand(Num: 0);
  SDValue RHS = N->getOperand(Num: 1);
  bool IsSigned = N->getOpcode() == ISD::ABDS;

  // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
  // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
  unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
  unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
  if (isOperationLegal(Op: MaxOpc, VT) && isOperationLegal(Op: MinOpc, VT)) {
    // Freeze: both operands are used twice below.
    LHS = DAG.getFreeze(V: LHS);
    RHS = DAG.getFreeze(V: RHS);
    SDValue Max = DAG.getNode(Opcode: MaxOpc, DL: dl, VT, N1: LHS, N2: RHS);
    SDValue Min = DAG.getNode(Opcode: MinOpc, DL: dl, VT, N1: LHS, N2: RHS);
    return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Max, N2: Min);
  }

  // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
  if (!IsSigned && isOperationLegal(Op: ISD::USUBSAT, VT)) {
    LHS = DAG.getFreeze(V: LHS);
    RHS = DAG.getFreeze(V: RHS);
    return DAG.getNode(Opcode: ISD::OR, DL: dl, VT,
                       N1: DAG.getNode(Opcode: ISD::USUBSAT, DL: dl, VT, N1: LHS, N2: RHS),
                       N2: DAG.getNode(Opcode: ISD::USUBSAT, DL: dl, VT, N1: RHS, N2: LHS));
  }

  // If the subtract doesn't overflow then just use abs(sub())
  // NOTE: don't fold this in willNotOverflowSub directly as the freezes
  // below would no longer see the original operands.
  bool IsNonNegative = DAG.SignBitIsZero(Op: LHS) && DAG.SignBitIsZero(Op: RHS);

  if (DAG.willNotOverflowSub(IsSigned: IsSigned || IsNonNegative, N0: LHS, N1: RHS))
    return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT,
                       Operand: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS));

  if (DAG.willNotOverflowSub(IsSigned: IsSigned || IsNonNegative, N0: RHS, N1: LHS))
    return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT,
                       Operand: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: RHS, N2: LHS));

  EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
  ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
  LHS = DAG.getFreeze(V: LHS);
  RHS = DAG.getFreeze(V: RHS);
  SDValue Cmp = DAG.getSetCC(DL: dl, VT: CCVT, LHS, RHS, Cond: CC);

  // Branchless expansion iff cmp result is allbits:
  // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
  // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
  if (CCVT == VT && getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
    SDValue Diff = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS);
    SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Diff, N2: Cmp);
    return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Cmp, N2: Xor);
  }

  // Similar to the branchless expansion, if we don't prefer selects, use the
  // (sign-extended) usubo overflow flag if the (scalar) type is illegal as this
  // is more likely to legalize cleanly: abdu(lhs, rhs) -> sub(xor(sub(lhs,
  // rhs), uof(lhs, rhs)), uof(lhs, rhs))
  if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT) &&
      !preferSelectsOverBooleanArithmetic(VT)) {
    SDValue USubO =
        DAG.getNode(Opcode: ISD::USUBO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::i1), Ops: {LHS, RHS});
    SDValue Cmp = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT, Operand: USubO.getValue(R: 1));
    SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: USubO.getValue(R: 0), N2: Cmp);
    return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Xor, N2: Cmp);
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(N);

  // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
  // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
  return DAG.getSelect(DL: dl, VT, Cond: Cmp, LHS: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS),
                       RHS: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: RHS, N2: LHS));
}
10285
/// Expand the averaging nodes AVGFLOORS/AVGFLOORU/AVGCEILS/AVGCEILU.
/// Tries add+shift when the operands are already extended, a widen+shift
/// form for scalars with a legal wider type, a uaddo-based form for
/// AVGFLOORU on illegal scalars, and finally the overflow-free
/// and/or + xor + shift identity.
SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue LHS = N->getOperand(Num: 0);
  SDValue RHS = N->getOperand(Num: 1);

  unsigned Opc = N->getOpcode();
  bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
  bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
  unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
  unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
  unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
  unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
  assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS ||
          Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
         "Unknown AVG node");

  // If the operands are already extended, we can add+shift.
  // (>= 2 sign bits / >= 1 leading zero means lhs + rhs cannot overflow.)
  bool IsExt =
      (IsSigned && DAG.ComputeNumSignBits(Op: LHS) >= 2 &&
       DAG.ComputeNumSignBits(Op: RHS) >= 2) ||
      (!IsSigned && DAG.computeKnownBits(Op: LHS).countMinLeadingZeros() >= 1 &&
       DAG.computeKnownBits(Op: RHS).countMinLeadingZeros() >= 1);
  if (IsExt) {
    SDValue Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: LHS, N2: RHS);
    if (!IsFloor)
      // Ceiling: add the rounding bit before shifting.
      Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Sum, N2: DAG.getConstant(Val: 1, DL: dl, VT));
    return DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: Sum,
                       N2: DAG.getShiftAmountConstant(Val: 1, VT, DL: dl));
  }

  // For scalars, see if we can efficiently extend/truncate to use add+shift.
  if (VT.isScalarInteger()) {
    EVT ExtVT = VT.widenIntegerElementType(Context&: *DAG.getContext());
    if (isTypeLegal(VT: ExtVT) && isTruncateFree(FromVT: ExtVT, ToVT: VT)) {
      LHS = DAG.getNode(Opcode: ExtOpc, DL: dl, VT: ExtVT, Operand: LHS);
      RHS = DAG.getNode(Opcode: ExtOpc, DL: dl, VT: ExtVT, Operand: RHS);
      SDValue Avg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ExtVT, N1: LHS, N2: RHS);
      if (!IsFloor)
        Avg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ExtVT, N1: Avg,
                          N2: DAG.getConstant(Val: 1, DL: dl, VT: ExtVT));
      // Just use SRL as we will be truncating away the extended sign bits.
      Avg = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ExtVT, N1: Avg,
                        N2: DAG.getShiftAmountConstant(Val: 1, VT: ExtVT, DL: dl));
      return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Avg);
    }
  }

  // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
  // The carry-out of the add supplies the lost top bit of the 1-bit-wider sum.
  if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT) &&
      isOperationLegalOrCustom(
          Op: ISD::UADDO, VT: getLegalTypeToTransformTo(Context&: *DAG.getContext(), VT))) {
    SDValue UAddWithOverflow =
        DAG.getNode(Opcode: ISD::UADDO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::i1), Ops: {RHS, LHS});

    SDValue Sum = UAddWithOverflow.getValue(R: 0);
    SDValue Overflow = UAddWithOverflow.getValue(R: 1);

    // Right shift the sum by 1
    SDValue LShrVal = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Sum,
                                  N2: DAG.getShiftAmountConstant(Val: 1, VT, DL: dl));

    SDValue ZeroExtOverflow = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: Overflow);
    SDValue OverflowShl = DAG.getNode(
        Opcode: ISD::SHL, DL: dl, VT, N1: ZeroExtOverflow,
        N2: DAG.getShiftAmountConstant(Val: VT.getScalarSizeInBits() - 1, VT, DL: dl));

    return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: LShrVal, N2: OverflowShl);
  }

  // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
  // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
  // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
  // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
  // Freeze: both operands feed two nodes each.
  LHS = DAG.getFreeze(V: LHS);
  RHS = DAG.getFreeze(V: RHS);
  SDValue Sign = DAG.getNode(Opcode: SignOpc, DL: dl, VT, N1: LHS, N2: RHS);
  SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: LHS, N2: RHS);
  SDValue Shift =
      DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: Xor, N2: DAG.getShiftAmountConstant(Val: 1, VT, DL: dl));
  return DAG.getNode(Opcode: SumOpc, DL: dl, VT, N1: Sign, N2: Shift);
}
10368
/// Expand ISD::BSWAP with shift/and/or sequences (or rotates where
/// profitable). Only handles i16/i32/i64 scalar element types; returns
/// SDValue() otherwise.
SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue Op = N->getOperand(Num: 0);

  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    // Use a rotate by 8. This can be further expanded if necessary.
    return DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
  case MVT::i32:
    // This is meant for ARM specifically, which has ROTR but no ROTL.
    if (isOperationLegalOrCustom(Op: ISD::ROTR, VT)) {
      SDValue Mask = DAG.getConstant(Val: 0x00FF00FF, DL: dl, VT);
      // (x & 0x00FF00FF) rotr 8 | (x rotl 8) & 0x00FF00FF
      SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op, N2: Mask);
      SDValue Rotr =
          DAG.getNode(Opcode: ISD::ROTR, DL: dl, VT, N1: And, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
      // rotl 8 is emitted as rotr 24 since only ROTR is available.
      SDValue Rotl =
          DAG.getNode(Opcode: ISD::ROTR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
      SDValue And2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Rotl, N2: Mask);
      return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Rotr, N2: And2);
    }
    // Generic i32: move each byte into place with shifts/masks, then OR.
    Tmp4 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
    Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
                       N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
    Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
    Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT));
    Tmp1 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
    Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp3);
    Tmp2 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp1);
    return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp2);
  case MVT::i64:
    // Same byte-permutation scheme as i32 but with 8 bytes: TmpN holds byte
    // N-1 of the input shifted to byte 8-N of the output.
    Tmp8 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT));
    Tmp7 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
                       N2: DAG.getConstant(Val: 255ULL<<8, DL: dl, VT));
    Tmp7 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp7, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT));
    Tmp6 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
                       N2: DAG.getConstant(Val: 255ULL<<16, DL: dl, VT));
    Tmp6 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp6, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
    Tmp5 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
                       N2: DAG.getConstant(Val: 255ULL<<24, DL: dl, VT));
    Tmp5 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp5, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
    Tmp4 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
    Tmp4 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp4,
                       N2: DAG.getConstant(Val: 255ULL<<24, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
    Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp3,
                       N2: DAG.getConstant(Val: 255ULL<<16, DL: dl, VT));
    Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT));
    Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2,
                       N2: DAG.getConstant(Val: 255ULL<<8, DL: dl, VT));
    Tmp1 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT));
    // Combine pairwise, then merge the halves.
    Tmp8 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp7);
    Tmp6 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp6, N2: Tmp5);
    Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp3);
    Tmp2 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp1);
    Tmp8 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp6);
    Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp2);
    return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp4);
  }
}
10438
/// Expand ISD::VP_BSWAP into a sequence of VP shifts, masks and ORs,
/// mirroring the non-VP BSWAP expansion but threading the vector-predicate
/// Mask and EVL operands through every generated node.
/// Returns SDValue() when the result type is not simple or its scalar type
/// is not i16/i32/i64, letting the caller pick another lowering.
SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  // VP_BSWAP operands: value to swap, lane mask, explicit vector length.
  SDValue Op = N->getOperand(Num: 0);
  SDValue Mask = N->getOperand(Num: 1);
  SDValue EVL = N->getOperand(Num: 2);

  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    // Swap the two bytes: (Op << 8) | (Op >> 8).
    Tmp1 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp1, N2: Tmp2, N3: Mask, N4: EVL);
  case MVT::i32:
    // Move each of the four bytes into place (Tmp4 = highest destination
    // byte .. Tmp1 = lowest), masking out the neighbours, then OR together.
    Tmp4 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT),
                       N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
                       N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp1 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp3, N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp1, N3: Mask, N4: EVL);
    return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp2, N3: Mask, N4: EVL);
  case MVT::i64:
    // Same byte-relocation scheme for eight bytes: Tmp8 ends up as the
    // highest destination byte, Tmp1 as the lowest; ORs are balanced into a
    // tree at the end to shorten the dependency chain.
    Tmp8 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp7 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
                       N2: DAG.getConstant(Val: 255ULL << 8, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp7 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp7, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp6 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
                       N2: DAG.getConstant(Val: 255ULL << 16, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp6 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp6, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp5 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
                       N2: DAG.getConstant(Val: 255ULL << 24, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp5 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp5, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp4 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp4 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp4,
                       N2: DAG.getConstant(Val: 255ULL << 24, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp3,
                       N2: DAG.getConstant(Val: 255ULL << 16, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
                       N2: DAG.getConstant(Val: 255ULL << 8, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp1 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp8 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp7, N3: Mask, N4: EVL);
    Tmp6 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp6, N2: Tmp5, N3: Mask, N4: EVL);
    Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp3, N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp1, N3: Mask, N4: EVL);
    Tmp8 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp6, N3: Mask, N4: EVL);
    Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp2, N3: Mask, N4: EVL);
    return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp4, N3: Mask, N4: EVL);
  }
}
10514
/// Expand ISD::BITREVERSE without native support.
/// For power-of-2 sizes >= 8 bits: BSWAP the bytes, then swap nibbles,
/// bit-pairs and finally adjacent bits within each byte using shift/AND/OR
/// with repeating byte masks (0x0F / 0x33 / 0x55).
/// Otherwise falls back to moving every bit individually (O(Sz) nodes).
SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue Op = N->getOperand(Num: 0);
  EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  unsigned Sz = VT.getScalarSizeInBits();

  SDValue Tmp, Tmp2, Tmp3;

  // If we can, perform BSWAP first and then the mask+swap the i4, then i2
  // and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
  if (Sz >= 8 && isPowerOf2_32(Value: Sz)) {
    // Create the masks - repeating the pattern every byte.
    APInt Mask4 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x0F));
    APInt Mask2 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x33));
    APInt Mask1 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x55));

    // BSWAP if the type is wider than a single byte.
    Tmp = (Sz > 8 ? DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT, Operand: Op) : Op);

    // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
    Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT));
    Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask4, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask4, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT));
    Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);

    // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
    Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT));
    Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask2, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask2, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT));
    Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);

    // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
    Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT));
    Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask1, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask1, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT));
    Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
    return Tmp;
  }

  // Fallback: move each bit from position I to its mirrored position J,
  // isolate it with a one-bit mask, and OR everything into the result.
  Tmp = DAG.getConstant(Val: 0, DL: dl, VT);
  for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
    if (I < J)
      Tmp2 =
          DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: J - I, DL: dl, VT: SHVT));
    else
      Tmp2 =
          DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: I - J, DL: dl, VT: SHVT));

    APInt Shift = APInt::getOneBitSet(numBits: Sz, BitNo: J);
    Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Shift, DL: dl, VT));
    Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp, N2: Tmp2);
  }

  return Tmp;
}
10575
/// Expand ISD::VP_BITREVERSE using the same BSWAP + nibble/pair/bit swap
/// scheme as expandBITREVERSE, with Mask and EVL threaded through every
/// node. Unlike the non-VP version there is no per-bit fallback: returns
/// SDValue() when the scalar size is below 8 bits or not a power of two.
SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
  assert(N->getOpcode() == ISD::VP_BITREVERSE);

  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  // VP_BITREVERSE operands: value, lane mask, explicit vector length.
  SDValue Op = N->getOperand(Num: 0);
  SDValue Mask = N->getOperand(Num: 1);
  SDValue EVL = N->getOperand(Num: 2);
  EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  unsigned Sz = VT.getScalarSizeInBits();

  SDValue Tmp, Tmp2, Tmp3;

  // If we can, perform BSWAP first and then the mask+swap the i4, then i2
  // and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
  if (Sz >= 8 && isPowerOf2_32(Value: Sz)) {
    // Create the masks - repeating the pattern every byte.
    APInt Mask4 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x0F));
    APInt Mask2 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x33));
    APInt Mask1 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x55));

    // BSWAP if the type is wider than a single byte.
    Tmp = (Sz > 8 ? DAG.getNode(Opcode: ISD::VP_BSWAP, DL: dl, VT, N1: Op, N2: Mask, N3: EVL) : Op);

    // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
    Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
                       N2: DAG.getConstant(Val: Mask4, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask4, DL: dl, VT),
                       N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);

    // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
    Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
                       N2: DAG.getConstant(Val: Mask2, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask2, DL: dl, VT),
                       N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);

    // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
    Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
                       N2: DAG.getConstant(Val: Mask1, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask1, DL: dl, VT),
                       N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);
    return Tmp;
  }
  // No VP per-bit fallback; signal failure to the caller.
  return SDValue();
}
10637
/// Split a vector load into per-element scalar loads.
///
/// Two strategies, chosen by whether the in-memory element type is
/// byte-sized:
///  * Non-byte-sized elements: load the whole vector as one integer and
///    extract each element with SRL + AND + TRUNCATE (vectors are stored
///    without inter-element padding, so bit positions are well-defined).
///  * Byte-sized elements: emit one (possibly extending) scalar load per
///    element at increasing offsets and join the chains with a TokenFactor.
///
/// Returns the {result vector, output chain} pair.
/// Calls report_fatal_error on scalable vectors, whose element count is not
/// known at compile time.
std::pair<SDValue, SDValue>
TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
                                    SelectionDAG &DAG) const {
  SDLoc SL(LD);
  SDValue Chain = LD->getChain();
  SDValue BasePTR = LD->getBasePtr();
  EVT SrcVT = LD->getMemoryVT();
  EVT DstVT = LD->getValueType(ResNo: 0);
  ISD::LoadExtType ExtType = LD->getExtensionType();

  if (SrcVT.isScalableVector())
    report_fatal_error(reason: "Cannot scalarize scalable vector loads");

  unsigned NumElem = SrcVT.getVectorNumElements();

  EVT SrcEltVT = SrcVT.getScalarType();
  EVT DstEltVT = DstVT.getScalarType();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!SrcEltVT.isByteSized()) {
    unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
    EVT LoadVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumLoadBits);

    unsigned NumSrcBits = SrcVT.getSizeInBits();
    EVT SrcIntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumSrcBits);

    unsigned SrcEltBits = SrcEltVT.getSizeInBits();
    // Mask that keeps only the low SrcEltBits of a shifted-down element.
    SDValue SrcEltBitMask = DAG.getConstant(
        Val: APInt::getLowBitsSet(numBits: NumLoadBits, loBitsSet: SrcEltBits), DL: SL, VT: LoadVT);

    // Load the whole vector and avoid masking off the top bits as it makes
    // the codegen worse.
    SDValue Load =
        DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl: SL, VT: LoadVT, Chain, Ptr: BasePTR,
                       PtrInfo: LD->getPointerInfo(), MemVT: SrcIntVT, Alignment: LD->getBaseAlign(),
                       MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());

    SmallVector<SDValue, 8> Vals;
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      // On big-endian targets element 0 sits in the most significant bits.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount = DAG.getShiftAmountConstant(
          Val: ShiftIntoIdx * SrcEltVT.getSizeInBits(), VT: LoadVT, DL: SL);
      SDValue ShiftedElt = DAG.getNode(Opcode: ISD::SRL, DL: SL, VT: LoadVT, N1: Load, N2: ShiftAmount);
      SDValue Elt =
          DAG.getNode(Opcode: ISD::AND, DL: SL, VT: LoadVT, N1: ShiftedElt, N2: SrcEltBitMask);
      SDValue Scalar = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SL, VT: SrcEltVT, Operand: Elt);

      // Apply the load's original extension (sext/zext/any) per element.
      if (ExtType != ISD::NON_EXTLOAD) {
        unsigned ExtendOp = ISD::getExtForLoadExtType(IsFP: false, ExtType);
        Scalar = DAG.getNode(Opcode: ExtendOp, DL: SL, VT: DstEltVT, Operand: Scalar);
      }

      Vals.push_back(Elt: Scalar);
    }

    SDValue Value = DAG.getBuildVector(VT: DstVT, DL: SL, Ops: Vals);
    return std::make_pair(x&: Value, y: Load.getValue(R: 1));
  }

  // Byte-sized elements: one scalar load per element, Stride bytes apart.
  unsigned Stride = SrcEltVT.getSizeInBits() / 8;
  assert(SrcEltVT.isByteSized());

  SmallVector<SDValue, 8> Vals;
  SmallVector<SDValue, 8> LoadChains;

  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue ScalarLoad = DAG.getExtLoad(
        ExtType, dl: SL, VT: DstEltVT, Chain, Ptr: BasePTR,
        PtrInfo: LD->getPointerInfo().getWithOffset(O: Idx * Stride), MemVT: SrcEltVT,
        Alignment: LD->getBaseAlign(), MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());

    BasePTR = DAG.getObjectPtrOffset(SL, Ptr: BasePTR, Offset: TypeSize::getFixed(ExactSize: Stride));

    Vals.push_back(Elt: ScalarLoad.getValue(R: 0));
    LoadChains.push_back(Elt: ScalarLoad.getValue(R: 1));
  }

  SDValue NewChain = DAG.getNode(Opcode: ISD::TokenFactor, DL: SL, VT: MVT::Other, Ops: LoadChains);
  SDValue Value = DAG.getBuildVector(VT: DstVT, DL: SL, Ops: Vals);

  return std::make_pair(x&: Value, y&: NewChain);
}
10726
/// Split a vector store into per-element scalar stores.
///
/// Mirrors scalarizeVectorLoad:
///  * Non-byte-sized memory elements: pack all elements into a single
///    integer with SHL + OR (respecting endianness) and emit one store.
///  * Byte-sized memory elements: emit one truncating store per element at
///    increasing byte offsets and join the chains with a TokenFactor.
///
/// Returns the output chain.
/// Calls report_fatal_error on scalable vectors.
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  SDLoc SL(ST);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  EVT StVT = ST->getMemoryVT();

  if (StVT.isScalableVector())
    report_fatal_error(reason: "Cannot scalarize scalable vector stores");

  // The type of the data we want to save
  EVT RegVT = Value.getValueType();
  EVT RegSclVT = RegVT.getScalarType();

  // The type of data as saved in memory.
  EVT MemSclVT = StVT.getScalarType();

  unsigned NumElem = StVT.getVectorNumElements();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!MemSclVT.isByteSized()) {
    unsigned NumBits = StVT.getSizeInBits();
    EVT IntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumBits);

    SDValue CurrVal = DAG.getConstant(Val: 0, DL: SL, VT: IntVT);

    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      // Extract, truncate to the memory element width, then place the bits
      // at the element's position in the packed integer.
      SDValue Elt = DAG.getExtractVectorElt(DL: SL, VT: RegSclVT, Vec: Value, Idx);
      SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SL, VT: MemSclVT, Operand: Elt);
      SDValue ExtElt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SL, VT: IntVT, Operand: Trunc);
      // On big-endian targets element 0 occupies the most significant bits.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getConstant(Val: ShiftIntoIdx * MemSclVT.getSizeInBits(), DL: SL, VT: IntVT);
      SDValue ShiftedElt =
          DAG.getNode(Opcode: ISD::SHL, DL: SL, VT: IntVT, N1: ExtElt, N2: ShiftAmount);
      CurrVal = DAG.getNode(Opcode: ISD::OR, DL: SL, VT: IntVT, N1: CurrVal, N2: ShiftedElt);
    }

    return DAG.getStore(Chain, dl: SL, Val: CurrVal, Ptr: BasePtr, PtrInfo: ST->getPointerInfo(),
                        Alignment: ST->getBaseAlign(), MMOFlags: ST->getMemOperand()->getFlags(),
                        AAInfo: ST->getAAInfo());
  }

  // Store Stride in bytes
  unsigned Stride = MemSclVT.getSizeInBits() / 8;
  assert(Stride && "Zero stride!");
  // Extract each of the elements from the original vector and save them into
  // memory individually.
  SmallVector<SDValue, 8> Stores;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue Elt = DAG.getExtractVectorElt(DL: SL, VT: RegSclVT, Vec: Value, Idx);

    SDValue Ptr =
        DAG.getObjectPtrOffset(SL, Ptr: BasePtr, Offset: TypeSize::getFixed(ExactSize: Idx * Stride));

    // This scalar TruncStore may be illegal, but we legalize it later.
    SDValue Store = DAG.getTruncStore(
        Chain, dl: SL, Val: Elt, Ptr, PtrInfo: ST->getPointerInfo().getWithOffset(O: Idx * Stride),
        SVT: MemSclVT, Alignment: ST->getBaseAlign(), MMOFlags: ST->getMemOperand()->getFlags(),
        AAInfo: ST->getAAInfo());

    Stores.push_back(Elt: Store);
  }

  return DAG.getNode(Opcode: ISD::TokenFactor, DL: SL, VT: MVT::Other, Ops: Stores);
}
10801
/// Expand an unaligned load the target cannot perform natively.
///
/// Strategies, in order:
///  * FP/vector types with a legal same-sized integer type: do a misaligned
///    integer load and bitcast (scalarizing instead when integer loads of
///    that width are unsupported and the type is a vector).
///  * Otherwise for FP/vector: copy through an aligned stack slot using
///    register-width integer loads/stores, then reload with the original
///    load's semantics.
///  * Plain integers: split into two half-width loads (lo zext, hi keeps
///    the original extension kind) and recombine with SHL + OR.
///
/// Returns the {loaded value, output chain} pair.
std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed loads not implemented!");
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  EVT VT = LD->getValueType(ResNo: 0);
  EVT LoadedVT = LD->getMemoryVT();
  SDLoc dl(LD);
  auto &MF = DAG.getMachineFunction();

  if (VT.isFloatingPoint() || VT.isVector()) {
    EVT intVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: LoadedVT.getSizeInBits());
    if (isTypeLegal(VT: intVT) && isTypeLegal(VT: LoadedVT)) {
      if (!isOperationLegalOrCustom(Op: ISD::LOAD, VT: intVT) &&
          LoadedVT.isVector()) {
        // Scalarize the load and let the individual components be handled.
        return scalarizeVectorLoad(LD, DAG);
      }

      // Expand to a (misaligned) integer load of the same size,
      // then bitconvert to floating point or vector.
      SDValue newLoad = DAG.getLoad(VT: intVT, dl, Chain, Ptr,
                                    MMO: LD->getMemOperand());
      SDValue Result = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: LoadedVT, Operand: newLoad);
      if (LoadedVT != VT)
        Result = DAG.getNode(Opcode: VT.isFloatingPoint() ? ISD::FP_EXTEND :
                             ISD::ANY_EXTEND, DL: dl, VT, Operand: Result);

      return std::make_pair(x&: Result, y: newLoad.getValue(R: 1));
    }

    // Copy the value to a (aligned) stack slot using (unaligned) integer
    // loads and stores, then do a (aligned) load from the stack slot.
    MVT RegVT = getRegisterType(Context&: *DAG.getContext(), VT: intVT);
    unsigned LoadedBytes = LoadedVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Number of register-width copies needed to cover LoadedBytes (rounded
    // up); the last one may be partial.
    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackBase = DAG.CreateStackTemporary(VT1: LoadedVT, VT2: RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(Val: StackBase.getNode())->getIndex();
    SmallVector<SDValue, 8> Stores;
    SDValue StackPtr = StackBase;
    unsigned Offset = 0;

    EVT PtrVT = Ptr.getValueType();
    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: StackPtrVT);

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the original location.
      SDValue Load = DAG.getLoad(
          VT: RegVT, dl, Chain, Ptr, PtrInfo: LD->getPointerInfo().getWithOffset(O: Offset),
          Alignment: LD->getBaseAlign(), MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
      // Follow the load with a store to the stack slot. Remember the store.
      Stores.push_back(Elt: DAG.getStore(
          Chain: Load.getValue(R: 1), dl, Val: Load, Ptr: StackPtr,
          PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset)));
      // Increment the pointers.
      Offset += RegBytes;

      Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: PtrIncrement);
      StackPtr = DAG.getObjectPtrOffset(SL: dl, Ptr: StackPtr, Offset: StackPtrIncrement);
    }

    // The last copy may be partial. Do an extending load.
    EVT MemVT = EVT::getIntegerVT(Context&: *DAG.getContext(),
                                  BitWidth: 8 * (LoadedBytes - Offset));
    SDValue Load = DAG.getExtLoad(
        ExtType: ISD::EXTLOAD, dl, VT: RegVT, Chain, Ptr,
        PtrInfo: LD->getPointerInfo().getWithOffset(O: Offset), MemVT, Alignment: LD->getBaseAlign(),
        MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
    // Follow the load with a store to the stack slot. Remember the store.
    // On big-endian machines this requires a truncating store to ensure
    // that the bits end up in the right place.
    Stores.push_back(Elt: DAG.getTruncStore(
        Chain: Load.getValue(R: 1), dl, Val: Load, Ptr: StackPtr,
        PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset), SVT: MemVT));

    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: Stores);

    // Finally, perform the original load only redirected to the stack slot.
    Load = DAG.getExtLoad(ExtType: LD->getExtensionType(), dl, VT, Chain: TF, Ptr: StackBase,
                          PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset: 0),
                          MemVT: LoadedVT);

    // Callers expect a MERGE_VALUES node.
    return std::make_pair(x&: Load, y&: TF);
  }

  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
         "Unaligned load of unsupported type.");

  // Compute the new VT that is half the size of the old one. This is an
  // integer MVT.
  unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT;
  NewLoadedVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumBits/2);
  NumBits >>= 1;

  Align Alignment = LD->getBaseAlign();
  unsigned IncrementSize = NumBits / 8;
  ISD::LoadExtType HiExtType = LD->getExtensionType();

  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
  if (HiExtType == ISD::NON_EXTLOAD)
    HiExtType = ISD::ZEXTLOAD;

  // Load the value in two parts; which half is at the lower address depends
  // on endianness. The low half is always zero-extended so the OR below
  // does not corrupt the high half.
  SDValue Lo, Hi;
  if (DAG.getDataLayout().isLittleEndian()) {
    Lo = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT, Chain, Ptr, PtrInfo: LD->getPointerInfo(),
                        MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
                        AAInfo: LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
    Hi = DAG.getExtLoad(ExtType: HiExtType, dl, VT, Chain, Ptr,
                        PtrInfo: LD->getPointerInfo().getWithOffset(O: IncrementSize),
                        MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
                        AAInfo: LD->getAAInfo());
  } else {
    Hi = DAG.getExtLoad(ExtType: HiExtType, dl, VT, Chain, Ptr, PtrInfo: LD->getPointerInfo(),
                        MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
                        AAInfo: LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
    Lo = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        PtrInfo: LD->getPointerInfo().getWithOffset(O: IncrementSize),
                        MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
                        AAInfo: LD->getAAInfo());
  }

  // aggregate the two parts
  SDValue ShiftAmount = DAG.getShiftAmountConstant(Val: NumBits, VT, DL: dl);
  SDValue Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Hi, N2: ShiftAmount);
  Result = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Result, N2: Lo);

  SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Lo.getValue(R: 1),
                           N2: Hi.getValue(R: 1));

  return std::make_pair(x&: Result, y&: TF);
}
10949
/// Expand an unaligned store the target cannot perform natively.
///
/// Strategies, in order:
///  * FP/vector memory types with a legal same-sized integer type: bitcast
///    and emit one misaligned integer store (scalarizing instead when
///    integer stores of that width are unsupported and the type is a
///    vector).
///  * Otherwise for FP/vector: store to an aligned stack slot, then copy to
///    the destination with register-width integer loads/stores.
///  * Plain integers: split the value into two half-width parts and emit
///    two truncating stores, ordered by endianness.
///
/// Returns the output chain.
SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  assert(ST->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed stores not implemented!");
  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  SDValue Val = ST->getValue();
  EVT VT = Val.getValueType();
  Align Alignment = ST->getBaseAlign();
  auto &MF = DAG.getMachineFunction();
  EVT StoreMemVT = ST->getMemoryVT();

  SDLoc dl(ST);
  if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
    EVT intVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: VT.getSizeInBits());
    if (isTypeLegal(VT: intVT)) {
      if (!isOperationLegalOrCustom(Op: ISD::STORE, VT: intVT) &&
          StoreMemVT.isVector()) {
        // Scalarize the store and let the individual components be handled.
        SDValue Result = scalarizeVectorStore(ST, DAG);
        return Result;
      }
      // Expand to a bitconvert of the value to the integer type of the
      // same size, then a (misaligned) int store.
      // FIXME: Does not handle truncating floating point stores!
      SDValue Result = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: intVT, Operand: Val);
      Result = DAG.getStore(Chain, dl, Val: Result, Ptr, PtrInfo: ST->getPointerInfo(),
                            Alignment, MMOFlags: ST->getMemOperand()->getFlags());
      return Result;
    }
    // Do a (aligned) store to a stack slot, then copy from the stack slot
    // to the final destination using (unaligned) integer loads and stores.
    MVT RegVT = getRegisterType(
        Context&: *DAG.getContext(),
        VT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: StoreMemVT.getSizeInBits()));
    EVT PtrVT = Ptr.getValueType();
    unsigned StoredBytes = StoreMemVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Number of register-width copies needed (rounded up); the last one may
    // be partial.
    unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackPtr = DAG.CreateStackTemporary(VT1: StoreMemVT, VT2: RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();

    // Perform the original store, only redirected to the stack slot.
    SDValue Store = DAG.getTruncStore(
        Chain, dl, Val, Ptr: StackPtr,
        PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset: 0), SVT: StoreMemVT);

    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: StackPtrVT);
    SmallVector<SDValue, 8> Stores;
    unsigned Offset = 0;

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the stack slot.
      SDValue Load = DAG.getLoad(
          VT: RegVT, dl, Chain: Store, Ptr: StackPtr,
          PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset));
      // Store it to the final location. Remember the store.
      Stores.push_back(Elt: DAG.getStore(Chain: Load.getValue(R: 1), dl, Val: Load, Ptr,
                                     PtrInfo: ST->getPointerInfo().getWithOffset(O: Offset),
                                     Alignment: ST->getBaseAlign(),
                                     MMOFlags: ST->getMemOperand()->getFlags()));
      // Increment the pointers.
      Offset += RegBytes;
      StackPtr = DAG.getObjectPtrOffset(SL: dl, Ptr: StackPtr, Offset: StackPtrIncrement);
      Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: PtrIncrement);
    }

    // The last store may be partial. Do a truncating store. On big-endian
    // machines this requires an extending load from the stack slot to ensure
    // that the bits are in the right place.
    EVT LoadMemVT =
        EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: 8 * (StoredBytes - Offset));

    // Load from the stack slot.
    SDValue Load = DAG.getExtLoad(
        ExtType: ISD::EXTLOAD, dl, VT: RegVT, Chain: Store, Ptr: StackPtr,
        PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset), MemVT: LoadMemVT);

    Stores.push_back(Elt: DAG.getTruncStore(
        Chain: Load.getValue(R: 1), dl, Val: Load, Ptr,
        PtrInfo: ST->getPointerInfo().getWithOffset(O: Offset), SVT: LoadMemVT,
        Alignment: ST->getBaseAlign(), MMOFlags: ST->getMemOperand()->getFlags(), AAInfo: ST->getAAInfo()));
    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue Result = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: Stores);
    return Result;
  }

  assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
         "Unaligned store of unknown type.");
  // Get the half-size VT
  EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(Context&: *DAG.getContext());
  unsigned NumBits = NewStoredVT.getFixedSizeInBits();
  unsigned IncrementSize = NumBits / 8;

  // Divide the stored value in two parts.
  SDValue ShiftAmount =
      DAG.getShiftAmountConstant(Val: NumBits, VT: Val.getValueType(), DL: dl);
  SDValue Lo = Val;
  // If Val is a constant, replace the upper bits with 0. The SRL will constant
  // fold and not use the upper bits. A smaller constant may be easier to
  // materialize.
  if (auto *C = dyn_cast<ConstantSDNode>(Val&: Lo); C && !C->isOpaque())
    Lo = DAG.getNode(
        Opcode: ISD::AND, DL: dl, VT, N1: Lo,
        N2: DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VT.getSizeInBits(), loBitsSet: NumBits), DL: dl,
                          VT));
  SDValue Hi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Val, N2: ShiftAmount);

  // Store the two parts; which part goes to the lower address depends on
  // the target's endianness.
  SDValue Store1, Store2;
  Store1 = DAG.getTruncStore(Chain, dl,
                             Val: DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
                             Ptr, PtrInfo: ST->getPointerInfo(), SVT: NewStoredVT, Alignment,
                             MMOFlags: ST->getMemOperand()->getFlags());

  Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
  Store2 = DAG.getTruncStore(
      Chain, dl, Val: DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
      PtrInfo: ST->getPointerInfo().getWithOffset(O: IncrementSize), SVT: NewStoredVT, Alignment,
      MMOFlags: ST->getMemOperand()->getFlags(), AAInfo: ST->getAAInfo());

  SDValue Result =
      DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Store1, N2: Store2);
  return Result;
}
11081
// Compute the post-increment address for a masked load/store of DataVT at
// Addr. For normal memory the pointer advances by the full store size of
// DataVT; for compressed memory (compressstore/expandload) it advances by
// one element per set bit in Mask.
SDValue
TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
                                       const SDLoc &DL, EVT DataVT,
                                       SelectionDAG &DAG,
                                       bool IsCompressedMemory) const {
  SDValue Increment;
  EVT AddrVT = Addr.getValueType();
  EVT MaskVT = Mask.getValueType();
  assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
         "Incompatible types of Data and Mask");
  if (IsCompressedMemory) {
    // Incrementing the pointer according to number of '1's in the mask.
    if (DataVT.isScalableVector()) {
      // A scalable mask cannot be bitcast to a scalar integer, so count the
      // set bits with a vector reduction over the zero-extended mask.
      EVT MaskExtVT = MaskVT.changeElementType(Context&: *DAG.getContext(), EltVT: MVT::i32);
      SDValue MaskExt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MaskExtVT, Operand: Mask);
      Increment = DAG.getNode(Opcode: ISD::VECREDUCE_ADD, DL, VT: MVT::i32, Operand: MaskExt);
    } else {
      // Fixed-length mask: bitcast it to an integer and population-count it.
      EVT MaskIntVT =
          EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: MaskVT.getSizeInBits());
      SDValue MaskInIntReg = DAG.getBitcast(VT: MaskIntVT, V: Mask);
      // Widen sub-i32 masks so CTPOP operates on a reasonable width.
      if (MaskIntVT.getSizeInBits() < 32) {
        MaskInIntReg =
            DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i32, Operand: MaskInIntReg);
        MaskIntVT = MVT::i32;
      }
      Increment = DAG.getNode(Opcode: ISD::CTPOP, DL, VT: MaskIntVT, Operand: MaskInIntReg);
    }
    // Scale is an element size in bytes.
    SDValue Scale = DAG.getConstant(Val: DataVT.getScalarSizeInBits() / 8, DL,
                                    VT: AddrVT);
    // Bytes advanced = popcount(Mask) * sizeof(element).
    Increment = DAG.getZExtOrTrunc(Op: Increment, DL, VT: AddrVT);
    Increment = DAG.getNode(Opcode: ISD::MUL, DL, VT: AddrVT, N1: Increment, N2: Scale);
  } else
    Increment = DAG.getTypeSize(DL, VT: AddrVT, TS: DataVT.getStoreSize());

  return DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: Addr, N2: Increment);
}
11119
// Clamp a (possibly dynamic) index Idx so that accessing a SubEC-element
// subvector starting at Idx stays within the bounds of VecVT. Returns the
// adjusted index.
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
                                       EVT VecVT, const SDLoc &dl,
                                       ElementCount SubEC) {
  assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
         "Cannot index a scalable vector within a fixed-width vector");

  unsigned NElts = VecVT.getVectorMinNumElements();
  unsigned NumSubElts = SubEC.getKnownMinValue();
  EVT IdxVT = Idx.getValueType();

  if (VecVT.isScalableVector() && !SubEC.isScalable()) {
    // If this is a constant index and we know the value plus the number of the
    // elements in the subvector minus one is less than the minimum number of
    // elements then it's safe to return Idx.
    if (auto *IdxCst = dyn_cast<ConstantSDNode>(Val&: Idx))
      if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
        return Idx;
    // Otherwise clamp to (NElts * vscale) - NumSubElts, the last in-bounds
    // start position. When the subvector is larger than the vector's known
    // minimum size that subtraction could wrap, so saturate it at zero.
    SDValue VS =
        DAG.getVScale(DL: dl, VT: IdxVT, MulImm: APInt(IdxVT.getFixedSizeInBits(), NElts));
    unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
    SDValue Sub = DAG.getNode(Opcode: SubOpcode, DL: dl, VT: IdxVT, N1: VS,
                              N2: DAG.getConstant(Val: NumSubElts, DL: dl, VT: IdxVT));
    return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT: IdxVT, N1: Idx, N2: Sub);
  }
  // Single-element access into a power-of-two-sized vector: masking the low
  // log2(NElts) bits is cheaper than a umin clamp.
  if (isPowerOf2_32(Value: NElts) && NumSubElts == 1) {
    APInt Imm = APInt::getLowBitsSet(numBits: IdxVT.getSizeInBits(), loBitsSet: Log2_32(Value: NElts));
    return DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IdxVT, N1: Idx,
                       N2: DAG.getConstant(Val: Imm, DL: dl, VT: IdxVT));
  }
  // General fixed-width case: clamp to the last valid subvector start index.
  unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
  return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT: IdxVT, N1: Idx,
                     N2: DAG.getConstant(Val: MaxIndex, DL: dl, VT: IdxVT));
}
11153
11154SDValue
11155TargetLowering::getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr,
11156 EVT VecVT, SDValue Index,
11157 const SDNodeFlags PtrArithFlags) const {
11158 return getVectorSubVecPointer(
11159 DAG, VecPtr, VecVT,
11160 SubVecVT: EVT::getVectorVT(Context&: *DAG.getContext(), VT: VecVT.getVectorElementType(), NumElements: 1),
11161 Index, PtrArithFlags);
11162}
11163
// Compute the address of the SubVecVT-typed subvector starting at element
// Index within the in-memory vector of type VecVT located at VecPtr. The
// index is clamped so the resulting access stays in bounds.
SDValue
TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr,
                                       EVT VecVT, EVT SubVecVT, SDValue Index,
                                       const SDNodeFlags PtrArithFlags) const {
  SDLoc dl(Index);
  // Make sure the index type is big enough to compute in.
  Index = DAG.getZExtOrTrunc(Op: Index, DL: dl, VT: VecPtr.getValueType());

  EVT EltVT = VecVT.getVectorElementType();

  // Calculate the element offset and add it to the pointer.
  unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
  assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
         "Converting bits to bytes lost precision");
  assert(SubVecVT.getVectorElementType() == EltVT &&
         "Sub-vector must be a vector with matching element type");
  Index = clampDynamicVectorIndex(DAG, Idx: Index, VecVT, dl,
                                  SubEC: SubVecVT.getVectorElementCount());

  EVT IdxVT = Index.getValueType();
  // For a scalable subvector the element offset scales with vscale.
  if (SubVecVT.isScalableVector())
    Index =
        DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IdxVT, N1: Index,
                    N2: DAG.getVScale(DL: dl, VT: IdxVT, MulImm: APInt(IdxVT.getSizeInBits(), 1)));

  // Convert the element index into a byte offset from the vector base.
  Index = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IdxVT, N1: Index,
                      N2: DAG.getConstant(Val: EltSize, DL: dl, VT: IdxVT));
  return DAG.getMemBasePlusOffset(Base: VecPtr, Offset: Index, DL: dl, Flags: PtrArithFlags);
}
11193
11194//===----------------------------------------------------------------------===//
11195// Implementation of Emulated TLS Model
11196//===----------------------------------------------------------------------===//
11197
SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                                SelectionDAG &DAG) const {
  // Access to address of TLS variable xyz is lowered to a function call:
  // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
  EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
  PointerType *VoidPtrType = PointerType::get(C&: *DAG.getContext(), AddressSpace: 0);
  SDLoc dl(GA);

  ArgListTy Args;
  // Look through pointer casts and aliases so we find the canonical global
  // the "__emutls_v." control variable was emitted for.
  const GlobalValue *GV =
      cast<GlobalValue>(Val: GA->getGlobal()->stripPointerCastsAndAliases());
  SmallString<32> NameString("__emutls_v.");
  NameString += GV->getName();
  StringRef EmuTlsVarName(NameString);
  const GlobalVariable *EmuTlsVar =
      GV->getParent()->getNamedGlobal(Name: EmuTlsVarName);
  assert(EmuTlsVar && "Cannot find EmuTlsVar ");
  // The control variable's address is the sole argument to the runtime call.
  Args.emplace_back(args: DAG.getGlobalAddress(GV: EmuTlsVar, DL: dl, VT: PtrVT), args&: VoidPtrType);

  SDValue EmuTlsGetAddr = DAG.getExternalSymbol(Sym: "__emutls_get_address", VT: PtrVT);

  // Lower as a plain C call returning the per-thread address of the variable.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
  CLI.setLibCallee(CC: CallingConv::C, ResultType: VoidPtrType, Target: EmuTlsGetAddr, ArgsList: std::move(Args));
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
  // At last for X86 targets, maybe good for other targets too?
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setAdjustsStack(true); // Is this only for X86 target?
  MFI.setHasCalls(true);

  assert((GA->getOffset() == 0) &&
         "Emulated TLS must have zero offset in GlobalAddressSDNode");
  return CallResult.first;
}
11234
11235SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
11236 SelectionDAG &DAG) const {
11237 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
11238 if (!isCtlzFast())
11239 return SDValue();
11240 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get();
11241 SDLoc dl(Op);
11242 if (isNullConstant(V: Op.getOperand(i: 1)) && CC == ISD::SETEQ) {
11243 EVT VT = Op.getOperand(i: 0).getValueType();
11244 SDValue Zext = Op.getOperand(i: 0);
11245 if (VT.bitsLT(VT: MVT::i32)) {
11246 VT = MVT::i32;
11247 Zext = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Op.getOperand(i: 0));
11248 }
11249 unsigned Log2b = Log2_32(Value: VT.getSizeInBits());
11250 SDValue Clz = DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Zext);
11251 SDValue Scc = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Clz,
11252 N2: DAG.getConstant(Val: Log2b, DL: dl, VT: MVT::i32));
11253 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32, Operand: Scc);
11254 }
11255 return SDValue();
11256}
11257
// Expand an ISD::SMIN/SMAX/UMIN/UMAX node on a target without a native
// min/max, preferring forms that reuse nodes the DAG already contains.
SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
  SDValue Op0 = Node->getOperand(Num: 0);
  SDValue Op1 = Node->getOperand(Num: 1);
  EVT VT = Op0.getValueType();
  EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
  unsigned Opcode = Node->getOpcode();
  SDLoc DL(Node);

  // If both sign bits are zero, flip UMIN/UMAX <-> SMIN/SMAX if legal.
  unsigned AltOpcode = ISD::getOppositeSignednessMinMaxOpcode(MinMaxOpc: Opcode);
  if (isOperationLegal(Op: AltOpcode, VT) && DAG.SignBitIsZero(Op: Op0) &&
      DAG.SignBitIsZero(Op: Op1))
    return DAG.getNode(Opcode: AltOpcode, DL, VT, N1: Op0, N2: Op1);

  // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
  if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(V: Op1, AllowUndefs: true) && BoolVT == VT &&
      getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
    // Freeze Op0 since it is used twice (in the setcc and the sub).
    Op0 = DAG.getFreeze(V: Op0);
    SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
    return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0,
                       N2: DAG.getSetCC(DL, VT, LHS: Op0, RHS: Zero, Cond: ISD::SETEQ));
  }

  // umin(x,y) -> sub(x,usubsat(x,y))
  // TODO: Missing freeze(Op0)?
  if (Opcode == ISD::UMIN && isOperationLegal(Op: ISD::SUB, VT) &&
      isOperationLegal(Op: ISD::USUBSAT, VT)) {
    return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0,
                       N2: DAG.getNode(Opcode: ISD::USUBSAT, DL, VT, N1: Op0, N2: Op1));
  }

  // umax(x,y) -> add(x,usubsat(y,x))
  // TODO: Missing freeze(Op0)?
  if (Opcode == ISD::UMAX && isOperationLegal(Op: ISD::ADD, VT) &&
      isOperationLegal(Op: ISD::USUBSAT, VT)) {
    return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0,
                       N2: DAG.getNode(Opcode: ISD::USUBSAT, DL, VT, N1: Op1, N2: Op0));
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(N: Node);

  // Attempt to find an existing SETCC node that we can reuse.
  // TODO: Do we need a generic doesSETCCNodeExist?
  // TODO: Missing freeze(Op0)/freeze(Op1)?
  // Builds setcc+select using, in order of preference: an existing SETCC with
  // the preferred/alternate predicate, an existing SETCC with a commuted
  // predicate (select operands swapped to compensate), or a fresh SETCC with
  // the preferred predicate.
  auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
                         ISD::CondCode PrefCommuteCC,
                         ISD::CondCode AltCommuteCC) {
    SDVTList BoolVTList = DAG.getVTList(VT: BoolVT);
    for (ISD::CondCode CC : {PrefCC, AltCC}) {
      if (DAG.doesNodeExist(Opcode: ISD::SETCC, VTList: BoolVTList,
                            Ops: {Op0, Op1, DAG.getCondCode(Cond: CC)})) {
        SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: CC);
        return DAG.getSelect(DL, VT, Cond, LHS: Op0, RHS: Op1);
      }
    }
    for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
      if (DAG.doesNodeExist(Opcode: ISD::SETCC, VTList: BoolVTList,
                            Ops: {Op0, Op1, DAG.getCondCode(Cond: CC)})) {
        SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: CC);
        return DAG.getSelect(DL, VT, Cond, LHS: Op1, RHS: Op0);
      }
    }
    SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: PrefCC);
    return DAG.getSelect(DL, VT, Cond, LHS: Op0, RHS: Op1);
  };

  // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
  //                      -> Y = (A < B) ? B : A
  //                      -> Y = (A >= B) ? A : B
  //                      -> Y = (A <= B) ? B : A
  switch (Opcode) {
  case ISD::SMAX:
    return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
  case ISD::SMIN:
    return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
  case ISD::UMAX:
    return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
  case ISD::UMIN:
    return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
  }

  llvm_unreachable("How did we get here?");
}
11344
// Expand ISD::[SU]ADDSAT/[SU]SUBSAT into supported arithmetic, preferring
// cheap min/max- or overflow-flag-based forms before the generic
// overflow + select expansion.
SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  SDValue LHS = Node->getOperand(Num: 0);
  SDValue RHS = Node->getOperand(Num: 1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);

  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");

  // usub.sat(a, b) -> umax(a, b) - b
  if (Opcode == ISD::USUBSAT && isOperationLegal(Op: ISD::UMAX, VT)) {
    SDValue Max = DAG.getNode(Opcode: ISD::UMAX, DL: dl, VT, N1: LHS, N2: RHS);
    return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Max, N2: RHS);
  }

  // usub.sat(a, 1) -> sub(a, zext(a != 0))
  if (Opcode == ISD::USUBSAT && isOneOrOneSplat(V: RHS)) {
    // Freeze LHS since it is used both in the setcc and the sub.
    LHS = DAG.getFreeze(V: LHS);
    SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
    EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
    SDValue IsNonZero = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: Zero, Cond: ISD::SETNE);
    SDValue Subtrahend = DAG.getBoolExtOrTrunc(Op: IsNonZero, SL: dl, VT, OpVT: BoolVT);
    // Mask to a single bit so a sign-extended (all-ones) boolean still
    // yields exactly 1.
    Subtrahend =
        DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Subtrahend, N2: DAG.getConstant(Val: 1, DL: dl, VT));
    return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: Subtrahend);
  }

  // uadd.sat(a, b) -> umin(a, ~b) + b
  if (Opcode == ISD::UADDSAT && isOperationLegal(Op: ISD::UMIN, VT)) {
    SDValue InvRHS = DAG.getNOT(DL: dl, Val: RHS, VT);
    SDValue Min = DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT, N1: LHS, N2: InvRHS);
    return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Min, N2: RHS);
  }

  // Map the saturating opcode to its overflow-reporting counterpart.
  unsigned OverflowOp;
  switch (Opcode) {
  case ISD::SADDSAT:
    OverflowOp = ISD::SADDO;
    break;
  case ISD::UADDSAT:
    OverflowOp = ISD::UADDO;
    break;
  case ISD::SSUBSAT:
    OverflowOp = ISD::SSUBO;
    break;
  case ISD::USUBSAT:
    OverflowOp = ISD::USUBO;
    break;
  default:
    llvm_unreachable("Expected method to receive signed or unsigned saturation "
                     "addition or subtraction node.");
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(N: Node);

  unsigned BitWidth = LHS.getScalarValueSizeInBits();
  EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
  SDValue Result = DAG.getNode(Opcode: OverflowOp, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
  SDValue SumDiff = Result.getValue(R: 0);
  SDValue Overflow = Result.getValue(R: 1);
  SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
  SDValue AllOnes = DAG.getAllOnesConstant(DL: dl, VT);

  if (Opcode == ISD::UADDSAT) {
    if (getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
      // (LHS + RHS) | OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Op: Overflow, DL: dl, VT);
      return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: SumDiff, N2: OverflowMask);
    }
    // Overflow ? 0xffff.... : (LHS + RHS)
    return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: AllOnes, RHS: SumDiff);
  }

  if (Opcode == ISD::USUBSAT) {
    if (getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
      // (LHS - RHS) & ~OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Op: Overflow, DL: dl, VT);
      SDValue Not = DAG.getNOT(DL: dl, Val: OverflowMask, VT);
      return DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: SumDiff, N2: Not);
    }
    // Overflow ? 0 : (LHS - RHS)
    return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Zero, RHS: SumDiff);
  }

  assert((Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) &&
         "Expected signed saturating add/sub opcode");

  const APInt MinVal = APInt::getSignedMinValue(numBits: BitWidth);
  const APInt MaxVal = APInt::getSignedMaxValue(numBits: BitWidth);

  KnownBits KnownLHS = DAG.computeKnownBits(Op: LHS);
  KnownBits KnownRHS = DAG.computeKnownBits(Op: RHS);

  // If either of the operand signs are known, then they are guaranteed to
  // only saturate in one direction. If non-negative they will saturate
  // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
  //
  // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
  // sign of 'y' has to be flipped.

  bool LHSIsNonNegative = KnownLHS.isNonNegative();
  bool RHSIsNonNegative =
      Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative() : KnownRHS.isNegative();
  if (LHSIsNonNegative || RHSIsNonNegative) {
    SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
    return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMax, RHS: SumDiff);
  }

  bool LHSIsNegative = KnownLHS.isNegative();
  bool RHSIsNegative =
      Opcode == ISD::SADDSAT ? KnownRHS.isNegative() : KnownRHS.isNonNegative();
  if (LHSIsNegative || RHSIsNegative) {
    SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
    return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMin, RHS: SumDiff);
  }

  // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
  // On overflow the sign bit of SumDiff is the inverse of the true sign, so
  // broadcasting it (SRA by BW-1) and XOR'ing with MinVal selects the
  // correct saturation bound (MinVal or MaxVal) without a second compare.
  SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
  SDValue Shift = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: SumDiff,
                              N2: DAG.getConstant(Val: BitWidth - 1, DL: dl, VT));
  Result = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Shift, N2: SatMin);
  return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Result, RHS: SumDiff);
}
11472
// Expand an ISD::SCMP/UCMP three-way comparison (result -1, 0 or +1) into
// two setccs combined with either selects or boolean subtraction.
SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  SDValue LHS = Node->getOperand(Num: 0);
  SDValue RHS = Node->getOperand(Num: 1);
  EVT VT = LHS.getValueType();
  EVT ResVT = Node->getValueType(ResNo: 0);
  EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
  SDLoc dl(Node);

  // UCMP compares unsigned, SCMP signed.
  auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
  auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
  SDValue IsLT = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS, Cond: LTPredicate);
  SDValue IsGT = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS, Cond: GTPredicate);

  // We can't perform arithmetic on i1 values. Extending them would
  // probably result in worse codegen, so let's just use two selects instead.
  // Some targets are also just better off using selects rather than subtraction
  // because one of the conditions can be merged with one of the selects.
  // And finally, if we don't know the contents of high bits of a boolean value
  // we can't perform any arithmetic either.
  if (preferSelectsOverBooleanArithmetic(VT) ||
      BoolVT.getScalarSizeInBits() == 1 ||
      getBooleanContents(Type: BoolVT) == UndefinedBooleanContent) {
    // IsLT ? -1 : (IsGT ? 1 : 0)
    SDValue SelectZeroOrOne =
        DAG.getSelect(DL: dl, VT: ResVT, Cond: IsGT, LHS: DAG.getConstant(Val: 1, DL: dl, VT: ResVT),
                      RHS: DAG.getConstant(Val: 0, DL: dl, VT: ResVT));
    return DAG.getSelect(DL: dl, VT: ResVT, Cond: IsLT, LHS: DAG.getAllOnesConstant(DL: dl, VT: ResVT),
                         RHS: SelectZeroOrOne);
  }

  // With zero-or-one booleans, IsGT - IsLT is already -1/0/+1. With
  // zero-or-negative-one booleans "true" is -1, so swap the operands to
  // keep the same signs.
  if (getBooleanContents(Type: BoolVT) == ZeroOrNegativeOneBooleanContent)
    std::swap(a&: IsGT, b&: IsLT);
  return DAG.getSExtOrTrunc(Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: BoolVT, N1: IsGT, N2: IsLT), DL: dl,
                            VT: ResVT);
}
11508
11509SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
11510 unsigned Opcode = Node->getOpcode();
11511 bool IsSigned = Opcode == ISD::SSHLSAT;
11512 SDValue LHS = Node->getOperand(Num: 0);
11513 SDValue RHS = Node->getOperand(Num: 1);
11514 EVT VT = LHS.getValueType();
11515 SDLoc dl(Node);
11516
11517 assert((Node->getOpcode() == ISD::SSHLSAT ||
11518 Node->getOpcode() == ISD::USHLSAT) &&
11519 "Expected a SHLSAT opcode");
11520 assert(VT.isInteger() && "Expected operands to be integers");
11521
11522 if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
11523 return DAG.UnrollVectorOp(N: Node);
11524
11525 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
11526
11527 unsigned BW = VT.getScalarSizeInBits();
11528 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
11529 SDValue Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS, N2: RHS);
11530 SDValue Orig =
11531 DAG.getNode(Opcode: IsSigned ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: Result, N2: RHS);
11532
11533 SDValue SatVal;
11534 if (IsSigned) {
11535 SDValue SatMin = DAG.getConstant(Val: APInt::getSignedMinValue(numBits: BW), DL: dl, VT);
11536 SDValue SatMax = DAG.getConstant(Val: APInt::getSignedMaxValue(numBits: BW), DL: dl, VT);
11537 SDValue Cond =
11538 DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: DAG.getConstant(Val: 0, DL: dl, VT), Cond: ISD::SETLT);
11539 SatVal = DAG.getSelect(DL: dl, VT, Cond, LHS: SatMin, RHS: SatMax);
11540 } else {
11541 SatVal = DAG.getConstant(Val: APInt::getMaxValue(numBits: BW), DL: dl, VT);
11542 }
11543 SDValue Cond = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: Orig, Cond: ISD::SETNE);
11544 return DAG.getSelect(DL: dl, VT, Cond, LHS: SatVal, RHS: Result);
11545}
11546
11547void TargetLowering::forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl,
11548 bool Signed, SDValue &Lo, SDValue &Hi,
11549 SDValue LHS, SDValue RHS,
11550 SDValue HiLHS, SDValue HiRHS) const {
11551 EVT VT = LHS.getValueType();
11552 assert(RHS.getValueType() == VT && "Mismatching operand types");
11553
11554 assert((HiLHS && HiRHS) || (!HiLHS && !HiRHS));
11555 assert((!Signed || !HiLHS) &&
11556 "Signed flag should only be set when HiLHS and RiRHS are null");
11557
11558 // We'll expand the multiplication by brute force because we have no other
11559 // options. This is a trivially-generalized version of the code from
11560 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
11561 // 4.3.1). If Signed is set, we can use arithmetic right shifts to propagate
11562 // sign bits while calculating the Hi half.
11563 unsigned Bits = VT.getSizeInBits();
11564 unsigned HalfBits = Bits / 2;
11565 SDValue Mask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: Bits, loBitsSet: HalfBits), DL: dl, VT);
11566 SDValue LL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: LHS, N2: Mask);
11567 SDValue RL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: RHS, N2: Mask);
11568
11569 SDValue T = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LL, N2: RL);
11570 SDValue TL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: T, N2: Mask);
11571
11572 SDValue Shift = DAG.getShiftAmountConstant(Val: HalfBits, VT, DL: dl);
11573 // This is always an unsigned shift.
11574 SDValue TH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: T, N2: Shift);
11575
11576 unsigned ShiftOpc = Signed ? ISD::SRA : ISD::SRL;
11577 SDValue LH = DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: LHS, N2: Shift);
11578 SDValue RH = DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: RHS, N2: Shift);
11579
11580 SDValue U =
11581 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LH, N2: RL), N2: TH);
11582 SDValue UL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: U, N2: Mask);
11583 SDValue UH = DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: U, N2: Shift);
11584
11585 SDValue V =
11586 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LL, N2: RH), N2: UL);
11587 SDValue VH = DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: V, N2: Shift);
11588
11589 Lo = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: TL,
11590 N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: V, N2: Shift));
11591
11592 Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LH, N2: RH),
11593 N2: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: UH, N2: VH));
11594
11595 // If HiLHS and HiRHS are set, multiply them by the opposite low part and add
11596 // the products to Hi.
11597 if (HiLHS) {
11598 SDValue RHLL = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: HiRHS, N2: LHS);
11599 SDValue RLLH = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: RHS, N2: HiLHS);
11600 Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Hi,
11601 N2: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: RHLL, N2: RLLH));
11602 }
11603}
11604
// Expand LHS * RHS into a double-width Lo/Hi result when MUL at 2*VT is not
// legal: prefer a __mul* libcall on the widened type, otherwise fall back to
// the brute-force digit expansion in forceExpandMultiply.
void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
                                        bool Signed, const SDValue LHS,
                                        const SDValue RHS, SDValue &Lo,
                                        SDValue &Hi) const {
  EVT VT = LHS.getValueType();
  assert(RHS.getValueType() == VT && "Mismatching operand types");
  EVT WideVT = VT.widenIntegerElementType(Context&: *DAG.getContext());
  // We can fall back to a libcall with an illegal type for the MUL if we
  // have a libcall big enough.
  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (WideVT == MVT::i16)
    LC = RTLIB::MUL_I16;
  else if (WideVT == MVT::i32)
    LC = RTLIB::MUL_I32;
  else if (WideVT == MVT::i64)
    LC = RTLIB::MUL_I64;
  else if (WideVT == MVT::i128)
    LC = RTLIB::MUL_I128;

  RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(Call: LC);
  if (LibcallImpl == RTLIB::Unsupported) {
    // No suitable libcall: expand the multiplication in-line.
    forceExpandMultiply(DAG, dl, Signed, Lo, Hi, LHS, RHS);
    return;
  }

  // Build the high halves of the double-width operands for the libcall.
  SDValue HiLHS, HiRHS;
  if (Signed) {
    // The high part is obtained by SRA'ing all but one of the bits of low
    // part.
    unsigned LoSize = VT.getFixedSizeInBits();
    SDValue Shift = DAG.getShiftAmountConstant(Val: LoSize - 1, VT, DL: dl);
    HiLHS = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: LHS, N2: Shift);
    HiRHS = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: RHS, N2: Shift);
  } else {
    HiLHS = DAG.getConstant(Val: 0, DL: dl, VT);
    HiRHS = DAG.getConstant(Val: 0, DL: dl, VT);
  }

  // Attempt a libcall.
  SDValue Ret;
  TargetLowering::MakeLibCallOptions CallOptions;
  CallOptions.setIsSigned(Signed);
  CallOptions.setIsPostTypeLegalization(true);
  if (shouldSplitFunctionArgumentsAsLittleEndian(DL: DAG.getDataLayout())) {
    // Halves of WideVT are packed into registers in different order
    // depending on platform endianness. This is usually handled by
    // the C calling convention, but we can't defer to it in
    // the legalizer.
    SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
    Ret = makeLibCall(DAG, LC, RetVT: WideVT, Ops: Args, CallOptions, dl).first;
  } else {
    SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
    Ret = makeLibCall(DAG, LC, RetVT: WideVT, Ops: Args, CallOptions, dl).first;
  }
  assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
         "Ret value is a collection of constituent nodes holding result.");
  // Unpack the double-width result in the platform's register order.
  if (DAG.getDataLayout().isLittleEndian()) {
    // Same as above.
    Lo = Ret.getOperand(i: 0);
    Hi = Ret.getOperand(i: 1);
  } else {
    Lo = Ret.getOperand(i: 1);
    Hi = Ret.getOperand(i: 0);
  }
}
11670
11671SDValue
11672TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
11673 assert((Node->getOpcode() == ISD::SMULFIX ||
11674 Node->getOpcode() == ISD::UMULFIX ||
11675 Node->getOpcode() == ISD::SMULFIXSAT ||
11676 Node->getOpcode() == ISD::UMULFIXSAT) &&
11677 "Expected a fixed point multiplication opcode");
11678
11679 SDLoc dl(Node);
11680 SDValue LHS = Node->getOperand(Num: 0);
11681 SDValue RHS = Node->getOperand(Num: 1);
11682 EVT VT = LHS.getValueType();
11683 unsigned Scale = Node->getConstantOperandVal(Num: 2);
11684 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
11685 Node->getOpcode() == ISD::UMULFIXSAT);
11686 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
11687 Node->getOpcode() == ISD::SMULFIXSAT);
11688 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
11689 unsigned VTSize = VT.getScalarSizeInBits();
11690
11691 if (!Scale) {
11692 // [us]mul.fix(a, b, 0) -> mul(a, b)
11693 if (!Saturating) {
11694 if (isOperationLegalOrCustom(Op: ISD::MUL, VT))
11695 return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
11696 } else if (Signed && isOperationLegalOrCustom(Op: ISD::SMULO, VT)) {
11697 SDValue Result =
11698 DAG.getNode(Opcode: ISD::SMULO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
11699 SDValue Product = Result.getValue(R: 0);
11700 SDValue Overflow = Result.getValue(R: 1);
11701 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
11702
11703 APInt MinVal = APInt::getSignedMinValue(numBits: VTSize);
11704 APInt MaxVal = APInt::getSignedMaxValue(numBits: VTSize);
11705 SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
11706 SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
11707 // Xor the inputs, if resulting sign bit is 0 the product will be
11708 // positive, else negative.
11709 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: LHS, N2: RHS);
11710 SDValue ProdNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Xor, RHS: Zero, Cond: ISD::SETLT);
11711 Result = DAG.getSelect(DL: dl, VT, Cond: ProdNeg, LHS: SatMin, RHS: SatMax);
11712 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Result, RHS: Product);
11713 } else if (!Signed && isOperationLegalOrCustom(Op: ISD::UMULO, VT)) {
11714 SDValue Result =
11715 DAG.getNode(Opcode: ISD::UMULO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
11716 SDValue Product = Result.getValue(R: 0);
11717 SDValue Overflow = Result.getValue(R: 1);
11718
11719 APInt MaxVal = APInt::getMaxValue(numBits: VTSize);
11720 SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
11721 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMax, RHS: Product);
11722 }
11723 }
11724
11725 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
11726 "Expected scale to be less than the number of bits if signed or at "
11727 "most the number of bits if unsigned.");
11728 assert(LHS.getValueType() == RHS.getValueType() &&
11729 "Expected both operands to be the same type");
11730
11731 // Get the upper and lower bits of the result.
11732 SDValue Lo, Hi;
11733 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
11734 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
11735 EVT WideVT = VT.widenIntegerElementType(Context&: *DAG.getContext());
11736 if (isOperationLegalOrCustom(Op: LoHiOp, VT)) {
11737 SDValue Result = DAG.getNode(Opcode: LoHiOp, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: LHS, N2: RHS);
11738 Lo = Result.getValue(R: 0);
11739 Hi = Result.getValue(R: 1);
11740 } else if (isOperationLegalOrCustom(Op: HiOp, VT)) {
11741 Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
11742 Hi = DAG.getNode(Opcode: HiOp, DL: dl, VT, N1: LHS, N2: RHS);
11743 } else if (isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT)) {
11744 // Try for a multiplication using a wider type.
11745 unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
11746 SDValue LHSExt = DAG.getNode(Opcode: Ext, DL: dl, VT: WideVT, Operand: LHS);
11747 SDValue RHSExt = DAG.getNode(Opcode: Ext, DL: dl, VT: WideVT, Operand: RHS);
11748 SDValue Res = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: LHSExt, N2: RHSExt);
11749 Lo = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Res);
11750 SDValue Shifted =
11751 DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: WideVT, N1: Res,
11752 N2: DAG.getShiftAmountConstant(Val: VTSize, VT: WideVT, DL: dl));
11753 Hi = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Shifted);
11754 } else if (VT.isVector()) {
11755 return SDValue();
11756 } else {
11757 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
11758 }
11759
11760 if (Scale == VTSize)
11761 // Result is just the top half since we'd be shifting by the width of the
11762 // operand. Overflow impossible so this works for both UMULFIX and
11763 // UMULFIXSAT.
11764 return Hi;
11765
11766 // The result will need to be shifted right by the scale since both operands
11767 // are scaled. The result is given to us in 2 halves, so we only want part of
11768 // both in the result.
11769 SDValue Result = DAG.getNode(Opcode: ISD::FSHR, DL: dl, VT, N1: Hi, N2: Lo,
11770 N3: DAG.getShiftAmountConstant(Val: Scale, VT, DL: dl));
11771 if (!Saturating)
11772 return Result;
11773
11774 if (!Signed) {
11775 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
11776 // widened multiplication) aren't all zeroes.
11777
11778 // Saturate to max if ((Hi >> Scale) != 0),
11779 // which is the same as if (Hi > ((1 << Scale) - 1))
11780 APInt MaxVal = APInt::getMaxValue(numBits: VTSize);
11781 SDValue LowMask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VTSize, loBitsSet: Scale),
11782 DL: dl, VT);
11783 Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: LowMask,
11784 True: DAG.getConstant(Val: MaxVal, DL: dl, VT), False: Result,
11785 Cond: ISD::SETUGT);
11786
11787 return Result;
11788 }
11789
11790 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
11791 // widened multiplication) aren't all ones or all zeroes.
11792
11793 SDValue SatMin = DAG.getConstant(Val: APInt::getSignedMinValue(numBits: VTSize), DL: dl, VT);
11794 SDValue SatMax = DAG.getConstant(Val: APInt::getSignedMaxValue(numBits: VTSize), DL: dl, VT);
11795
11796 if (Scale == 0) {
11797 SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Lo,
11798 N2: DAG.getShiftAmountConstant(Val: VTSize - 1, VT, DL: dl));
11799 SDValue Overflow = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Hi, RHS: Sign, Cond: ISD::SETNE);
11800 // Saturated to SatMin if wide product is negative, and SatMax if wide
11801 // product is positive ...
11802 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
11803 SDValue ResultIfOverflow = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: Zero, True: SatMin, False: SatMax,
11804 Cond: ISD::SETLT);
11805 // ... but only if we overflowed.
11806 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: ResultIfOverflow, RHS: Result);
11807 }
11808
11809 // We handled Scale==0 above so all the bits to examine is in Hi.
11810
11811 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
11812 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
11813 SDValue LowMask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VTSize, loBitsSet: Scale - 1),
11814 DL: dl, VT);
11815 Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: LowMask, True: SatMax, False: Result, Cond: ISD::SETGT);
11816 // Saturate to min if (Hi >> (Scale - 1)) < -1),
11817 // which is the same as if (HI < (-1 << (Scale - 1))
11818 SDValue HighMask =
11819 DAG.getConstant(Val: APInt::getHighBitsSet(numBits: VTSize, hiBitsSet: VTSize - Scale + 1),
11820 DL: dl, VT);
11821 Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: HighMask, True: SatMin, False: Result, Cond: ISD::SETLT);
11822 return Result;
11823}
11824
/// Expand a [US]DIVFIX[SAT] node into a plain integer division.
///
/// Instead of widening, the operands are pre-scaled so the ordinary quotient
/// already carries the requested fixed-point Scale: the LHS is shifted up by
/// as much provable headroom as it has, and the remainder of the scale is
/// taken out of the RHS's known trailing zeroes. Signed results are
/// additionally fixed up to round towards negative infinity. Returns
/// SDValue() when the operands do not have enough provable headroom to do
/// this in VT (the caller must widen or libcall instead).
SDValue
TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    unsigned Scale, SelectionDAG &DAG) const {
  assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
          Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
         "Expected a fixed point division opcode");

  EVT VT = LHS.getValueType();
  bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
  bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // If there is enough room in the type to upscale the LHS or downscale the
  // RHS before the division, we can perform it in this type without having to
  // resize. For signed operations, the LHS headroom is the number of
  // redundant sign bits, and for unsigned ones it is the number of zeroes.
  // The headroom for the RHS is the number of trailing zeroes.
  unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
                            : DAG.computeKnownBits(LHS).countMinLeadingZeros();
  unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();

  // For signed saturating operations, we need to be able to detect true integer
  // division overflow; that is, when you have MIN / -EPS. However, this
  // is undefined behavior and if we emit divisions that could take such
  // values it may cause undesired behavior (arithmetic exceptions on x86, for
  // example).
  // Avoid this by requiring an extra bit so that we never get this case.
  // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
  // signed saturating division, we need to emit a whopping 32-bit division.
  if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
    return SDValue();

  // Prefer shifting the LHS up; only shift the RHS down for whatever part of
  // the scale the LHS headroom cannot absorb.
  unsigned LHSShift = std::min(LHSLead, Scale);
  unsigned RHSShift = Scale - LHSShift;

  // At this point, we know that if we shift the LHS up by LHSShift and the
  // RHS down by RHSShift, we can emit a regular division with a final scaling
  // factor of Scale.

  if (LHSShift)
    LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
                      DAG.getShiftAmountConstant(LHSShift, VT, dl));
  if (RHSShift)
    RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
                      DAG.getShiftAmountConstant(RHSShift, VT, dl));

  SDValue Quot;
  if (Signed) {
    // For signed operations, if the resulting quotient is negative and the
    // remainder is nonzero, subtract 1 from the quotient to round towards
    // negative infinity.
    SDValue Rem;
    // FIXME: Ideally we would always produce an SDIVREM here, but if the
    // type isn't legal, SDIVREM cannot be expanded. There is no reason why
    // we couldn't just form a libcall, but the type legalizer doesn't do it.
    if (isTypeLegal(VT) &&
        isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
      Quot = DAG.getNode(ISD::SDIVREM, dl,
                         DAG.getVTList(VT, VT),
                         LHS, RHS);
      Rem = Quot.getValue(1);
      Quot = Quot.getValue(0);
    } else {
      Quot = DAG.getNode(ISD::SDIV, dl, VT,
                         LHS, RHS);
      Rem = DAG.getNode(ISD::SREM, dl, VT,
                        LHS, RHS);
    }
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
    SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
    SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
    // The quotient is negative exactly when the operand signs differ.
    SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
    SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
                               DAG.getConstant(1, dl, VT));
    Quot = DAG.getSelect(dl, VT,
                         DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
                         Sub1, Quot);
  } else
    Quot = DAG.getNode(ISD::UDIV, dl, VT,
                       LHS, RHS);

  return Quot;
}
11910
11911void TargetLowering::expandUADDSUBO(
11912 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11913 SDLoc dl(Node);
11914 SDValue LHS = Node->getOperand(Num: 0);
11915 SDValue RHS = Node->getOperand(Num: 1);
11916 bool IsAdd = Node->getOpcode() == ISD::UADDO;
11917
11918 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
11919 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
11920 if (isOperationLegalOrCustom(Op: OpcCarry, VT: Node->getValueType(ResNo: 0))) {
11921 SDValue CarryIn = DAG.getConstant(Val: 0, DL: dl, VT: Node->getValueType(ResNo: 1));
11922 SDValue NodeCarry = DAG.getNode(Opcode: OpcCarry, DL: dl, VTList: Node->getVTList(),
11923 Ops: { LHS, RHS, CarryIn });
11924 Result = SDValue(NodeCarry.getNode(), 0);
11925 Overflow = SDValue(NodeCarry.getNode(), 1);
11926 return;
11927 }
11928
11929 Result = DAG.getNode(Opcode: IsAdd ? ISD::ADD : ISD::SUB, DL: dl,
11930 VT: LHS.getValueType(), N1: LHS, N2: RHS);
11931
11932 EVT ResultType = Node->getValueType(ResNo: 1);
11933 EVT SetCCType = getSetCCResultType(
11934 DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: 0));
11935 SDValue SetCC;
11936 if (IsAdd && isOneConstant(V: RHS)) {
11937 // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
11938 // the live range of X. We assume comparing with 0 is cheap.
11939 // The general case (X + C) < C is not necessarily beneficial. Although we
11940 // reduce the live range of X, we may introduce the materialization of
11941 // constant C.
11942 SetCC =
11943 DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Result,
11944 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Node->getValueType(ResNo: 0)), Cond: ISD::SETEQ);
11945 } else if (IsAdd && isAllOnesConstant(V: RHS)) {
11946 // Special case: uaddo X, -1 overflows if X != 0.
11947 SetCC =
11948 DAG.getSetCC(DL: dl, VT: SetCCType, LHS,
11949 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Node->getValueType(ResNo: 0)), Cond: ISD::SETNE);
11950 } else {
11951 ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
11952 SetCC = DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Result, RHS: LHS, Cond: CC);
11953 }
11954 Overflow = DAG.getBoolExtOrTrunc(Op: SetCC, SL: dl, VT: ResultType, OpVT: ResultType);
11955}
11956
11957void TargetLowering::expandSADDSUBO(
11958 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11959 SDLoc dl(Node);
11960 SDValue LHS = Node->getOperand(Num: 0);
11961 SDValue RHS = Node->getOperand(Num: 1);
11962 bool IsAdd = Node->getOpcode() == ISD::SADDO;
11963
11964 Result = DAG.getNode(Opcode: IsAdd ? ISD::ADD : ISD::SUB, DL: dl,
11965 VT: LHS.getValueType(), N1: LHS, N2: RHS);
11966
11967 EVT ResultType = Node->getValueType(ResNo: 1);
11968 EVT OType = getSetCCResultType(
11969 DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: 0));
11970
11971 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
11972 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
11973 if (isOperationLegal(Op: OpcSat, VT: LHS.getValueType())) {
11974 SDValue Sat = DAG.getNode(Opcode: OpcSat, DL: dl, VT: LHS.getValueType(), N1: LHS, N2: RHS);
11975 SDValue SetCC = DAG.getSetCC(DL: dl, VT: OType, LHS: Result, RHS: Sat, Cond: ISD::SETNE);
11976 Overflow = DAG.getBoolExtOrTrunc(Op: SetCC, SL: dl, VT: ResultType, OpVT: ResultType);
11977 return;
11978 }
11979
11980 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: LHS.getValueType());
11981
11982 // For an addition, the result should be less than one of the operands (LHS)
11983 // if and only if the other operand (RHS) is negative, otherwise there will
11984 // be overflow.
11985 // For a subtraction, the result should be less than one of the operands
11986 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11987 // otherwise there will be overflow.
11988 SDValue ResultLowerThanLHS = DAG.getSetCC(DL: dl, VT: OType, LHS: Result, RHS: LHS, Cond: ISD::SETLT);
11989 SDValue ConditionRHS =
11990 DAG.getSetCC(DL: dl, VT: OType, LHS: RHS, RHS: Zero, Cond: IsAdd ? ISD::SETLT : ISD::SETGT);
11991
11992 Overflow = DAG.getBoolExtOrTrunc(
11993 Op: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OType, N1: ConditionRHS, N2: ResultLowerThanLHS), SL: dl,
11994 VT: ResultType, OpVT: ResultType);
11995}
11996
/// Expand [SU]MULO into a multiply plus an explicit overflow computation,
/// trying (in order) a shift-based form for power-of-two constants, a
/// MUL_LOHI / MULH node, a double-width multiply, and finally a forced wide
/// multiply expansion. Returns false only for vector types with none of the
/// wide forms available, in which case the caller must unroll.
bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
                               SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool isSigned = Node->getOpcode() == ISD::SMULO;

  // For power-of-two multiplications we can use a simpler shift expansion.
  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
    const APInt &C = RHSC->getAPIntValue();
    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
    if (C.isPowerOf2()) {
      // smulo(x, signed_min) is same as umulo(x, signed_min).
      bool UseArithShift = isSigned && !C.isMinSignedValue();
      SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
      // Overflow iff shifting back down does not reproduce the original LHS.
      Overflow = DAG.getSetCC(dl, SetCCVT,
                              DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                                          dl, VT, Result, ShiftAmt),
                              LHS, ISD::SETNE);
      return true;
    }
  }

  SDValue BottomHalf;
  SDValue TopHalf;
  EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());

  // Opcode triples indexed by signedness: {LOHI multiply, high-half multiply,
  // extension used for the widened fallback}.
  static const unsigned Ops[2][3] =
      { { ISD::UMUL_LOHI, ISD::MULHU, ISD::ZERO_EXTEND },
        { ISD::SMUL_LOHI, ISD::MULHS, ISD::SIGN_EXTEND }};
  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
    BottomHalf = DAG.getNode(Ops[isSigned][0], dl, DAG.getVTList(VT, VT), LHS,
                             RHS);
    TopHalf = BottomHalf.getValue(1);
  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    TopHalf = DAG.getNode(Ops[isSigned][1], dl, VT, LHS, RHS);
  } else if (isTypeLegal(WideVT)) {
    // Multiply in the double-width type and split the halves back out.
    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
    SDValue ShiftAmt =
        DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
  } else {
    if (VT.isVector())
      return false;

    forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
  }

  Result = BottomHalf;
  if (isSigned) {
    // Signed overflow iff the top half is not the sign-extension of the
    // bottom half.
    SDValue ShiftAmt = DAG.getShiftAmountConstant(
        VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
  } else {
    // Unsigned overflow iff any bit spilled into the top half.
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                            DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(1);
  if (RType.bitsLT(Overflow.getValueType()))
    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}
12073
/// Expand a VECREDUCE_* node. Power-of-two vectors are repeatedly split in
/// half and combined with the scalar base opcode for as long as that opcode
/// is legal at the half width (stopping early if the reduction itself
/// becomes legal); whatever remains is fully scalarized and chained.
SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
  SDValue Op = Node->getOperand(0);
  EVT VT = Op.getValueType();

  // Try to use a shuffle reduction for power of two vectors.
  if (VT.isPow2VectorType()) {
    while (VT.getVectorElementCount().isKnownMultipleOf(2)) {
      EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
      if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
        break;

      SDValue Lo, Hi;
      std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
      Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
      VT = HalfVT;

      // Stop if splitting is enough to make the reduction legal.
      if (isOperationLegalOrCustom(Node->getOpcode(), HalfVT))
        return DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Op,
                           Node->getFlags());
    }
  }

  // Scalarization below requires a compile-time-known element count.
  if (VT.isScalableVector())
    reportFatalInternalError(
        "Expanding reductions for scalable vectors is undefined.");

  EVT EltVT = VT.getVectorElementType();
  unsigned NumElts = VT.getVectorNumElements();

  SmallVector<SDValue, 8> Ops;
  DAG.ExtractVectorElements(Op, Ops, 0, NumElts);

  // Left-to-right chain of the base operation over all elements,
  // preserving the node's FP flags (e.g. reassoc) on each step.
  SDValue Res = Ops[0];
  for (unsigned i = 1; i < NumElts; i++)
    Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());

  // Result type may be wider than element type.
  if (EltVT != Node->getValueType(0))
    Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
  return Res;
}
12118
12119SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
12120 SDLoc dl(Node);
12121 SDValue AccOp = Node->getOperand(Num: 0);
12122 SDValue VecOp = Node->getOperand(Num: 1);
12123 SDNodeFlags Flags = Node->getFlags();
12124
12125 EVT VT = VecOp.getValueType();
12126 EVT EltVT = VT.getVectorElementType();
12127
12128 if (VT.isScalableVector())
12129 report_fatal_error(
12130 reason: "Expanding reductions for scalable vectors is undefined.");
12131
12132 unsigned NumElts = VT.getVectorNumElements();
12133
12134 SmallVector<SDValue, 8> Ops;
12135 DAG.ExtractVectorElements(Op: VecOp, Args&: Ops, Start: 0, Count: NumElts);
12136
12137 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(VecReduceOpcode: Node->getOpcode());
12138
12139 SDValue Res = AccOp;
12140 for (unsigned i = 0; i < NumElts; i++)
12141 Res = DAG.getNode(Opcode: BaseOpcode, DL: dl, VT: EltVT, N1: Res, N2: Ops[i], Flags);
12142
12143 return Res;
12144}
12145
12146bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
12147 SelectionDAG &DAG) const {
12148 EVT VT = Node->getValueType(ResNo: 0);
12149 SDLoc dl(Node);
12150 bool isSigned = Node->getOpcode() == ISD::SREM;
12151 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
12152 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
12153 SDValue Dividend = Node->getOperand(Num: 0);
12154 SDValue Divisor = Node->getOperand(Num: 1);
12155 if (isOperationLegalOrCustom(Op: DivRemOpc, VT)) {
12156 SDVTList VTs = DAG.getVTList(VT1: VT, VT2: VT);
12157 Result = DAG.getNode(Opcode: DivRemOpc, DL: dl, VTList: VTs, N1: Dividend, N2: Divisor).getValue(R: 1);
12158 return true;
12159 }
12160 if (isOperationLegalOrCustom(Op: DivOpc, VT)) {
12161 // X % Y -> X-X/Y*Y
12162 SDValue Divide = DAG.getNode(Opcode: DivOpc, DL: dl, VT, N1: Dividend, N2: Divisor);
12163 SDValue Mul = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Divide, N2: Divisor);
12164 Result = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Dividend, N2: Mul);
12165 return true;
12166 }
12167 return false;
12168}
12169
/// Expand FP_TO_[SU]INT_SAT into an unsaturated conversion plus explicit
/// clamping: either clamp in FP space with min/max nodes (when the
/// saturation bounds are exactly representable in the source FP type and
/// min/max are legal) or clamp the integer result with compare+select.
/// NaN inputs saturate to zero in all paths.
SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
                                            SelectionDAG &DAG) const {
  bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
  SDLoc dl(SDValue(Node, 0));
  SDValue Src = Node->getOperand(0);

  // DstVT is the result type, while SatVT is the size to which we saturate
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
  unsigned SatWidth = SatVT.getScalarSizeInBits();
  unsigned DstWidth = DstVT.getScalarSizeInBits();
  assert(SatWidth <= DstWidth &&
         "Expected saturation width smaller than result width");

  // Determine minimum and maximum integer values and their corresponding
  // floating-point values.
  APInt MinInt, MaxInt;
  if (IsSigned) {
    MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
    MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
  } else {
    MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
    MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
  }

  // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
  // libcall emission cannot handle this. Large result types will fail.
  if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
    SrcVT = Src.getValueType();
  }

  const fltSemantics &Sem = SrcVT.getFltSemantics();
  APFloat MinFloat(Sem);
  APFloat MaxFloat(Sem);

  // Convert the integer bounds to FP, rounding toward zero; if either
  // conversion is inexact the FP min/max clamp would not be tight.
  APFloat::opStatus MinStatus =
      MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
  APFloat::opStatus MaxStatus =
      MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
                             !(MaxStatus & APFloat::opStatus::opInexact);

  SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
  SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);

  // If the integer bounds are exactly representable as floats and min/max are
  // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
  // of comparisons and selects.
  auto EmitMinMax = [&](unsigned MinOpcode, unsigned MaxOpcode,
                        bool MayPropagateNaN) {
    bool MinMaxLegal = isOperationLegalOrCustom(MinOpcode, SrcVT) &&
                       isOperationLegalOrCustom(MaxOpcode, SrcVT);
    if (!MinMaxLegal)
      return SDValue();

    SDValue Clamped = Src;

    // Clamp Src by MinFloat from below. If !MayPropagateNaN and Src is NaN
    // then the result is MinFloat.
    Clamped = DAG.getNode(MaxOpcode, dl, SrcVT, Clamped, MinFloatNode);
    // Clamp by MaxFloat from above. If !MayPropagateNaN then NaN cannot occur.
    Clamped = DAG.getNode(MinOpcode, dl, SrcVT, Clamped, MaxFloatNode);
    // Convert clamped value to integer.
    SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
                                  dl, DstVT, Clamped);

    // If !MayPropagateNan and the conversion is unsigned case we're done,
    // because we mapped NaN to MinFloat, which will cast to zero.
    if (!MayPropagateNaN && !IsSigned)
      return FpToInt;

    // Otherwise, select 0 if Src is NaN.
    SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
    SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
    return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
  };
  if (AreExactFloatBounds) {
    if (SDValue Res = EmitMinMax(ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM,
                                 /*MayPropagateNaN=*/false))
      return Res;
    // These may propagate NaN for sNaN operands.
    if (SDValue Res =
            EmitMinMax(ISD::FMINNUM, ISD::FMAXNUM, /*MayPropagateNaN=*/true))
      return Res;
    // These always propagate NaN.
    if (SDValue Res =
            EmitMinMax(ISD::FMINIMUM, ISD::FMAXIMUM, /*MayPropagateNaN=*/true))
      return Res;
  }

  SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
  SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);

  // Result of direct conversion. The assumption here is that the operation is
  // non-trapping and it's fine to apply it to an out-of-range value if we
  // select it away later.
  SDValue FpToInt =
      DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);

  SDValue Select = FpToInt;

  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);

  // If Src ULT MinFloat, select MinInt. In particular, this also selects
  // MinInt if Src is NaN.
  SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
  Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
  // If Src OGT MaxFloat, select MaxInt.
  SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
  Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);

  // In the unsigned case we are done, because we mapped NaN to MinInt, which
  // is already zero.
  if (!IsSigned)
    return Select;

  // Otherwise, select 0 if Src is NaN.
  SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
  SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
  return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
}
12297
/// Narrow Op to ResultVT using round-to-odd, so that a subsequent second
/// narrowing does not suffer from double rounding. The narrow value is kept
/// as-is when the narrowing was exact, the wide value was NaN, or the narrow
/// value is already odd; otherwise its integer bit pattern is nudged by +/-1
/// toward the odd neighbour in the direction the rounding went.
SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue Op,
                                                const SDLoc &dl,
                                                SelectionDAG &DAG) const {
  EVT OperandVT = Op.getValueType();
  // Same scalar type: nothing to round.
  if (OperandVT.getScalarType() == ResultVT.getScalarType())
    return Op;
  EVT ResultIntVT = ResultVT.changeTypeToInteger();
  // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
  // can induce double-rounding which may alter the results. We can
  // correct for this using a trick explained in: Boldo, Sylvie, and
  // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
  // World Congress. 2005.
  SDValue Narrow = DAG.getFPExtendOrRound(Op, dl, ResultVT);
  SDValue NarrowAsWide = DAG.getFPExtendOrRound(Narrow, dl, OperandVT);

  // We can keep the narrow value as-is if narrowing was exact (no
  // rounding error), the wide value was NaN (the narrow value is also
  // NaN and should be preserved) or if we rounded to the odd value.
  SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, Narrow);
  SDValue One = DAG.getConstant(1, dl, ResultIntVT);
  SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
  SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
  EVT ResultIntVTCCVT = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
  SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
  // The result is already odd so we don't need to do anything.
  SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);

  EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                       Op.getValueType());
  // We keep results which are exact, odd or NaN.
  // SETUEQ is true when Op == NarrowAsWide (exact) or when either is NaN.
  SDValue KeepNarrow =
      DAG.getSetCC(dl, WideSetCCVT, Op, NarrowAsWide, ISD::SETUEQ);
  KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
  // We morally performed a round-down if AbsNarrow is smaller than
  // AbsWide.
  SDValue AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
  SDValue AbsNarrowAsWide = DAG.getNode(ISD::FABS, dl, OperandVT, NarrowAsWide);
  SDValue NarrowIsRd =
      DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
  // If the narrow value is odd or exact, pick it.
  // Otherwise, narrow is even and corresponds to either the rounded-up
  // or rounded-down value. If narrow is the rounded-down value, we want
  // the rounded-up value as it will be odd.
  SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
  SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
  Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
  return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
}
12347
/// Software-expand FP_ROUND. Currently only handles bf16 destinations:
/// either a direct FP_TO_BF16 when the node's trunc flag permits an
/// imprecise conversion, or an exact round via f32 using round-to-odd
/// followed by round-to-nearest-even on the i32 bit pattern with explicit
/// NaN quieting. Returns SDValue() for all other destination types.
SDValue TargetLowering::expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const {
  assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
  SDValue Op = Node->getOperand(0);
  EVT VT = Node->getValueType(0);
  SDLoc dl(Node);
  if (VT.getScalarType() == MVT::bf16) {
    // A trunc flag (operand 1) of 1 asserts the value is unchanged by this
    // rounding, so a plain FP_TO_BF16 conversion suffices.
    if (Node->getConstantOperandVal(1) == 1) {
      return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
    }
    EVT OperandVT = Op.getValueType();
    // Remember NaN-ness of the original input before any rounding.
    SDValue IsNaN = DAG.getSetCC(
        dl,
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
        Op, Op, ISD::SETUO);

    // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
    // can induce double-rounding which may alter the results. We can
    // correct for this using a trick explained in: Boldo, Sylvie, and
    // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
    // World Congress. 2005.
    EVT F32 = VT.changeElementType(*DAG.getContext(), MVT::f32);
    EVT I32 = F32.changeTypeToInteger();
    Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
    Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);

    // Conversions should set NaN's quiet bit. This also prevents NaNs from
    // turning into infinities.
    SDValue NaN =
        DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));

    // Factor in the contribution of the low 16 bits.
    SDValue One = DAG.getConstant(1, dl, I32);
    SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
                              DAG.getShiftAmountConstant(16, I32, dl));
    Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
    // Round-to-nearest-even bias: 0x7fff plus the LSB of the kept half.
    SDValue RoundingBias =
        DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
    SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);

    // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
    // 0x80000000.
    Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);

    // Now that we have rounded, shift the bits into position.
    Op = DAG.getNode(ISD::SRL, dl, I32, Op,
                     DAG.getShiftAmountConstant(16, I32, dl));
    Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
    EVT I16 = I32.changeElementType(*DAG.getContext(), MVT::i16);
    Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
    return DAG.getNode(ISD::BITCAST, dl, VT, Op);
  }
  // Other destination types are not expanded here.
  return SDValue();
}
12401
// Expand ISD::VECTOR_SPLICE_LEFT/RIGHT via a stack temporary: store both
// source vectors contiguously, then reload one vector's worth of data at an
// offset derived from the (possibly non-constant) splice offset operand.
SDValue TargetLowering::expandVectorSplice(SDNode *Node,
                                           SelectionDAG &DAG) const {
  assert((Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT ||
          Node->getOpcode() == ISD::VECTOR_SPLICE_RIGHT) &&
         "Unexpected opcode!");
  assert((Node->getValueType(0).isScalableVector() ||
          !isa<ConstantSDNode>(Node->getOperand(2))) &&
         "Fixed length vector types with constant offsets expected to use "
         "SHUFFLE_VECTOR!");

  EVT VT = Node->getValueType(ResNo: 0);
  SDValue V1 = Node->getOperand(Num: 0);
  SDValue V2 = Node->getOperand(Num: 1);
  SDValue Offset = Node->getOperand(Num: 2);
  SDLoc DL(Node);

  // Expand through memory thusly:
  //  Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
  //  Store V1, Ptr
  //  Store V2, Ptr + sizeof(V1)
  //  if (VECTOR_SPLICE_LEFT)
  //    Ptr = Ptr + (Offset * sizeof(VT.Elt))
  //  else
  //    Ptr = Ptr + sizeof(V1) - (Offset * size(VT.Elt))
  //  Res = Load Ptr

  Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);

  // The temporary must be large enough to hold V1 and V2 back-to-back.
  EVT MemVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: VT.getVectorElementType(),
                               EC: VT.getVectorElementCount() * 2);
  SDValue StackPtr = DAG.CreateStackTemporary(Bytes: MemVT.getStoreSize(), Alignment);
  EVT PtrVT = StackPtr.getValueType();
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FI: FrameIndex);

  // Store the lo part of CONCAT_VECTORS(V1, V2)
  SDValue StoreV1 = DAG.getStore(Chain: DAG.getEntryNode(), dl: DL, Val: V1, Ptr: StackPtr, PtrInfo);
  // Store the hi part of CONCAT_VECTORS(V1, V2)
  SDValue VTBytes = DAG.getTypeSize(DL, VT: PtrVT, TS: VT.getStoreSize());
  SDValue StackPtr2 = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, N2: VTBytes);
  SDValue StoreV2 = DAG.getStore(Chain: StoreV1, dl: DL, Val: V2, Ptr: StackPtr2, PtrInfo);

  // NOTE: TrailingBytes must be clamped so as not to read outside of V1:V2.
  SDValue EltByteSize =
      DAG.getTypeSize(DL, VT: PtrVT, TS: VT.getVectorElementType().getStoreSize());
  Offset = DAG.getZExtOrTrunc(Op: Offset, DL, VT: PtrVT);
  SDValue TrailingBytes = DAG.getNode(Opcode: ISD::MUL, DL, VT: PtrVT, N1: Offset, N2: EltByteSize);

  // Clamp the byte offset to one vector's size so the load below can never
  // read past the end of the V1:V2 buffer.
  TrailingBytes = DAG.getNode(Opcode: ISD::UMIN, DL, VT: PtrVT, N1: TrailingBytes, N2: VTBytes);

  // SPLICE_LEFT reads forward from inside V1; SPLICE_RIGHT reads backwards
  // from the start of V2 (i.e. the end of V1).
  if (Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT)
    StackPtr = DAG.getMemBasePlusOffset(Base: StackPtr, Offset: TrailingBytes, DL);
  else
    StackPtr = DAG.getNode(Opcode: ISD::SUB, DL, VT: PtrVT, N1: StackPtr2, N2: TrailingBytes);

  // Load the spliced result
  return DAG.getLoad(VT, dl: DL, Chain: StoreV2, Ptr: StackPtr,
                     PtrInfo: MachinePointerInfo::getUnknownStack(MF));
}
12462
// Expand ISD::VECTOR_COMPRESS through the stack: optionally store the
// passthru vector first, then store each selected element of Vec at the next
// free position, and finally reload the whole vector. Only fixed-width
// vectors are handled; scalable vectors must be handled by the target.
SDValue TargetLowering::expandVECTOR_COMPRESS(SDNode *Node,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Node);
  SDValue Vec = Node->getOperand(Num: 0);
  SDValue Mask = Node->getOperand(Num: 1);
  SDValue Passthru = Node->getOperand(Num: 2);

  EVT VecVT = Vec.getValueType();
  EVT ScalarVT = VecVT.getScalarType();
  EVT MaskVT = Mask.getValueType();
  EVT MaskScalarVT = MaskVT.getScalarType();

  // Needs to be handled by targets that have scalable vector types.
  if (VecVT.isScalableVector())
    report_fatal_error(reason: "Cannot expand masked_compress for scalable vectors.");

  SDValue StackPtr = DAG.CreateStackTemporary(
      Bytes: VecVT.getStoreSize(), Alignment: DAG.getReducedAlign(VT: VecVT, /*UseABI=*/false));
  int FI = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI);

  MVT PositionVT = getVectorIdxTy(DL: DAG.getDataLayout());
  SDValue Chain = DAG.getEntryNode();
  // OutPos tracks the next slot in the stack buffer to be written.
  SDValue OutPos = DAG.getConstant(Val: 0, DL, VT: PositionVT);

  bool HasPassthru = !Passthru.isUndef();

  // If we have a passthru vector, store it on the stack, overwrite the matching
  // positions and then re-write the last element that was potentially
  // overwritten even though mask[i] = false.
  if (HasPassthru)
    Chain = DAG.getStore(Chain, dl: DL, Val: Passthru, Ptr: StackPtr, PtrInfo);

  SDValue LastWriteVal;
  APInt PassthruSplatVal;
  bool IsSplatPassthru =
      ISD::isConstantSplatVector(N: Passthru.getNode(), SplatValue&: PassthruSplatVal);

  if (IsSplatPassthru) {
    // As we do not know which position we wrote to last, we cannot simply
    // access that index from the passthru vector. So we first check if passthru
    // is a splat vector, to use any element ...
    LastWriteVal = DAG.getConstant(Val: PassthruSplatVal, DL, VT: ScalarVT);
  } else if (HasPassthru) {
    // ... if it is not a splat vector, we need to get the passthru value at
    // position = popcount(mask) and re-load it from the stack before it is
    // overwritten in the loop below.
    EVT PopcountVT = ScalarVT.changeTypeToInteger();
    SDValue Popcount = DAG.getNode(
        Opcode: ISD::TRUNCATE, DL,
        VT: MaskVT.changeVectorElementType(Context&: *DAG.getContext(), EltVT: MVT::i1), Operand: Mask);
    Popcount = DAG.getNode(
        Opcode: ISD::ZERO_EXTEND, DL,
        VT: MaskVT.changeVectorElementType(Context&: *DAG.getContext(), EltVT: PopcountVT),
        Operand: Popcount);
    // popcount(mask) = sum of the zero-extended i1 mask lanes.
    Popcount = DAG.getNode(Opcode: ISD::VECREDUCE_ADD, DL, VT: PopcountVT, Operand: Popcount);
    SDValue LastElmtPtr =
        getVectorElementPointer(DAG, VecPtr: StackPtr, VecVT, Index: Popcount);
    LastWriteVal = DAG.getLoad(
        VT: ScalarVT, dl: DL, Chain, Ptr: LastElmtPtr,
        PtrInfo: MachinePointerInfo::getUnknownStack(MF&: DAG.getMachineFunction()));
    Chain = LastWriteVal.getValue(R: 1);
  }

  unsigned NumElms = VecVT.getVectorNumElements();
  for (unsigned I = 0; I < NumElms; I++) {
    // Unconditionally store element I at the current output position; the
    // position only advances when the mask lane is set, so unselected
    // elements are overwritten by later selected ones.
    SDValue ValI = DAG.getExtractVectorElt(DL, VT: ScalarVT, Vec, Idx: I);
    SDValue OutPtr = getVectorElementPointer(DAG, VecPtr: StackPtr, VecVT, Index: OutPos);
    Chain = DAG.getStore(
        Chain, dl: DL, Val: ValI, Ptr: OutPtr,
        PtrInfo: MachinePointerInfo::getUnknownStack(MF&: DAG.getMachineFunction()));

    // Get the mask value and add it to the current output position. This
    // either increments by 1 if MaskI is true or adds 0 otherwise.
    // Freeze in case we have poison/undef mask entries.
    SDValue MaskI = DAG.getExtractVectorElt(DL, VT: MaskScalarVT, Vec: Mask, Idx: I);
    MaskI = DAG.getFreeze(V: MaskI);
    MaskI = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i1, Operand: MaskI);
    MaskI = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: PositionVT, Operand: MaskI);
    OutPos = DAG.getNode(Opcode: ISD::ADD, DL, VT: PositionVT, N1: OutPos, N2: MaskI);

    if (HasPassthru && I == NumElms - 1) {
      SDValue EndOfVector =
          DAG.getConstant(Val: VecVT.getVectorNumElements() - 1, DL, VT: PositionVT);
      SDValue AllLanesSelected =
          DAG.getSetCC(DL, VT: MVT::i1, LHS: OutPos, RHS: EndOfVector, Cond: ISD::CondCode::SETUGT);
      // Clamp the write position so the final store stays in bounds.
      OutPos = DAG.getNode(Opcode: ISD::UMIN, DL, VT: PositionVT, N1: OutPos, N2: EndOfVector);
      OutPtr = getVectorElementPointer(DAG, VecPtr: StackPtr, VecVT, Index: OutPos);

      // Re-write the last ValI if all lanes were selected. Otherwise,
      // overwrite that last write with the passthru value.
      LastWriteVal = DAG.getSelect(DL, VT: ScalarVT, Cond: AllLanesSelected, LHS: ValI,
                                   RHS: LastWriteVal, Flags: SDNodeFlags::Unpredictable);
      Chain = DAG.getStore(
          Chain, dl: DL, Val: LastWriteVal, Ptr: OutPtr,
          PtrInfo: MachinePointerInfo::getUnknownStack(MF&: DAG.getMachineFunction()));
    }
  }

  return DAG.getLoad(VT: VecVT, dl: DL, Chain, Ptr: StackPtr, PtrInfo);
}
12565
// Expand ISD::CTTZ_ELTS[_ZERO_POISON]: compute the index of the first set
// lane of the mask operand (or the element count if no lane is set).
//
// The trick: build StepVec' = VL - [0,1,...,VL-1] = [VL, VL-1, ..., 1], zero
// out lanes where the mask is false, and take the unsigned max M. Then
// VL - M is the index of the first true lane, and VL when the mask is empty.
SDValue TargetLowering::expandCttzElts(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc DL(Node);
  EVT VT = Node->getValueType(ResNo: 0);

  bool ZeroIsPoison = Node->getOpcode() == ISD::CTTZ_ELTS_ZERO_POISON;
  auto [Mask, StepVec] =
      getLegalMaskAndStepVector(Mask: Node->getOperand(Num: 0), ZeroIsPoison, DL, DAG);
  EVT StepVecVT = StepVec.getValueType();
  EVT StepVT = StepVecVT.getVectorElementType();

  // Promote the scalar result type early to avoid redundant zexts.
  if (getTypeAction(VT: StepVT.getSimpleVT()) == TypePromoteInteger)
    StepVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT: StepVT);

  SDValue VL =
      DAG.getElementCount(DL, VT: StepVT, EC: StepVecVT.getVectorElementCount());
  SDValue SplatVL = DAG.getSplat(VT: StepVecVT, DL, Op: VL);
  // StepVec becomes [VL, VL-1, ..., 1].
  StepVec = DAG.getNode(Opcode: ISD::SUB, DL, VT: StepVecVT, N1: SplatVL, N2: StepVec);
  SDValue Zeroes = DAG.getConstant(Val: 0, DL, VT: StepVecVT);
  // Keep only the lanes selected by the mask; everything else contributes 0.
  SDValue Select = DAG.getSelect(DL, VT: StepVecVT, Cond: Mask, LHS: StepVec, RHS: Zeroes);
  SDValue Max = DAG.getNode(Opcode: ISD::VECREDUCE_UMAX, DL,
                            VT: StepVecVT.getVectorElementType(), Operand: Select);
  SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL, VT: StepVT, N1: VL,
                            N2: DAG.getZExtOrTrunc(Op: Max, DL, VT: StepVT));

  return DAG.getZExtOrTrunc(Op: Sub, DL, VT);
}
12593
// Expand PARTIAL_REDUCE_{U,S,F}MLA(Acc, LHS, RHS): extend the multiplicands
// to the accumulator's element type, multiply them (skipped when RHS is a
// splat of one), then slice the wide product into accumulator-sized
// subvectors and add them all into Acc.
SDValue TargetLowering::expandPartialReduceMLA(SDNode *N,
                                               SelectionDAG &DAG) const {
  SDLoc DL(N);
  SDValue Acc = N->getOperand(Num: 0);
  SDValue MulLHS = N->getOperand(Num: 1);
  SDValue MulRHS = N->getOperand(Num: 2);
  EVT AccVT = Acc.getValueType();
  EVT MulOpVT = MulLHS.getValueType();

  // Same element count as the multiplicands, but with the accumulator's
  // (wider) element type.
  EVT ExtMulOpVT =
      EVT::getVectorVT(Context&: *DAG.getContext(), VT: AccVT.getVectorElementType(),
                       EC: MulOpVT.getVectorElementCount());

  // Pick the extension matching the node's signedness/FP-ness.
  unsigned ExtOpcLHS, ExtOpcRHS;
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::PARTIAL_REDUCE_UMLA:
    ExtOpcLHS = ExtOpcRHS = ISD::ZERO_EXTEND;
    break;
  case ISD::PARTIAL_REDUCE_SMLA:
    ExtOpcLHS = ExtOpcRHS = ISD::SIGN_EXTEND;
    break;
  case ISD::PARTIAL_REDUCE_FMLA:
    ExtOpcLHS = ExtOpcRHS = ISD::FP_EXTEND;
    break;
  }

  if (ExtMulOpVT != MulOpVT) {
    MulLHS = DAG.getNode(Opcode: ExtOpcLHS, DL, VT: ExtMulOpVT, Operand: MulLHS);
    MulRHS = DAG.getNode(Opcode: ExtOpcRHS, DL, VT: ExtMulOpVT, Operand: MulRHS);
  }
  // Elide the multiply when RHS is a splat of one.
  SDValue Input = MulLHS;
  if (N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA) {
    if (!llvm::isOneOrOneSplatFP(V: MulRHS))
      Input = DAG.getNode(Opcode: ISD::FMUL, DL, VT: ExtMulOpVT, N1: MulLHS, N2: MulRHS);
  } else if (!llvm::isOneOrOneSplat(V: MulRHS)) {
    Input = DAG.getNode(Opcode: ISD::MUL, DL, VT: ExtMulOpVT, N1: MulLHS, N2: MulRHS);
  }

  unsigned Stride = AccVT.getVectorMinNumElements();
  unsigned ScaleFactor = MulOpVT.getVectorMinNumElements() / Stride;

  // Collect all of the subvectors
  std::deque<SDValue> Subvectors = {Acc};
  for (unsigned I = 0; I < ScaleFactor; I++)
    Subvectors.push_back(x: DAG.getExtractSubvector(DL, VT: AccVT, Vec: Input, Idx: I * Stride));

  unsigned FlatNode =
      N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA ? ISD::FADD : ISD::ADD;

  // Flatten the subvector tree: repeatedly add the two front entries and
  // append the sum, so the adds form a balanced tree rather than a chain.
  while (Subvectors.size() > 1) {
    Subvectors.push_back(
        x: DAG.getNode(Opcode: FlatNode, DL, VT: AccVT, Ops: {Subvectors[0], Subvectors[1]}));
    Subvectors.pop_front();
    Subvectors.pop_front();
  }

  assert(Subvectors.size() == 1 &&
         "There should only be one subvector after tree flattening");

  return Subvectors[0];
}
12658
12659/// Given a store node \p StoreNode, return true if it is safe to fold that node
12660/// into \p FPNode, which expands to a library call with output pointers.
12661static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode,
12662 SDNode *FPNode) {
12663 SmallVector<const SDNode *, 8> Worklist;
12664 SmallVector<const SDNode *, 8> DeferredNodes;
12665 SmallPtrSet<const SDNode *, 16> Visited;
12666
12667 // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode).
12668 for (SDValue Op : StoreNode->ops())
12669 if (Op.getNode() != FPNode)
12670 Worklist.push_back(Elt: Op.getNode());
12671
12672 unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps();
12673 while (!Worklist.empty()) {
12674 const SDNode *Node = Worklist.pop_back_val();
12675 auto [_, Inserted] = Visited.insert(Ptr: Node);
12676 if (!Inserted)
12677 continue;
12678
12679 if (MaxSteps > 0 && Visited.size() >= MaxSteps)
12680 return false;
12681
12682 // Reached the FPNode (would result in a cycle).
12683 // OR Reached CALLSEQ_START (would result in nested call sequences).
12684 if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START)
12685 return false;
12686
12687 if (Node->getOpcode() == ISD::CALLSEQ_END) {
12688 // Defer looking into call sequences (so we can check we're outside one).
12689 // We still need to look through these for the predecessor check.
12690 DeferredNodes.push_back(Elt: Node);
12691 continue;
12692 }
12693
12694 for (SDValue Op : Node->ops())
12695 Worklist.push_back(Elt: Op.getNode());
12696 }
12697
12698 // True if we're outside a call sequence and don't have the FPNode as a
12699 // predecessor. No cycles or nested call sequences possible.
12700 return !SDNode::hasPredecessorHelper(N: FPNode, Visited, Worklist&: DeferredNodes,
12701 MaxSteps);
12702}
12703
// Expand a node with multiple results (e.g. sincos/frexp-style operations)
// into a library call that returns its results through output pointers.
// Where a user of the node is a plain store with a compatible chain, the
// store's destination is passed directly as the output pointer and the store
// is folded into the call; otherwise a stack temporary is used and the
// result is re-loaded from it. Returns false if no suitable libcall exists.
bool TargetLowering::expandMultipleResultFPLibCall(
    SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node,
    SmallVectorImpl<SDValue> &Results,
    std::optional<unsigned> CallRetResNo) const {
  if (LC == RTLIB::UNKNOWN_LIBCALL)
    return false;

  RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(Call: LC);
  if (LibcallImpl == RTLIB::Unsupported)
    return false;

  LLVMContext &Ctx = *DAG.getContext();
  EVT VT = Node->getValueType(ResNo: 0);
  unsigned NumResults = Node->getNumValues();

  // Find users of the node that store the results (and share input chains). The
  // destination pointers can be used instead of creating stack allocations.
  SDValue StoresInChain;
  SmallVector<StoreSDNode *, 2> ResultStores(NumResults);
  for (SDNode *User : Node->users()) {
    if (!ISD::isNormalStore(N: User))
      continue;
    auto *ST = cast<StoreSDNode>(Val: User);
    SDValue StoreValue = ST->getValue();
    unsigned ResNo = StoreValue.getResNo();
    // Ensure the store corresponds to an output pointer.
    if (CallRetResNo == ResNo)
      continue;
    // Ensure the store to the default address space and not atomic or volatile.
    if (!ST->isSimple() || ST->getAddressSpace() != 0)
      continue;
    // Ensure all store chains are the same (so they don't alias).
    if (StoresInChain && ST->getChain() != StoresInChain)
      continue;
    // Ensure the store is properly aligned.
    Type *StoreType = StoreValue.getValueType().getTypeForEVT(Context&: Ctx);
    if (ST->getAlign() <
        DAG.getDataLayout().getABITypeAlign(Ty: StoreType->getScalarType()))
      continue;
    // Avoid:
    //  1. Creating cyclic dependencies.
    //  2. Expanding the node to a call within a call sequence.
    if (!canFoldStoreIntoLibCallOutputPointers(StoreNode: ST, FPNode: Node))
      continue;
    ResultStores[ResNo] = ST;
    StoresInChain = ST->getChain();
  }

  ArgListTy Args;

  // Pass the arguments.
  for (const SDValue &Op : Node->op_values()) {
    EVT ArgVT = Op.getValueType();
    Type *ArgTy = ArgVT.getTypeForEVT(Context&: Ctx);
    Args.emplace_back(args: Op, args&: ArgTy);
  }

  // Pass the output pointers. For results without a matching store, a stack
  // temporary serves as the output slot.
  SmallVector<SDValue, 2> ResultPtrs(NumResults);
  Type *PointerTy = PointerType::getUnqual(C&: Ctx);
  for (auto [ResNo, ST] : llvm::enumerate(First&: ResultStores)) {
    if (ResNo == CallRetResNo)
      continue;
    EVT ResVT = Node->getValueType(ResNo);
    SDValue ResultPtr = ST ? ST->getBasePtr() : DAG.CreateStackTemporary(VT: ResVT);
    ResultPtrs[ResNo] = ResultPtr;
    Args.emplace_back(args&: ResultPtr, args&: PointerTy);
  }

  SDLoc DL(Node);

  if (RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(Impl: LibcallImpl)) {
    // Pass the vector mask (if required).
    EVT MaskVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: Ctx, VT);
    SDValue Mask = DAG.getBoolConstant(V: true, DL, VT: MaskVT, OpVT: VT);
    Args.emplace_back(args&: Mask, args: MaskVT.getTypeForEVT(Context&: Ctx));
  }

  // The call returns void unless one result is delivered via the return value.
  Type *RetType = CallRetResNo.has_value()
                      ? Node->getValueType(ResNo: *CallRetResNo).getTypeForEVT(Context&: Ctx)
                      : Type::getVoidTy(C&: Ctx);
  SDValue InChain = StoresInChain ? StoresInChain : DAG.getEntryNode();
  SDValue Callee =
      DAG.getExternalSymbol(LCImpl: LibcallImpl, VT: getPointerTy(DL: DAG.getDataLayout()));
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(
      CC: getLibcallImplCallingConv(Call: LibcallImpl), ResultType: RetType, Target: Callee, ArgsList: std::move(Args));

  auto [Call, CallChain] = LowerCallTo(CLI);

  // Re-load each pointer-returned result, replacing any folded store's chain
  // with the load's chain so dependent users stay ordered after the call.
  for (auto [ResNo, ResultPtr] : llvm::enumerate(First&: ResultPtrs)) {
    if (ResNo == CallRetResNo) {
      Results.push_back(Elt: Call);
      continue;
    }
    MachinePointerInfo PtrInfo;
    SDValue LoadResult = DAG.getLoad(VT: Node->getValueType(ResNo), dl: DL, Chain: CallChain,
                                     Ptr: ResultPtr, PtrInfo);
    SDValue OutChain = LoadResult.getValue(R: 1);

    if (StoreSDNode *ST = ResultStores[ResNo]) {
      // Replace store with the library call.
      DAG.ReplaceAllUsesOfValueWith(From: SDValue(ST, 0), To: OutChain);
      PtrInfo = ST->getPointerInfo();
    } else {
      PtrInfo = MachinePointerInfo::getFixedStack(
          MF&: DAG.getMachineFunction(),
          FI: cast<FrameIndexSDNode>(Val&: ResultPtr)->getIndex());
    }

    Results.push_back(Elt: LoadResult);
  }

  return true;
}
12819
// Legalize the condition code of a SETCC-style comparison whose condition is
// not legal for OpVT. Tries, in order: swapping operands, inverting the
// condition (possibly combined with a swap), logic-op expansion for i1, and
// finally splitting into two comparisons joined by AND/OR (for FP ordered/
// unordered codes). Returns true if LHS/RHS/CC were rewritten (CC may become
// empty when the comparison was fully materialized into LHS); NeedInvert
// tells the caller to invert the final result.
bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
                                           SDValue &LHS, SDValue &RHS,
                                           SDValue &CC, SDValue Mask,
                                           SDValue EVL, bool &NeedInvert,
                                           const SDLoc &dl, SDValue &Chain,
                                           bool IsSignaling) const {
  MVT OpVT = LHS.getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(Val&: CC)->get();
  NeedInvert = false;
  assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
  bool IsNonVP = !EVL;
  switch (getCondCodeAction(CC: CCCode, VT: OpVT)) {
  default:
    llvm_unreachable("Unknown condition code action!");
  case TargetLowering::Legal:
    // Nothing to do.
    break;
  case TargetLowering::Expand: {
    // First try the swapped condition with swapped operands.
    ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(Operation: CCCode);
    if (isCondCodeLegalOrCustom(CC: InvCC, VT: OpVT)) {
      std::swap(a&: LHS, b&: RHS);
      CC = DAG.getCondCode(Cond: InvCC);
      return true;
    }
    // Swapping operands didn't work. Try inverting the condition.
    bool NeedSwap = false;
    InvCC = getSetCCInverse(Operation: CCCode, Type: OpVT);
    if (!isCondCodeLegalOrCustom(CC: InvCC, VT: OpVT)) {
      // If inverting the condition is not enough, try swapping operands
      // on top of it.
      InvCC = ISD::getSetCCSwappedOperands(Operation: InvCC);
      NeedSwap = true;
    }
    if (isCondCodeLegalOrCustom(CC: InvCC, VT: OpVT)) {
      CC = DAG.getCondCode(Cond: InvCC);
      NeedInvert = true;
      if (NeedSwap)
        std::swap(a&: LHS, b&: RHS);
      return true;
    }

    // Special case: expand i1 comparisons using logical operations.
    if (OpVT == MVT::i1) {
      SDValue Ret;
      switch (CCCode) {
      default:
        llvm_unreachable("Unknown integer setcc!");
      case ISD::SETEQ: // X == Y  -->  ~(X ^ Y)
        Ret = DAG.getNOT(DL: dl, Val: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i1, N1: LHS, N2: RHS),
                         VT: MVT::i1);
        break;
      case ISD::SETNE: // X != Y  -->  (X ^ Y)
        Ret = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i1, N1: LHS, N2: RHS);
        break;
      case ISD::SETGT:  // X >s Y  -->  X == 0 & Y == 1  -->  ~X & Y
      case ISD::SETULT: // X <u Y  -->  X == 0 & Y == 1  -->  ~X & Y
        Ret = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i1, N1: RHS,
                          N2: DAG.getNOT(DL: dl, Val: LHS, VT: MVT::i1));
        break;
      case ISD::SETLT:  // X <s Y  -->  X == 1 & Y == 0  -->  ~Y & X
      case ISD::SETUGT: // X >u Y  -->  X == 1 & Y == 0  -->  ~Y & X
        Ret = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i1, N1: LHS,
                          N2: DAG.getNOT(DL: dl, Val: RHS, VT: MVT::i1));
        break;
      case ISD::SETULE: // X <=u Y  -->  X == 0 | Y == 1  -->  ~X | Y
      case ISD::SETGE:  // X >=s Y  -->  X == 0 | Y == 1  -->  ~X | Y
        Ret = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: MVT::i1, N1: RHS,
                          N2: DAG.getNOT(DL: dl, Val: LHS, VT: MVT::i1));
        break;
      case ISD::SETUGE: // X >=u Y  -->  X == 1 | Y == 0  -->  ~Y | X
      case ISD::SETLE:  // X <=s Y  -->  X == 1 | Y == 0  -->  ~Y | X
        Ret = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: MVT::i1, N1: LHS,
                          N2: DAG.getNOT(DL: dl, Val: RHS, VT: MVT::i1));
        break;
      }

      // Comparison materialized into LHS; RHS/CC are consumed.
      LHS = DAG.getZExtOrTrunc(Op: Ret, DL: dl, VT);
      RHS = SDValue();
      CC = SDValue();
      return true;
    }

    // General FP case: split into two comparisons (CC1, CC2) joined by Opc.
    ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
    unsigned Opc = 0;
    switch (CCCode) {
    default:
      llvm_unreachable("Don't know how to expand this condition!");
    case ISD::SETUO:
      if (isCondCodeLegal(CC: ISD::SETUNE, VT: OpVT)) {
        CC1 = ISD::SETUNE;
        CC2 = ISD::SETUNE;
        Opc = ISD::OR;
        break;
      }
      assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
      NeedInvert = true;
      [[fallthrough]];
    case ISD::SETO:
      assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETO is expanded, SETOEQ must be legal!");
      CC1 = ISD::SETOEQ;
      CC2 = ISD::SETOEQ;
      Opc = ISD::AND;
      break;
    case ISD::SETONE:
    case ISD::SETUEQ:
      // If the SETUO or SETO CC isn't legal, we might be able to use
      // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
      // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
      // the operands.
      CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
      if (!isCondCodeLegal(CC: CC2, VT: OpVT) && (isCondCodeLegal(CC: ISD::SETOGT, VT: OpVT) ||
                                            isCondCodeLegal(CC: ISD::SETOLT, VT: OpVT))) {
        CC1 = ISD::SETOGT;
        CC2 = ISD::SETOLT;
        Opc = ISD::OR;
        NeedInvert = ((unsigned)CCCode & 0x8U);
        break;
      }
      [[fallthrough]];
    case ISD::SETOEQ:
    case ISD::SETOGT:
    case ISD::SETOGE:
    case ISD::SETOLT:
    case ISD::SETOLE:
    case ISD::SETUNE:
    case ISD::SETUGT:
    case ISD::SETUGE:
    case ISD::SETULT:
    case ISD::SETULE:
      // If we are floating point, assign and break, otherwise fall through.
      if (!OpVT.isInteger()) {
        // We can use the 4th bit to tell if we are the unordered
        // or ordered version of the opcode.
        CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
        Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
        CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
        break;
      }
      // Fallthrough if we are unsigned integer.
      [[fallthrough]];
    case ISD::SETLE:
    case ISD::SETGT:
    case ISD::SETGE:
    case ISD::SETLT:
    case ISD::SETNE:
    case ISD::SETEQ:
      // If all combinations of inverting the condition and swapping operands
      // didn't work then we have no means to expand the condition.
      llvm_unreachable("Don't know how to expand this condition!");
    }

    // Build the two comparisons and combine them with Opc.
    SDValue SetCC1, SetCC2;
    if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
      // If we aren't the ordered or unorder operation,
      // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(DL: dl, VT, LHS, RHS, Cond: CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(DL: dl, VT, LHS, RHS, Cond: CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(DL: dl, VT, LHS, RHS, Cond: CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(DL: dl, VT, LHS, RHS, Cond: CC2, Mask, EVL);
      }
    } else {
      // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(DL: dl, VT, LHS, RHS: LHS, Cond: CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(DL: dl, VT, LHS: RHS, RHS, Cond: CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(DL: dl, VT, LHS, RHS: LHS, Cond: CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(DL: dl, VT, LHS: RHS, RHS, Cond: CC2, Mask, EVL);
      }
    }
    // Merge the chains of the two (possibly chained/strict) comparisons.
    if (Chain)
      Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: SetCC1.getValue(R: 1),
                          N2: SetCC2.getValue(R: 1));
    if (IsNonVP)
      LHS = DAG.getNode(Opcode: Opc, DL: dl, VT, N1: SetCC1, N2: SetCC2);
    else {
      // Transform the binary opcode to the VP equivalent.
      assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
      Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
      LHS = DAG.getNode(Opcode: Opc, DL: dl, VT, N1: SetCC1, N2: SetCC2, N3: Mask, N4: EVL);
    }
    // Result fully materialized into LHS.
    RHS = SDValue();
    CC = SDValue();
    return true;
  }
  }
  return false;
}
13012
13013SDValue TargetLowering::expandVectorNaryOpBySplitting(SDNode *Node,
13014 SelectionDAG &DAG) const {
13015 EVT VT = Node->getValueType(ResNo: 0);
13016 // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
13017 // split into two equal parts.
13018 if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(RHS: 2))
13019 return SDValue();
13020
13021 // Restrict expansion to cases where both parts can be concatenated.
13022 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
13023 if (LoVT != HiVT || !isTypeLegal(VT: LoVT))
13024 return SDValue();
13025
13026 SDLoc DL(Node);
13027 unsigned Opcode = Node->getOpcode();
13028
13029 // Don't expand if the result is likely to be unrolled anyway.
13030 if (!isOperationLegalOrCustomOrPromote(Op: Opcode, VT: LoVT))
13031 return SDValue();
13032
13033 SmallVector<SDValue, 4> LoOps, HiOps;
13034 for (const SDValue &V : Node->op_values()) {
13035 auto [Lo, Hi] = DAG.SplitVector(N: V, DL, LoVT, HiVT);
13036 LoOps.push_back(Elt: Lo);
13037 HiOps.push_back(Elt: Hi);
13038 }
13039
13040 SDValue SplitOpLo = DAG.getNode(Opcode, DL, VT: LoVT, Ops: LoOps);
13041 SDValue SplitOpHi = DAG.getNode(Opcode, DL, VT: HiVT, Ops: HiOps);
13042 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, N1: SplitOpLo, N2: SplitOpHi);
13043}
13044
// Replace (extract_vector_elt (load InVec), EltNo) with a scalar load of just
// the extracted element, when the target allows the narrower access. Returns
// an empty SDValue if the transformation is not possible/profitable.
SDValue TargetLowering::scalarizeExtractedVectorLoad(EVT ResultVT,
                                                     const SDLoc &DL,
                                                     EVT InVecVT, SDValue EltNo,
                                                     LoadSDNode *OriginalLoad,
                                                     SelectionDAG &DAG) const {
  assert(OriginalLoad->isSimple());

  EVT VecEltVT = InVecVT.getVectorElementType();

  // If the vector element type is not a multiple of a byte then we are unable
  // to correctly compute an address to load only the extracted element as a
  // scalar.
  if (!VecEltVT.isByteSized())
    return SDValue();

  ISD::LoadExtType ExtTy =
      ResultVT.bitsGT(VT: VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
  if (!isOperationLegalOrCustom(Op: ISD::LOAD, VT: VecEltVT))
    return SDValue();

  // For a constant index we can compute a precise byte offset and keep the
  // pointer info; for a variable index only the address space survives.
  std::optional<unsigned> ByteOffset;
  Align Alignment = OriginalLoad->getAlign();
  MachinePointerInfo MPI;
  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(Val&: EltNo)) {
    int Elt = ConstEltNo->getZExtValue();
    ByteOffset = VecEltVT.getSizeInBits() * Elt / 8;
    MPI = OriginalLoad->getPointerInfo().getWithOffset(O: *ByteOffset);
    Alignment = commonAlignment(A: Alignment, Offset: *ByteOffset);
  } else {
    // Discard the pointer info except the address space because the memory
    // operand can't represent this new access since the offset is variable.
    MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
    Alignment = commonAlignment(A: Alignment, Offset: VecEltVT.getSizeInBits() / 8);
  }

  if (!shouldReduceLoadWidth(Load: OriginalLoad, ExtTy, NewVT: VecEltVT, ByteOffset))
    return SDValue();

  // Only proceed if the narrowed access is both allowed and fast.
  unsigned IsFast = 0;
  if (!allowsMemoryAccess(Context&: *DAG.getContext(), DL: DAG.getDataLayout(), VT: VecEltVT,
                          AddrSpace: OriginalLoad->getAddressSpace(), Alignment,
                          Flags: OriginalLoad->getMemOperand()->getFlags(), Fast: &IsFast) ||
      !IsFast)
    return SDValue();

  // The original DAG loaded the entire vector from memory, so arithmetic
  // within it must be inbounds.
  SDValue NewPtr = getInboundsVectorElementPointer(
      DAG, VecPtr: OriginalLoad->getBasePtr(), VecVT: InVecVT, Index: EltNo);

  // We are replacing a vector load with a scalar load. The new load must have
  // identical memory op ordering to the original.
  SDValue Load;
  if (ResultVT.bitsGT(VT: VecEltVT)) {
    // If the result type of vextract is wider than the load, then issue an
    // extending load instead.
    ISD::LoadExtType ExtType =
        isLoadLegal(ValVT: ResultVT, MemVT: VecEltVT, Alignment,
                    AddrSpace: OriginalLoad->getAddressSpace(), ExtType: ISD::ZEXTLOAD, Atomic: false)
            ? ISD::ZEXTLOAD
            : ISD::EXTLOAD;
    Load = DAG.getExtLoad(ExtType, dl: DL, VT: ResultVT, Chain: OriginalLoad->getChain(),
                          Ptr: NewPtr, PtrInfo: MPI, MemVT: VecEltVT, Alignment,
                          MMOFlags: OriginalLoad->getMemOperand()->getFlags(),
                          AAInfo: OriginalLoad->getAAInfo());
    DAG.makeEquivalentMemoryOrdering(OldLoad: OriginalLoad, NewMemOp: Load);
  } else {
    // The result type is narrower or the same width as the vector element
    Load = DAG.getLoad(VT: VecEltVT, dl: DL, Chain: OriginalLoad->getChain(), Ptr: NewPtr, PtrInfo: MPI,
                       Alignment, MMOFlags: OriginalLoad->getMemOperand()->getFlags(),
                       AAInfo: OriginalLoad->getAAInfo());
    DAG.makeEquivalentMemoryOrdering(OldLoad: OriginalLoad, NewMemOp: Load);
    if (ResultVT.bitsLT(VT: VecEltVT))
      Load = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: ResultVT, Operand: Load);
    else
      Load = DAG.getBitcast(VT: ResultVT, V: Load);
  }

  return Load;
}
13125
13126// Set type id for call site info and metadata 'call_target'.
13127// We are filtering for:
13128// a) The call-graph-section use case that wants to know about indirect
13129// calls, or
13130// b) We want to annotate indirect calls.
13131void TargetLowering::setTypeIdForCallsiteInfo(
13132 const CallBase *CB, MachineFunction &MF,
13133 MachineFunction::CallSiteInfo &CSInfo) const {
13134 if (CB && CB->isIndirectCall() &&
13135 (MF.getTarget().Options.EmitCallGraphSection ||
13136 MF.getTarget().Options.EmitCallSiteInfo))
13137 CSInfo = MachineFunction::CallSiteInfo(*CB);
13138}
13139