1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/CodeGen/TargetLowering.h"
14#include "llvm/ADT/STLExtras.h"
15#include "llvm/Analysis/ValueTracking.h"
16#include "llvm/Analysis/VectorUtils.h"
17#include "llvm/CodeGen/Analysis.h"
18#include "llvm/CodeGen/CallingConvLower.h"
19#include "llvm/CodeGen/CodeGenCommonISel.h"
20#include "llvm/CodeGen/MachineFrameInfo.h"
21#include "llvm/CodeGen/MachineFunction.h"
22#include "llvm/CodeGen/MachineJumpTableInfo.h"
23#include "llvm/CodeGen/MachineRegisterInfo.h"
24#include "llvm/CodeGen/SDPatternMatch.h"
25#include "llvm/CodeGen/SelectionDAG.h"
26#include "llvm/CodeGen/TargetRegisterInfo.h"
27#include "llvm/IR/DataLayout.h"
28#include "llvm/IR/DerivedTypes.h"
29#include "llvm/IR/GlobalVariable.h"
30#include "llvm/IR/LLVMContext.h"
31#include "llvm/MC/MCAsmInfo.h"
32#include "llvm/MC/MCExpr.h"
33#include "llvm/Support/DivisionByConstantInfo.h"
34#include "llvm/Support/ErrorHandling.h"
35#include "llvm/Support/KnownBits.h"
36#include "llvm/Support/MathExtras.h"
37#include "llvm/Target/TargetMachine.h"
38#include <cctype>
39#include <deque>
40using namespace llvm;
41using namespace llvm::SDPatternMatch;
42
43/// NOTE: The TargetMachine owns TLOF.
44TargetLowering::TargetLowering(const TargetMachine &tm,
45 const TargetSubtargetInfo &STI)
46 : TargetLoweringBase(tm, STI) {}
47
48// Define the virtual destructor out-of-line for build efficiency.
49TargetLowering::~TargetLowering() = default;
50
51const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
52 return nullptr;
53}
54
55bool TargetLowering::isPositionIndependent() const {
56 return getTargetMachine().isPositionIndependent();
57}
58
59/// Check whether a given call node is in tail position within its function. If
60/// so, it sets Chain to the input chain of the tail call.
61bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
62 SDValue &Chain) const {
63 const Function &F = DAG.getMachineFunction().getFunction();
64
65 // First, check if tail calls have been disabled in this function.
66 if (F.getFnAttribute(Kind: "disable-tail-calls").getValueAsBool())
67 return false;
68
69 // Conservatively require the attributes of the call to match those of
70 // the return. Ignore following attributes because they don't affect the
71 // call sequence.
72 AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
73 for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
74 Attribute::DereferenceableOrNull, Attribute::NoAlias,
75 Attribute::NonNull, Attribute::NoUndef,
76 Attribute::Range, Attribute::NoFPClass})
77 CallerAttrs.removeAttribute(Val: Attr);
78
79 if (CallerAttrs.hasAttributes())
80 return false;
81
82 // It's not safe to eliminate the sign / zero extension of the return value.
83 if (CallerAttrs.contains(A: Attribute::ZExt) ||
84 CallerAttrs.contains(A: Attribute::SExt))
85 return false;
86
87 // Check if the only use is a function return node.
88 return isUsedByReturnOnly(Node, Chain);
89}
90
91bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
92 const uint32_t *CallerPreservedMask,
93 const SmallVectorImpl<CCValAssign> &ArgLocs,
94 const SmallVectorImpl<SDValue> &OutVals) const {
95 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
96 const CCValAssign &ArgLoc = ArgLocs[I];
97 if (!ArgLoc.isRegLoc())
98 continue;
99 MCRegister Reg = ArgLoc.getLocReg();
100 // Only look at callee saved registers.
101 if (MachineOperand::clobbersPhysReg(RegMask: CallerPreservedMask, PhysReg: Reg))
102 continue;
103 // Check that we pass the value used for the caller.
104 // (We look for a CopyFromReg reading a virtual register that is used
105 // for the function live-in value of register Reg)
106 SDValue Value = OutVals[I];
107 if (Value->getOpcode() == ISD::AssertZext)
108 Value = Value.getOperand(i: 0);
109 if (Value->getOpcode() != ISD::CopyFromReg)
110 return false;
111 Register ArgReg = cast<RegisterSDNode>(Val: Value->getOperand(Num: 1))->getReg();
112 if (MRI.getLiveInPhysReg(VReg: ArgReg) != Reg)
113 return false;
114 }
115 return true;
116}
117
118/// Set CallLoweringInfo attribute flags based on a call instruction
119/// and called function attributes.
120void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
121 unsigned ArgIdx) {
122 IsSExt = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SExt);
123 IsZExt = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::ZExt);
124 IsNoExt = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::NoExt);
125 IsInReg = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::InReg);
126 IsSRet = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::StructRet);
127 IsNest = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::Nest);
128 IsByVal = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::ByVal);
129 IsPreallocated = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::Preallocated);
130 IsInAlloca = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::InAlloca);
131 IsReturned = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::Returned);
132 IsSwiftSelf = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SwiftSelf);
133 IsSwiftAsync = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SwiftAsync);
134 IsSwiftError = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SwiftError);
135 Alignment = Call->getParamStackAlign(ArgNo: ArgIdx);
136 IndirectType = nullptr;
137 assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
138 "multiple ABI attributes?");
139 if (IsByVal) {
140 IndirectType = Call->getParamByValType(ArgNo: ArgIdx);
141 if (!Alignment)
142 Alignment = Call->getParamAlign(ArgNo: ArgIdx);
143 }
144 if (IsPreallocated)
145 IndirectType = Call->getParamPreallocatedType(ArgNo: ArgIdx);
146 if (IsInAlloca)
147 IndirectType = Call->getParamInAllocaType(ArgNo: ArgIdx);
148 if (IsSRet)
149 IndirectType = Call->getParamStructRetType(ArgNo: ArgIdx);
150}
151
152/// Generate a libcall taking the given operands as arguments and returning a
153/// result of type RetVT.
154std::pair<SDValue, SDValue>
155TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl,
156 EVT RetVT, ArrayRef<SDValue> Ops,
157 MakeLibCallOptions CallOptions, const SDLoc &dl,
158 SDValue InChain) const {
159 if (LibcallImpl == RTLIB::Unsupported)
160 reportFatalInternalError(reason: "unsupported library call operation");
161
162 if (!InChain)
163 InChain = DAG.getEntryNode();
164
165 TargetLowering::ArgListTy Args;
166 Args.reserve(n: Ops.size());
167
168 ArrayRef<Type *> OpsTypeOverrides = CallOptions.OpsTypeOverrides;
169 for (unsigned i = 0; i < Ops.size(); ++i) {
170 SDValue NewOp = Ops[i];
171 Type *Ty = i < OpsTypeOverrides.size() && OpsTypeOverrides[i]
172 ? OpsTypeOverrides[i]
173 : NewOp.getValueType().getTypeForEVT(Context&: *DAG.getContext());
174 TargetLowering::ArgListEntry Entry(NewOp, Ty);
175 if (CallOptions.IsSoften)
176 Entry.OrigTy =
177 CallOptions.OpsVTBeforeSoften[i].getTypeForEVT(Context&: *DAG.getContext());
178
179 Entry.IsSExt =
180 shouldSignExtendTypeInLibCall(Ty: Entry.Ty, IsSigned: CallOptions.IsSigned);
181 Entry.IsZExt = !Entry.IsSExt;
182
183 if (CallOptions.IsSoften &&
184 !shouldExtendTypeInLibCall(Type: CallOptions.OpsVTBeforeSoften[i])) {
185 Entry.IsSExt = Entry.IsZExt = false;
186 }
187 Args.push_back(x: Entry);
188 }
189
190 SDValue Callee =
191 DAG.getExternalSymbol(LCImpl: LibcallImpl, VT: getPointerTy(DL: DAG.getDataLayout()));
192
193 Type *RetTy = RetVT.getTypeForEVT(Context&: *DAG.getContext());
194 Type *OrigRetTy = RetTy;
195 TargetLowering::CallLoweringInfo CLI(DAG);
196 bool signExtend = shouldSignExtendTypeInLibCall(Ty: RetTy, IsSigned: CallOptions.IsSigned);
197 bool zeroExtend = !signExtend;
198
199 if (CallOptions.IsSoften) {
200 OrigRetTy = CallOptions.RetVTBeforeSoften.getTypeForEVT(Context&: *DAG.getContext());
201 if (!shouldExtendTypeInLibCall(Type: CallOptions.RetVTBeforeSoften))
202 signExtend = zeroExtend = false;
203 }
204
205 CLI.setDebugLoc(dl)
206 .setChain(InChain)
207 .setLibCallee(CC: getLibcallImplCallingConv(Call: LibcallImpl), ResultType: RetTy, OrigResultType: OrigRetTy,
208 Target: Callee, ArgsList: std::move(Args))
209 .setNoReturn(CallOptions.DoesNotReturn)
210 .setDiscardResult(!CallOptions.IsReturnValueUsed)
211 .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
212 .setSExtResult(signExtend)
213 .setZExtResult(zeroExtend);
214 return LowerCallTo(CLI);
215}
216
217bool TargetLowering::findOptimalMemOpLowering(
218 LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
219 const MemOp &Op, unsigned DstAS, unsigned SrcAS,
220 const AttributeList &FuncAttributes, EVT *LargestVT) const {
221 EVT VT = getOptimalMemOpType(Context, Op, FuncAttributes);
222
223 if (VT == MVT::Other) {
224 // Use the largest integer type whose alignment constraints are satisfied.
225 VT = MVT::LAST_INTEGER_VALUETYPE;
226 if (Op.isFixedDstAlign()) {
227 bool LoadsFromSrc = Op.isMemcpy() && !Op.isMemcpyStrSrc();
228 while (VT != MVT::i8) {
229 unsigned VTSize = VT.getSizeInBits() / 8;
230 bool DstOk =
231 Op.getDstAlign() >= VTSize ||
232 allowsMisalignedMemoryAccesses(VT, AddrSpace: DstAS, Alignment: Op.getDstAlign());
233 bool SrcOk =
234 !LoadsFromSrc || Op.getSrcAlign() >= VTSize ||
235 allowsMisalignedMemoryAccesses(VT, AddrSpace: SrcAS, Alignment: Op.getSrcAlign());
236 if (DstOk && SrcOk)
237 break;
238 VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
239 }
240 }
241 assert(VT.isInteger());
242
243 // Find the largest legal integer type.
244 MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
245 while (!isTypeLegal(VT: LVT))
246 LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
247 assert(LVT.isInteger());
248
249 // If the type we've chosen is larger than the largest legal integer type
250 // then use the largest legal type.
251 if (VT.bitsGT(VT: LVT))
252 VT = LVT;
253 }
254
255 unsigned NumMemOps = 0;
256 uint64_t Size = Op.size();
257 while (Size) {
258 unsigned VTSize = VT.getSizeInBits() / 8;
259 while (VTSize > Size) {
260 // For now, only use non-vector load / store's for the left-over pieces.
261 EVT NewVT = VT;
262 unsigned NewVTSize;
263
264 bool Found = false;
265 if (VT.isVector() || VT.isFloatingPoint()) {
266 NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
267 if (isOperationLegalOrCustom(Op: ISD::STORE, VT: NewVT) &&
268 isSafeMemOpType(NewVT.getSimpleVT()))
269 Found = true;
270 else if (NewVT == MVT::i64 &&
271 isOperationLegalOrCustom(Op: ISD::STORE, VT: MVT::f64) &&
272 isSafeMemOpType(MVT::f64)) {
273 // i64 is usually not legal on 32-bit targets, but f64 may be.
274 NewVT = MVT::f64;
275 Found = true;
276 }
277 }
278
279 if (!Found) {
280 do {
281 NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
282 if (NewVT == MVT::i8)
283 break;
284 } while (!isSafeMemOpType(NewVT.getSimpleVT()));
285 }
286 NewVTSize = NewVT.getSizeInBits() / 8;
287
288 // If the new VT cannot cover all of the remaining bits, then consider
289 // issuing a (or a pair of) unaligned and overlapping load / store.
290 unsigned Fast;
291 if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
292 allowsMisalignedMemoryAccesses(
293 VT, AddrSpace: DstAS, Alignment: Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
294 Flags: MachineMemOperand::MONone, &Fast) &&
295 Fast)
296 VTSize = Size;
297 else {
298 VT = NewVT;
299 VTSize = NewVTSize;
300 }
301 }
302
303 if (++NumMemOps > Limit)
304 return false;
305
306 MemOps.push_back(x: VT);
307 Size -= VTSize;
308 }
309
310 return true;
311}
312
313/// Soften the operands of a comparison. This code is shared among BR_CC,
314/// SELECT_CC, and SETCC handlers.
315void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
316 SDValue &NewLHS, SDValue &NewRHS,
317 ISD::CondCode &CCCode,
318 const SDLoc &dl, const SDValue OldLHS,
319 const SDValue OldRHS) const {
320 SDValue Chain;
321 return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, DL: dl, OldLHS,
322 OldRHS, Chain);
323}
324
325void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
326 SDValue &NewLHS, SDValue &NewRHS,
327 ISD::CondCode &CCCode,
328 const SDLoc &dl, const SDValue OldLHS,
329 const SDValue OldRHS,
330 SDValue &Chain,
331 bool IsSignaling) const {
332 // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
333 // not supporting it. We can update this code when libgcc provides such
334 // functions.
335
336 assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
337 && "Unsupported setcc type!");
338
339 // Expand into one or more soft-fp libcall(s).
340 RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
341 bool ShouldInvertCC = false;
342 switch (CCCode) {
343 case ISD::SETEQ:
344 case ISD::SETOEQ:
345 LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
346 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
347 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
348 break;
349 case ISD::SETNE:
350 case ISD::SETUNE:
351 LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
352 (VT == MVT::f64) ? RTLIB::UNE_F64 :
353 (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
354 break;
355 case ISD::SETGE:
356 case ISD::SETOGE:
357 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
358 (VT == MVT::f64) ? RTLIB::OGE_F64 :
359 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
360 break;
361 case ISD::SETLT:
362 case ISD::SETOLT:
363 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
364 (VT == MVT::f64) ? RTLIB::OLT_F64 :
365 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
366 break;
367 case ISD::SETLE:
368 case ISD::SETOLE:
369 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
370 (VT == MVT::f64) ? RTLIB::OLE_F64 :
371 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
372 break;
373 case ISD::SETGT:
374 case ISD::SETOGT:
375 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
376 (VT == MVT::f64) ? RTLIB::OGT_F64 :
377 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
378 break;
379 case ISD::SETO:
380 ShouldInvertCC = true;
381 [[fallthrough]];
382 case ISD::SETUO:
383 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
384 (VT == MVT::f64) ? RTLIB::UO_F64 :
385 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
386 break;
387 case ISD::SETONE:
388 // SETONE = O && UNE
389 ShouldInvertCC = true;
390 [[fallthrough]];
391 case ISD::SETUEQ:
392 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
393 (VT == MVT::f64) ? RTLIB::UO_F64 :
394 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
395 LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
396 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
397 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
398 break;
399 default:
400 // Invert CC for unordered comparisons
401 ShouldInvertCC = true;
402 switch (CCCode) {
403 case ISD::SETULT:
404 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
405 (VT == MVT::f64) ? RTLIB::OGE_F64 :
406 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
407 break;
408 case ISD::SETULE:
409 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
410 (VT == MVT::f64) ? RTLIB::OGT_F64 :
411 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
412 break;
413 case ISD::SETUGT:
414 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
415 (VT == MVT::f64) ? RTLIB::OLE_F64 :
416 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
417 break;
418 case ISD::SETUGE:
419 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
420 (VT == MVT::f64) ? RTLIB::OLT_F64 :
421 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
422 break;
423 default: llvm_unreachable("Do not know how to soften this setcc!");
424 }
425 }
426
427 // Use the target specific return value for comparison lib calls.
428 EVT RetVT = getCmpLibcallReturnType();
429 SDValue Ops[2] = {NewLHS, NewRHS};
430 TargetLowering::MakeLibCallOptions CallOptions;
431 EVT OpsVT[2] = { OldLHS.getValueType(),
432 OldRHS.getValueType() };
433 CallOptions.setTypeListBeforeSoften(OpsVT, RetVT);
434 auto Call = makeLibCall(DAG, LC: LC1, RetVT, Ops, CallOptions, dl, Chain);
435 NewLHS = Call.first;
436 NewRHS = DAG.getConstant(Val: 0, DL: dl, VT: RetVT);
437
438 RTLIB::LibcallImpl LC1Impl = getLibcallImpl(Call: LC1);
439 if (LC1Impl == RTLIB::Unsupported) {
440 reportFatalUsageError(
441 reason: "no libcall available to soften floating-point compare");
442 }
443
444 CCCode = getSoftFloatCmpLibcallPredicate(Call: LC1Impl);
445 if (ShouldInvertCC) {
446 assert(RetVT.isInteger());
447 CCCode = getSetCCInverse(Operation: CCCode, Type: RetVT);
448 }
449
450 if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
451 // Update Chain.
452 Chain = Call.second;
453 } else {
454 RTLIB::LibcallImpl LC2Impl = getLibcallImpl(Call: LC2);
455 if (LC2Impl == RTLIB::Unsupported) {
456 reportFatalUsageError(
457 reason: "no libcall available to soften floating-point compare");
458 }
459
460 assert(CCCode == (ShouldInvertCC ? ISD::SETEQ : ISD::SETNE) &&
461 "unordered call should be simple boolean");
462
463 EVT SetCCVT =
464 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: RetVT);
465 if (getBooleanContents(Type: RetVT) == ZeroOrOneBooleanContent) {
466 NewLHS = DAG.getNode(Opcode: ISD::AssertZext, DL: dl, VT: RetVT, N1: Call.first,
467 N2: DAG.getValueType(MVT::i1));
468 }
469
470 SDValue Tmp = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: NewLHS, RHS: NewRHS, Cond: CCCode);
471 auto Call2 = makeLibCall(DAG, LC: LC2, RetVT, Ops, CallOptions, dl, Chain);
472 CCCode = getSoftFloatCmpLibcallPredicate(Call: LC2Impl);
473 if (ShouldInvertCC)
474 CCCode = getSetCCInverse(Operation: CCCode, Type: RetVT);
475 NewLHS = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Call2.first, RHS: NewRHS, Cond: CCCode);
476 if (Chain)
477 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Call.second,
478 N2: Call2.second);
479 NewLHS = DAG.getNode(Opcode: ShouldInvertCC ? ISD::AND : ISD::OR, DL: dl,
480 VT: Tmp.getValueType(), N1: Tmp, N2: NewLHS);
481 NewRHS = SDValue();
482 }
483}
484
485/// Return the entry encoding for a jump table in the current function. The
486/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
487unsigned TargetLowering::getJumpTableEncoding() const {
488 // In non-pic modes, just use the address of a block.
489 if (!isPositionIndependent())
490 return MachineJumpTableInfo::EK_BlockAddress;
491
492 // Otherwise, use a label difference.
493 return MachineJumpTableInfo::EK_LabelDifference32;
494}
495
496SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
497 SelectionDAG &DAG) const {
498 return Table;
499}
500
501/// This returns the relocation base for the given PIC jumptable, the same as
502/// getPICJumpTableRelocBase, but as an MCExpr.
503const MCExpr *
504TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
505 unsigned JTI,MCContext &Ctx) const{
506 // The normal PIC reloc base is the label at the start of the jump table.
507 return MCSymbolRefExpr::create(Symbol: MF->getJTISymbol(JTI, Ctx), Ctx);
508}
509
510SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
511 SDValue Addr, int JTI,
512 SelectionDAG &DAG) const {
513 SDValue Chain = Value;
514 // Jump table debug info is only needed if CodeView is enabled.
515 if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
516 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, DL: dl);
517 }
518 return DAG.getNode(Opcode: ISD::BRIND, DL: dl, VT: MVT::Other, N1: Chain, N2: Addr);
519}
520
521bool
522TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
523 const TargetMachine &TM = getTargetMachine();
524 const GlobalValue *GV = GA->getGlobal();
525
526 // If the address is not even local to this DSO we will have to load it from
527 // a got and then add the offset.
528 if (!TM.shouldAssumeDSOLocal(GV))
529 return false;
530
531 // If the code is position independent we will have to add a base register.
532 if (isPositionIndependent())
533 return false;
534
535 // Otherwise we can do it.
536 return true;
537}
538
539//===----------------------------------------------------------------------===//
540// Optimization Methods
541//===----------------------------------------------------------------------===//
542
543/// If the specified instruction has a constant integer operand and there are
544/// bits set in that constant that are not demanded, then clear those bits and
545/// return true.
546bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
547 const APInt &DemandedBits,
548 const APInt &DemandedElts,
549 TargetLoweringOpt &TLO) const {
550 SDLoc DL(Op);
551 unsigned Opcode = Op.getOpcode();
552
553 // Early-out if we've ended up calling an undemanded node, leave this to
554 // constant folding.
555 if (DemandedBits.isZero() || DemandedElts.isZero())
556 return false;
557
558 // Do target-specific constant optimization.
559 if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
560 return TLO.New.getNode();
561
562 // FIXME: ISD::SELECT, ISD::SELECT_CC
563 switch (Opcode) {
564 default:
565 break;
566 case ISD::XOR:
567 case ISD::AND:
568 case ISD::OR: {
569 auto *Op1C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1));
570 if (!Op1C || Op1C->isOpaque())
571 return false;
572
573 // If this is a 'not' op, don't touch it because that's a canonical form.
574 const APInt &C = Op1C->getAPIntValue();
575 if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(RHS: C))
576 return false;
577
578 if (!C.isSubsetOf(RHS: DemandedBits)) {
579 EVT VT = Op.getValueType();
580 SDValue NewC = TLO.DAG.getConstant(Val: DemandedBits & C, DL, VT);
581 SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, N1: Op.getOperand(i: 0), N2: NewC,
582 Flags: Op->getFlags());
583 return TLO.CombineTo(O: Op, N: NewOp);
584 }
585
586 break;
587 }
588 }
589
590 return false;
591}
592
593bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
594 const APInt &DemandedBits,
595 TargetLoweringOpt &TLO) const {
596 EVT VT = Op.getValueType();
597 APInt DemandedElts = VT.isVector()
598 ? APInt::getAllOnes(numBits: VT.getVectorNumElements())
599 : APInt(1, 1);
600 return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
601}
602
603/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
604/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
605/// but it could be generalized for targets with other types of implicit
606/// widening casts.
607bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
608 const APInt &DemandedBits,
609 TargetLoweringOpt &TLO) const {
610 assert(Op.getNumOperands() == 2 &&
611 "ShrinkDemandedOp only supports binary operators!");
612 assert(Op.getNode()->getNumValues() == 1 &&
613 "ShrinkDemandedOp only supports nodes with one result!");
614
615 EVT VT = Op.getValueType();
616 SelectionDAG &DAG = TLO.DAG;
617 SDLoc dl(Op);
618
619 // Early return, as this function cannot handle vector types.
620 if (VT.isVector())
621 return false;
622
623 assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
624 Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
625 "ShrinkDemandedOp only supports operands that have the same size!");
626
627 // Don't do this if the node has another user, which may require the
628 // full value.
629 if (!Op.getNode()->hasOneUse())
630 return false;
631
632 // Search for the smallest integer type with free casts to and from
633 // Op's type. For expedience, just check power-of-2 integer types.
634 unsigned DemandedSize = DemandedBits.getActiveBits();
635 for (unsigned SmallVTBits = llvm::bit_ceil(Value: DemandedSize);
636 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(A: SmallVTBits)) {
637 EVT SmallVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: SmallVTBits);
638 if (isTruncateFree(Val: Op, VT2: SmallVT) && isZExtFree(FromTy: SmallVT, ToTy: VT)) {
639 // We found a type with free casts.
640
641 // If the operation has the 'disjoint' flag, then the
642 // operands on the new node are also disjoint.
643 SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
644 : SDNodeFlags::None);
645 unsigned Opcode = Op.getOpcode();
646 if (Opcode == ISD::PTRADD) {
647 // It isn't a ptradd anymore if it doesn't operate on the entire
648 // pointer.
649 Opcode = ISD::ADD;
650 }
651 SDValue X = DAG.getNode(
652 Opcode, DL: dl, VT: SmallVT,
653 N1: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SmallVT, Operand: Op.getOperand(i: 0)),
654 N2: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SmallVT, Operand: Op.getOperand(i: 1)), Flags);
655 assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
656 SDValue Z = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: X);
657 return TLO.CombineTo(O: Op, N: Z);
658 }
659 }
660 return false;
661}
662
663bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
664 DAGCombinerInfo &DCI) const {
665 SelectionDAG &DAG = DCI.DAG;
666 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
667 !DCI.isBeforeLegalizeOps());
668 KnownBits Known;
669
670 bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
671 if (Simplified) {
672 DCI.AddToWorklist(N: Op.getNode());
673 DCI.CommitTargetLoweringOpt(TLO);
674 }
675 return Simplified;
676}
677
678bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
679 const APInt &DemandedElts,
680 DAGCombinerInfo &DCI) const {
681 SelectionDAG &DAG = DCI.DAG;
682 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
683 !DCI.isBeforeLegalizeOps());
684 KnownBits Known;
685
686 bool Simplified =
687 SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
688 if (Simplified) {
689 DCI.AddToWorklist(N: Op.getNode());
690 DCI.CommitTargetLoweringOpt(TLO);
691 }
692 return Simplified;
693}
694
695bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
696 KnownBits &Known,
697 TargetLoweringOpt &TLO,
698 unsigned Depth,
699 bool AssumeSingleUse) const {
700 EVT VT = Op.getValueType();
701
702 // Since the number of lanes in a scalable vector is unknown at compile time,
703 // we track one bit which is implicitly broadcast to all lanes. This means
704 // that all lanes in a scalable vector are considered demanded.
705 APInt DemandedElts = VT.isFixedLengthVector()
706 ? APInt::getAllOnes(numBits: VT.getVectorNumElements())
707 : APInt(1, 1);
708 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
709 AssumeSingleUse);
710}
711
712// TODO: Under what circumstances can we create nodes? Constant folding?
713SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
714 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
715 SelectionDAG &DAG, unsigned Depth) const {
716 EVT VT = Op.getValueType();
717
718 // Limit search depth.
719 if (Depth >= SelectionDAG::MaxRecursionDepth)
720 return SDValue();
721
722 // Ignore UNDEFs.
723 if (Op.isUndef())
724 return SDValue();
725
726 // Not demanding any bits/elts from Op.
727 if (DemandedBits == 0 || DemandedElts == 0)
728 return DAG.getUNDEF(VT);
729
730 bool IsLE = DAG.getDataLayout().isLittleEndian();
731 unsigned NumElts = DemandedElts.getBitWidth();
732 unsigned BitWidth = DemandedBits.getBitWidth();
733 KnownBits LHSKnown, RHSKnown;
734 switch (Op.getOpcode()) {
735 case ISD::BITCAST: {
736 if (VT.isScalableVector())
737 return SDValue();
738
739 SDValue Src = peekThroughBitcasts(V: Op.getOperand(i: 0));
740 EVT SrcVT = Src.getValueType();
741 EVT DstVT = Op.getValueType();
742 if (SrcVT == DstVT)
743 return Src;
744
745 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
746 unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
747 if (NumSrcEltBits == NumDstEltBits)
748 if (SDValue V = SimplifyMultipleUseDemandedBits(
749 Op: Src, DemandedBits, DemandedElts, DAG, Depth: Depth + 1))
750 return DAG.getBitcast(VT: DstVT, V);
751
752 if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
753 unsigned Scale = NumDstEltBits / NumSrcEltBits;
754 unsigned NumSrcElts = SrcVT.getVectorNumElements();
755 APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
756 for (unsigned i = 0; i != Scale; ++i) {
757 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
758 unsigned BitOffset = EltOffset * NumSrcEltBits;
759 DemandedSrcBits |= DemandedBits.extractBits(numBits: NumSrcEltBits, bitPosition: BitOffset);
760 }
761 // Recursive calls below may turn not demanded elements into poison, so we
762 // need to demand all smaller source elements that maps to a demanded
763 // destination element.
764 APInt DemandedSrcElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
765
766 if (SDValue V = SimplifyMultipleUseDemandedBits(
767 Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG, Depth: Depth + 1))
768 return DAG.getBitcast(VT: DstVT, V);
769 }
770
771 // TODO - bigendian once we have test coverage.
772 if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
773 unsigned Scale = NumSrcEltBits / NumDstEltBits;
774 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
775 APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
776 APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
777 for (unsigned i = 0; i != NumElts; ++i)
778 if (DemandedElts[i]) {
779 unsigned Offset = (i % Scale) * NumDstEltBits;
780 DemandedSrcBits.insertBits(SubBits: DemandedBits, bitPosition: Offset);
781 DemandedSrcElts.setBit(i / Scale);
782 }
783
784 if (SDValue V = SimplifyMultipleUseDemandedBits(
785 Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG, Depth: Depth + 1))
786 return DAG.getBitcast(VT: DstVT, V);
787 }
788
789 break;
790 }
791 case ISD::AND: {
792 LHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1);
793 RHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1);
794
795 // If all of the demanded bits are known 1 on one side, return the other.
796 // These bits cannot contribute to the result of the 'and' in this
797 // context.
798 if (DemandedBits.isSubsetOf(RHS: LHSKnown.Zero | RHSKnown.One))
799 return Op.getOperand(i: 0);
800 if (DemandedBits.isSubsetOf(RHS: RHSKnown.Zero | LHSKnown.One))
801 return Op.getOperand(i: 1);
802 break;
803 }
804 case ISD::OR: {
805 LHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1);
806 RHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1);
807
808 // If all of the demanded bits are known zero on one side, return the
809 // other. These bits cannot contribute to the result of the 'or' in this
810 // context.
811 if (DemandedBits.isSubsetOf(RHS: LHSKnown.One | RHSKnown.Zero))
812 return Op.getOperand(i: 0);
813 if (DemandedBits.isSubsetOf(RHS: RHSKnown.One | LHSKnown.Zero))
814 return Op.getOperand(i: 1);
815 break;
816 }
817 case ISD::XOR: {
818 LHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1);
819 RHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1);
820
821 // If all of the demanded bits are known zero on one side, return the
822 // other.
823 if (DemandedBits.isSubsetOf(RHS: RHSKnown.Zero))
824 return Op.getOperand(i: 0);
825 if (DemandedBits.isSubsetOf(RHS: LHSKnown.Zero))
826 return Op.getOperand(i: 1);
827 break;
828 }
829 case ISD::ADD:
830 case ISD::MUL:
831 case ISD::SMIN:
832 case ISD::SMAX:
833 case ISD::UMIN:
834 case ISD::UMAX: {
835 if (DAG.isIdentityElement(Opc: Op.getOpcode(), Flags: Op->getFlags(), V: Op.getOperand(i: 1),
836 DemandedElts, OperandNo: 1, Depth: Depth + 1))
837 return Op.getOperand(i: 0);
838
839 if (DAG.isIdentityElement(Opc: Op.getOpcode(), Flags: Op->getFlags(), V: Op.getOperand(i: 0),
840 DemandedElts, OperandNo: 0, Depth: Depth + 1))
841 return Op.getOperand(i: 1);
842 break;
843 }
844 case ISD::SHL: {
845 // If we are only demanding sign bits then we can use the shift source
846 // directly.
847 if (std::optional<unsigned> MaxSA =
848 DAG.getValidMaximumShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
849 SDValue Op0 = Op.getOperand(i: 0);
850 unsigned ShAmt = *MaxSA;
851 unsigned NumSignBits =
852 DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1);
853 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
854 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
855 return Op0;
856 }
857 break;
858 }
859 case ISD::SRL: {
860 // If we are only demanding sign bits then we can use the shift source
861 // directly.
862 if (std::optional<unsigned> MaxSA =
863 DAG.getValidMaximumShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
864 SDValue Op0 = Op.getOperand(i: 0);
865 unsigned ShAmt = *MaxSA;
866 // Must already be signbits in DemandedBits bounds, and can't demand any
867 // shifted in zeroes.
868 if (DemandedBits.countl_zero() >= ShAmt) {
869 unsigned NumSignBits =
870 DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1);
871 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
872 return Op0;
873 }
874 }
875 break;
876 }
877 case ISD::SETCC: {
878 SDValue Op0 = Op.getOperand(i: 0);
879 SDValue Op1 = Op.getOperand(i: 1);
880 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get();
881 // If (1) we only need the sign-bit, (2) the setcc operands are the same
882 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
883 // -1, we may be able to bypass the setcc.
884 if (DemandedBits.isSignMask() &&
885 Op0.getScalarValueSizeInBits() == BitWidth &&
886 getBooleanContents(Type: Op0.getValueType()) ==
887 BooleanContent::ZeroOrNegativeOneBooleanContent) {
888 // If we're testing X < 0, then this compare isn't needed - just use X!
889 // FIXME: We're limiting to integer types here, but this should also work
890 // if we don't care about FP signed-zero. The use of SETLT with FP means
891 // that we don't care about NaNs.
892 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
893 (isNullConstant(V: Op1) || ISD::isBuildVectorAllZeros(N: Op1.getNode())))
894 return Op0;
895 }
896 break;
897 }
898 case ISD::SIGN_EXTEND_INREG: {
899 // If none of the extended bits are demanded, eliminate the sextinreg.
900 SDValue Op0 = Op.getOperand(i: 0);
901 EVT ExVT = cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT();
902 unsigned ExBits = ExVT.getScalarSizeInBits();
903 if (DemandedBits.getActiveBits() <= ExBits &&
904 shouldRemoveRedundantExtend(Op))
905 return Op0;
906 // If the input is already sign extended, just drop the extension.
907 unsigned NumSignBits = DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1);
908 if (NumSignBits >= (BitWidth - ExBits + 1))
909 return Op0;
910 break;
911 }
912 case ISD::ANY_EXTEND_VECTOR_INREG:
913 case ISD::SIGN_EXTEND_VECTOR_INREG:
914 case ISD::ZERO_EXTEND_VECTOR_INREG: {
915 if (VT.isScalableVector())
916 return SDValue();
917
918 // If we only want the lowest element and none of extended bits, then we can
919 // return the bitcasted source vector.
920 SDValue Src = Op.getOperand(i: 0);
921 EVT SrcVT = Src.getValueType();
922 EVT DstVT = Op.getValueType();
923 if (IsLE && DemandedElts == 1 &&
924 DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
925 DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
926 return DAG.getBitcast(VT: DstVT, V: Src);
927 }
928 break;
929 }
930 case ISD::INSERT_VECTOR_ELT: {
931 if (VT.isScalableVector())
932 return SDValue();
933
934 // If we don't demand the inserted element, return the base vector.
935 SDValue Vec = Op.getOperand(i: 0);
936 auto *CIdx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));
937 EVT VecVT = Vec.getValueType();
938 if (CIdx && CIdx->getAPIntValue().ult(RHS: VecVT.getVectorNumElements()) &&
939 !DemandedElts[CIdx->getZExtValue()])
940 return Vec;
941 break;
942 }
943 case ISD::INSERT_SUBVECTOR: {
944 if (VT.isScalableVector())
945 return SDValue();
946
947 SDValue Vec = Op.getOperand(i: 0);
948 SDValue Sub = Op.getOperand(i: 1);
949 uint64_t Idx = Op.getConstantOperandVal(i: 2);
950 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
951 APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx);
952 // If we don't demand the inserted subvector, return the base vector.
953 if (DemandedSubElts == 0)
954 return Vec;
955 break;
956 }
957 case ISD::VECTOR_SHUFFLE: {
958 assert(!VT.isScalableVector());
959 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Val&: Op)->getMask();
960
961 // If all the demanded elts are from one operand and are inline,
962 // then we can use the operand directly.
963 bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
964 for (unsigned i = 0; i != NumElts; ++i) {
965 int M = ShuffleMask[i];
966 if (M < 0 || !DemandedElts[i])
967 continue;
968 AllUndef = false;
969 IdentityLHS &= (M == (int)i);
970 IdentityRHS &= ((M - NumElts) == i);
971 }
972
973 if (AllUndef)
974 return DAG.getUNDEF(VT: Op.getValueType());
975 if (IdentityLHS)
976 return Op.getOperand(i: 0);
977 if (IdentityRHS)
978 return Op.getOperand(i: 1);
979 break;
980 }
981 default:
982 // TODO: Probably okay to remove after audit; here to reduce change size
983 // in initial enablement patch for scalable vectors
984 if (VT.isScalableVector())
985 return SDValue();
986
987 if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
988 if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
989 Op, DemandedBits, DemandedElts, DAG, Depth))
990 return V;
991 break;
992 }
993 return SDValue();
994}
995
996SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
997 SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
998 unsigned Depth) const {
999 EVT VT = Op.getValueType();
1000 // Since the number of lanes in a scalable vector is unknown at compile time,
1001 // we track one bit which is implicitly broadcast to all lanes. This means
1002 // that all lanes in a scalable vector are considered demanded.
1003 APInt DemandedElts = VT.isFixedLengthVector()
1004 ? APInt::getAllOnes(numBits: VT.getVectorNumElements())
1005 : APInt(1, 1);
1006 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
1007 Depth);
1008}
1009
1010SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
1011 SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
1012 unsigned Depth) const {
1013 APInt DemandedBits = APInt::getAllOnes(numBits: Op.getScalarValueSizeInBits());
1014 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
1015 Depth);
1016}
1017
1018// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
1019// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
1020static SDValue combineShiftToAVG(SDValue Op,
1021 TargetLowering::TargetLoweringOpt &TLO,
1022 const TargetLowering &TLI,
1023 const APInt &DemandedBits,
1024 const APInt &DemandedElts, unsigned Depth) {
1025 assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
1026 "SRL or SRA node is required here!");
1027 // Is the right shift using an immediate value of 1?
1028 ConstantSDNode *N1C = isConstOrConstSplat(N: Op.getOperand(i: 1), DemandedElts);
1029 if (!N1C || !N1C->isOne())
1030 return SDValue();
1031
1032 // We are looking for an avgfloor
1033 // add(ext, ext)
1034 // or one of these as a avgceil
1035 // add(add(ext, ext), 1)
1036 // add(add(ext, 1), ext)
1037 // add(ext, add(ext, 1))
1038 SDValue Add = Op.getOperand(i: 0);
1039 if (Add.getOpcode() != ISD::ADD)
1040 return SDValue();
1041
1042 SDValue ExtOpA = Add.getOperand(i: 0);
1043 SDValue ExtOpB = Add.getOperand(i: 1);
1044 SDValue Add2;
1045 auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
1046 ConstantSDNode *ConstOp;
1047 if ((ConstOp = isConstOrConstSplat(N: Op2, DemandedElts)) &&
1048 ConstOp->isOne()) {
1049 ExtOpA = Op1;
1050 ExtOpB = Op3;
1051 Add2 = A;
1052 return true;
1053 }
1054 if ((ConstOp = isConstOrConstSplat(N: Op3, DemandedElts)) &&
1055 ConstOp->isOne()) {
1056 ExtOpA = Op1;
1057 ExtOpB = Op2;
1058 Add2 = A;
1059 return true;
1060 }
1061 return false;
1062 };
1063 bool IsCeil =
1064 (ExtOpA.getOpcode() == ISD::ADD &&
1065 MatchOperands(ExtOpA.getOperand(i: 0), ExtOpA.getOperand(i: 1), ExtOpB, ExtOpA)) ||
1066 (ExtOpB.getOpcode() == ISD::ADD &&
1067 MatchOperands(ExtOpB.getOperand(i: 0), ExtOpB.getOperand(i: 1), ExtOpA, ExtOpB));
1068
1069 // If the shift is signed (sra):
1070 // - Needs >= 2 sign bit for both operands.
1071 // - Needs >= 2 zero bits.
1072 // If the shift is unsigned (srl):
1073 // - Needs >= 1 zero bit for both operands.
1074 // - Needs 1 demanded bit zero and >= 2 sign bits.
1075 SelectionDAG &DAG = TLO.DAG;
1076 unsigned ShiftOpc = Op.getOpcode();
1077 bool IsSigned = false;
1078 unsigned KnownBits;
1079 unsigned NumSignedA = DAG.ComputeNumSignBits(Op: ExtOpA, DemandedElts, Depth);
1080 unsigned NumSignedB = DAG.ComputeNumSignBits(Op: ExtOpB, DemandedElts, Depth);
1081 unsigned NumSigned = std::min(a: NumSignedA, b: NumSignedB) - 1;
1082 unsigned NumZeroA =
1083 DAG.computeKnownBits(Op: ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
1084 unsigned NumZeroB =
1085 DAG.computeKnownBits(Op: ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
1086 unsigned NumZero = std::min(a: NumZeroA, b: NumZeroB);
1087
1088 switch (ShiftOpc) {
1089 default:
1090 llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
1091 case ISD::SRA: {
1092 if (NumZero >= 2 && NumSigned < NumZero) {
1093 IsSigned = false;
1094 KnownBits = NumZero;
1095 break;
1096 }
1097 if (NumSigned >= 1) {
1098 IsSigned = true;
1099 KnownBits = NumSigned;
1100 break;
1101 }
1102 return SDValue();
1103 }
1104 case ISD::SRL: {
1105 if (NumZero >= 1 && NumSigned < NumZero) {
1106 IsSigned = false;
1107 KnownBits = NumZero;
1108 break;
1109 }
1110 if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
1111 IsSigned = true;
1112 KnownBits = NumSigned;
1113 break;
1114 }
1115 return SDValue();
1116 }
1117 }
1118
1119 unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
1120 : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
1121
1122 // Find the smallest power-2 type that is legal for this vector size and
1123 // operation, given the original type size and the number of known sign/zero
1124 // bits.
1125 EVT VT = Op.getValueType();
1126 unsigned MinWidth =
1127 std::max<unsigned>(a: VT.getScalarSizeInBits() - KnownBits, b: 8);
1128 EVT NVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: llvm::bit_ceil(Value: MinWidth));
1129 if (NVT.getScalarSizeInBits() > VT.getScalarSizeInBits())
1130 return SDValue();
1131 if (VT.isVector())
1132 NVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: NVT, EC: VT.getVectorElementCount());
1133 if (TLO.LegalTypes() && !TLI.isOperationLegal(Op: AVGOpc, VT: NVT)) {
1134 // If we could not transform, and (both) adds are nuw/nsw, we can use the
1135 // larger type size to do the transform.
1136 if (TLO.LegalOperations() && !TLI.isOperationLegal(Op: AVGOpc, VT))
1137 return SDValue();
1138 if (DAG.willNotOverflowAdd(IsSigned, N0: Add.getOperand(i: 0),
1139 N1: Add.getOperand(i: 1)) &&
1140 (!Add2 || DAG.willNotOverflowAdd(IsSigned, N0: Add2.getOperand(i: 0),
1141 N1: Add2.getOperand(i: 1))))
1142 NVT = VT;
1143 else
1144 return SDValue();
1145 }
1146
1147 // Don't create a AVGFLOOR node with a scalar constant unless its legal as
1148 // this is likely to stop other folds (reassociation, value tracking etc.)
1149 if (!IsCeil && !TLI.isOperationLegal(Op: AVGOpc, VT: NVT) &&
1150 (isa<ConstantSDNode>(Val: ExtOpA) || isa<ConstantSDNode>(Val: ExtOpB)))
1151 return SDValue();
1152
1153 SDLoc DL(Op);
1154 SDValue ResultAVG =
1155 DAG.getNode(Opcode: AVGOpc, DL, VT: NVT, N1: DAG.getExtOrTrunc(IsSigned, Op: ExtOpA, DL, VT: NVT),
1156 N2: DAG.getExtOrTrunc(IsSigned, Op: ExtOpB, DL, VT: NVT));
1157 return DAG.getExtOrTrunc(IsSigned, Op: ResultAVG, DL, VT);
1158}
1159
1160/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1161/// result of Op are ever used downstream. If we can use this information to
1162/// simplify Op, create a new simplified DAG node and return true, returning the
1163/// original and new nodes in Old and New. Otherwise, analyze the expression and
1164/// return a mask of Known bits for the expression (used to simplify the
1165/// caller). The Known bits may only be accurate for those bits in the
1166/// OriginalDemandedBits and OriginalDemandedElts.
1167bool TargetLowering::SimplifyDemandedBits(
1168 SDValue Op, const APInt &OriginalDemandedBits,
1169 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1170 unsigned Depth, bool AssumeSingleUse) const {
1171 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1172 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1173 "Mask size mismatches value type size!");
1174
1175 // Don't know anything.
1176 Known = KnownBits(BitWidth);
1177
1178 EVT VT = Op.getValueType();
1179 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1180 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1181 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1182 "Unexpected vector size");
1183
1184 APInt DemandedBits = OriginalDemandedBits;
1185 APInt DemandedElts = OriginalDemandedElts;
1186 SDLoc dl(Op);
1187
1188 // Undef operand.
1189 if (Op.isUndef())
1190 return false;
1191
1192 // We can't simplify target constants.
1193 if (Op.getOpcode() == ISD::TargetConstant)
1194 return false;
1195
1196 if (Op.getOpcode() == ISD::Constant) {
1197 // We know all of the bits for a constant!
1198 Known = KnownBits::makeConstant(C: Op->getAsAPIntVal());
1199 return false;
1200 }
1201
1202 if (Op.getOpcode() == ISD::ConstantFP) {
1203 // We know all of the bits for a floating point constant!
1204 Known = KnownBits::makeConstant(
1205 C: cast<ConstantFPSDNode>(Val&: Op)->getValueAPF().bitcastToAPInt());
1206 return false;
1207 }
1208
1209 // Other users may use these bits.
1210 bool HasMultiUse = false;
1211 if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1212 if (Depth >= SelectionDAG::MaxRecursionDepth) {
1213 // Limit search depth.
1214 return false;
1215 }
1216 // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1217 DemandedBits = APInt::getAllOnes(numBits: BitWidth);
1218 DemandedElts = APInt::getAllOnes(numBits: NumElts);
1219 HasMultiUse = true;
1220 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1221 // Not demanding any bits/elts from Op.
1222 return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
1223 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1224 // Limit search depth.
1225 return false;
1226 }
1227
1228 KnownBits Known2;
1229 switch (Op.getOpcode()) {
1230 case ISD::SCALAR_TO_VECTOR: {
1231 if (VT.isScalableVector())
1232 return false;
1233 if (!DemandedElts[0])
1234 return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
1235
1236 KnownBits SrcKnown;
1237 SDValue Src = Op.getOperand(i: 0);
1238 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1239 APInt SrcDemandedBits = DemandedBits.zext(width: SrcBitWidth);
1240 if (SimplifyDemandedBits(Op: Src, DemandedBits: SrcDemandedBits, Known&: SrcKnown, TLO, Depth: Depth + 1))
1241 return true;
1242
1243 // Upper elements are undef, so only get the knownbits if we just demand
1244 // the bottom element.
1245 if (DemandedElts == 1)
1246 Known = SrcKnown.anyextOrTrunc(BitWidth);
1247 break;
1248 }
1249 case ISD::BUILD_VECTOR:
1250 // Collect the known bits that are shared by every demanded element.
1251 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1252 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1253 return false; // Don't fall through, will infinitely loop.
1254 case ISD::SPLAT_VECTOR: {
1255 SDValue Scl = Op.getOperand(i: 0);
1256 APInt DemandedSclBits = DemandedBits.zextOrTrunc(width: Scl.getValueSizeInBits());
1257 KnownBits KnownScl;
1258 if (SimplifyDemandedBits(Op: Scl, DemandedBits: DemandedSclBits, Known&: KnownScl, TLO, Depth: Depth + 1))
1259 return true;
1260
1261 // Implicitly truncate the bits to match the official semantics of
1262 // SPLAT_VECTOR.
1263 Known = KnownScl.trunc(BitWidth);
1264 break;
1265 }
1266 case ISD::FREEZE: {
1267 SDValue N0 = Op.getOperand(i: 0);
1268 if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(
1269 Op: N0, DemandedElts, Kind: UndefPoisonKind::UndefOrPoison, Depth: Depth + 1))
1270 return TLO.CombineTo(O: Op, N: N0);
1271 break;
1272 }
1273 case ISD::LOAD: {
1274 auto *LD = cast<LoadSDNode>(Val&: Op);
1275 if (getTargetConstantFromLoad(LD)) {
1276 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1277 return false; // Don't fall through, will infinitely loop.
1278 }
1279 if (ISD::isZEXTLoad(N: Op.getNode()) && Op.getResNo() == 0) {
1280 // If this is a ZEXTLoad and we are looking at the loaded value.
1281 EVT MemVT = LD->getMemoryVT();
1282 unsigned MemBits = MemVT.getScalarSizeInBits();
1283 Known.Zero.setBitsFrom(MemBits);
1284 return false; // Don't fall through, will infinitely loop.
1285 }
1286 break;
1287 }
1288 case ISD::INSERT_VECTOR_ELT: {
1289 if (VT.isScalableVector())
1290 return false;
1291 SDValue Vec = Op.getOperand(i: 0);
1292 SDValue Scl = Op.getOperand(i: 1);
1293 auto *CIdx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));
1294 EVT VecVT = Vec.getValueType();
1295
1296 // If index isn't constant, assume we need all vector elements AND the
1297 // inserted element.
1298 APInt DemandedVecElts(DemandedElts);
1299 if (CIdx && CIdx->getAPIntValue().ult(RHS: VecVT.getVectorNumElements())) {
1300 unsigned Idx = CIdx->getZExtValue();
1301 DemandedVecElts.clearBit(BitPosition: Idx);
1302
1303 // Inserted element is not required.
1304 if (!DemandedElts[Idx])
1305 return TLO.CombineTo(O: Op, N: Vec);
1306 }
1307
1308 KnownBits KnownScl;
1309 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1310 APInt DemandedSclBits = DemandedBits.zextOrTrunc(width: NumSclBits);
1311 if (SimplifyDemandedBits(Op: Scl, DemandedBits: DemandedSclBits, Known&: KnownScl, TLO, Depth: Depth + 1))
1312 return true;
1313
1314 Known = KnownScl.anyextOrTrunc(BitWidth);
1315
1316 KnownBits KnownVec;
1317 if (SimplifyDemandedBits(Op: Vec, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedVecElts, Known&: KnownVec, TLO,
1318 Depth: Depth + 1))
1319 return true;
1320
1321 if (!!DemandedVecElts)
1322 Known = Known.intersectWith(RHS: KnownVec);
1323
1324 return false;
1325 }
1326 case ISD::INSERT_SUBVECTOR: {
1327 if (VT.isScalableVector())
1328 return false;
1329 // Demand any elements from the subvector and the remainder from the src its
1330 // inserted into.
1331 SDValue Src = Op.getOperand(i: 0);
1332 SDValue Sub = Op.getOperand(i: 1);
1333 uint64_t Idx = Op.getConstantOperandVal(i: 2);
1334 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1335 APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx);
1336 APInt DemandedSrcElts = DemandedElts;
1337 DemandedSrcElts.clearBits(LoBit: Idx, HiBit: Idx + NumSubElts);
1338
1339 KnownBits KnownSub, KnownSrc;
1340 if (SimplifyDemandedBits(Op: Sub, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSubElts, Known&: KnownSub, TLO,
1341 Depth: Depth + 1))
1342 return true;
1343 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSrcElts, Known&: KnownSrc, TLO,
1344 Depth: Depth + 1))
1345 return true;
1346
1347 Known.setAllConflict();
1348 if (!!DemandedSubElts)
1349 Known = Known.intersectWith(RHS: KnownSub);
1350 if (!!DemandedSrcElts)
1351 Known = Known.intersectWith(RHS: KnownSrc);
1352
1353 // Attempt to avoid multi-use src if we don't need anything from it.
1354 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1355 !DemandedSrcElts.isAllOnes()) {
1356 SDValue NewSub = SimplifyMultipleUseDemandedBits(
1357 Op: Sub, DemandedBits, DemandedElts: DemandedSubElts, DAG&: TLO.DAG, Depth: Depth + 1);
1358 SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1359 Op: Src, DemandedBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1);
1360 if (NewSub || NewSrc) {
1361 NewSub = NewSub ? NewSub : Sub;
1362 NewSrc = NewSrc ? NewSrc : Src;
1363 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: NewSrc, N2: NewSub,
1364 N3: Op.getOperand(i: 2));
1365 return TLO.CombineTo(O: Op, N: NewOp);
1366 }
1367 }
1368 break;
1369 }
1370 case ISD::EXTRACT_SUBVECTOR: {
1371 if (VT.isScalableVector())
1372 return false;
1373 // Offset the demanded elts by the subvector index.
1374 SDValue Src = Op.getOperand(i: 0);
1375 if (Src.getValueType().isScalableVector())
1376 break;
1377 uint64_t Idx = Op.getConstantOperandVal(i: 1);
1378 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1379 APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts).shl(shiftAmt: Idx);
1380
1381 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSrcElts, Known, TLO,
1382 Depth: Depth + 1))
1383 return true;
1384
1385 // Attempt to avoid multi-use src if we don't need anything from it.
1386 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1387 SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1388 Op: Src, DemandedBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1);
1389 if (DemandedSrc) {
1390 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedSrc,
1391 N2: Op.getOperand(i: 1));
1392 return TLO.CombineTo(O: Op, N: NewOp);
1393 }
1394 }
1395 break;
1396 }
1397 case ISD::CONCAT_VECTORS: {
1398 if (VT.isScalableVector())
1399 return false;
1400 Known.setAllConflict();
1401 EVT SubVT = Op.getOperand(i: 0).getValueType();
1402 unsigned NumSubVecs = Op.getNumOperands();
1403 unsigned NumSubElts = SubVT.getVectorNumElements();
1404 for (unsigned i = 0; i != NumSubVecs; ++i) {
1405 APInt DemandedSubElts =
1406 DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
1407 if (SimplifyDemandedBits(Op: Op.getOperand(i), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSubElts,
1408 Known&: Known2, TLO, Depth: Depth + 1))
1409 return true;
1410 // Known bits are shared by every demanded subvector element.
1411 if (!!DemandedSubElts)
1412 Known = Known.intersectWith(RHS: Known2);
1413 }
1414 break;
1415 }
1416 case ISD::VECTOR_SHUFFLE: {
1417 assert(!VT.isScalableVector());
1418 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Val&: Op)->getMask();
1419
1420 // Collect demanded elements from shuffle operands..
1421 APInt DemandedLHS, DemandedRHS;
1422 if (!getShuffleDemandedElts(SrcWidth: NumElts, Mask: ShuffleMask, DemandedElts, DemandedLHS,
1423 DemandedRHS))
1424 break;
1425
1426 if (!!DemandedLHS || !!DemandedRHS) {
1427 SDValue Op0 = Op.getOperand(i: 0);
1428 SDValue Op1 = Op.getOperand(i: 1);
1429
1430 Known.setAllConflict();
1431 if (!!DemandedLHS) {
1432 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedLHS, Known&: Known2, TLO,
1433 Depth: Depth + 1))
1434 return true;
1435 Known = Known.intersectWith(RHS: Known2);
1436 }
1437 if (!!DemandedRHS) {
1438 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedRHS, Known&: Known2, TLO,
1439 Depth: Depth + 1))
1440 return true;
1441 Known = Known.intersectWith(RHS: Known2);
1442 }
1443
1444 // Attempt to avoid multi-use ops if we don't need anything from them.
1445 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1446 Op: Op0, DemandedBits, DemandedElts: DemandedLHS, DAG&: TLO.DAG, Depth: Depth + 1);
1447 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1448 Op: Op1, DemandedBits, DemandedElts: DemandedRHS, DAG&: TLO.DAG, Depth: Depth + 1);
1449 if (DemandedOp0 || DemandedOp1) {
1450 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1451 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1452 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, N1: Op0, N2: Op1, Mask: ShuffleMask);
1453 return TLO.CombineTo(O: Op, N: NewOp);
1454 }
1455 }
1456 break;
1457 }
1458 case ISD::AND: {
1459 SDValue Op0 = Op.getOperand(i: 0);
1460 SDValue Op1 = Op.getOperand(i: 1);
1461
1462 // If the RHS is a constant, check to see if the LHS would be zero without
1463 // using the bits from the RHS. Below, we use knowledge about the RHS to
1464 // simplify the LHS, here we're using information from the LHS to simplify
1465 // the RHS.
1466 if (ConstantSDNode *RHSC = isConstOrConstSplat(N: Op1, DemandedElts)) {
1467 // Do not increment Depth here; that can cause an infinite loop.
1468 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op: Op0, DemandedElts, Depth);
1469 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1470 if ((LHSKnown.Zero & DemandedBits) ==
1471 (~RHSC->getAPIntValue() & DemandedBits))
1472 return TLO.CombineTo(O: Op, N: Op0);
1473
1474 // If any of the set bits in the RHS are known zero on the LHS, shrink
1475 // the constant.
1476 if (ShrinkDemandedConstant(Op, DemandedBits: ~LHSKnown.Zero & DemandedBits,
1477 DemandedElts, TLO))
1478 return true;
1479
1480 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1481 // constant, but if this 'and' is only clearing bits that were just set by
1482 // the xor, then this 'and' can be eliminated by shrinking the mask of
1483 // the xor. For example, for a 32-bit X:
1484 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1485 if (isBitwiseNot(V: Op0) && Op0.hasOneUse() &&
1486 LHSKnown.One == ~RHSC->getAPIntValue()) {
1487 SDValue Xor = TLO.DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Op0.getOperand(i: 0), N2: Op1);
1488 return TLO.CombineTo(O: Op, N: Xor);
1489 }
1490 }
1491
1492 // (X +/- Y) & Y --> ~X & Y when Y is a power of 2 (or zero).
1493 SDValue X, Y;
1494 if (sd_match(N: Op,
1495 P: m_And(L: m_Value(N&: Y),
1496 R: m_OneUse(P: m_AnyOf(preds: m_Add(L: m_Value(N&: X), R: m_Deferred(V&: Y)),
1497 preds: m_Sub(L: m_Value(N&: X), R: m_Deferred(V&: Y)))))) &&
1498 TLO.DAG.isKnownToBeAPowerOfTwo(Val: Y, DemandedElts, /*OrZero=*/true)) {
1499 return TLO.CombineTo(
1500 O: Op, N: TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: TLO.DAG.getNOT(DL: dl, Val: X, VT), N2: Y));
1501 }
1502
1503 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1504 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1505 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1506 (Op0.getOperand(i: 0).isUndef() ||
1507 ISD::isBuildVectorOfConstantSDNodes(N: Op0.getOperand(i: 0).getNode())) &&
1508 Op0->hasOneUse()) {
1509 unsigned NumSubElts =
1510 Op0.getOperand(i: 1).getValueType().getVectorNumElements();
1511 unsigned SubIdx = Op0.getConstantOperandVal(i: 2);
1512 APInt DemandedSub =
1513 APInt::getBitsSet(numBits: NumElts, loBit: SubIdx, hiBit: SubIdx + NumSubElts);
1514 KnownBits KnownSubMask =
1515 TLO.DAG.computeKnownBits(Op: Op1, DemandedElts: DemandedSub & DemandedElts, Depth: Depth + 1);
1516 if (DemandedBits.isSubsetOf(RHS: KnownSubMask.One)) {
1517 SDValue NewAnd =
1518 TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op0.getOperand(i: 0), N2: Op1);
1519 SDValue NewInsert =
1520 TLO.DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL: dl, VT, N1: NewAnd,
1521 N2: Op0.getOperand(i: 1), N3: Op0.getOperand(i: 2));
1522 return TLO.CombineTo(O: Op, N: NewInsert);
1523 }
1524 }
1525
1526 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1527 Depth: Depth + 1))
1528 return true;
1529 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: ~Known.Zero & DemandedBits, OriginalDemandedElts: DemandedElts,
1530 Known&: Known2, TLO, Depth: Depth + 1))
1531 return true;
1532
1533 // If all of the demanded bits are known one on one side, return the other.
1534 // These bits cannot contribute to the result of the 'and'.
1535 if (DemandedBits.isSubsetOf(RHS: Known2.Zero | Known.One))
1536 return TLO.CombineTo(O: Op, N: Op0);
1537 if (DemandedBits.isSubsetOf(RHS: Known.Zero | Known2.One))
1538 return TLO.CombineTo(O: Op, N: Op1);
1539 // If all of the demanded bits in the inputs are known zeros, return zero.
1540 if (DemandedBits.isSubsetOf(RHS: Known.Zero | Known2.Zero))
1541 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: dl, VT));
1542 // If the RHS is a constant, see if we can simplify it.
1543 if (ShrinkDemandedConstant(Op, DemandedBits: ~Known2.Zero & DemandedBits, DemandedElts,
1544 TLO))
1545 return true;
1546 // If the operation can be done in a smaller type, do so.
1547 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1548 return true;
1549
1550 // Attempt to avoid multi-use ops if we don't need anything from them.
1551 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1552 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1553 Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1554 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1555 Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1556 if (DemandedOp0 || DemandedOp1) {
1557 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1558 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1559 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1560 return TLO.CombineTo(O: Op, N: NewOp);
1561 }
1562 }
1563
1564 Known &= Known2;
1565 break;
1566 }
1567 case ISD::OR: {
1568 SDValue Op0 = Op.getOperand(i: 0);
1569 SDValue Op1 = Op.getOperand(i: 1);
1570 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1571 Depth: Depth + 1)) {
1572 Op->dropFlags(Mask: SDNodeFlags::Disjoint);
1573 return true;
1574 }
1575
1576 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: ~Known.One & DemandedBits, OriginalDemandedElts: DemandedElts,
1577 Known&: Known2, TLO, Depth: Depth + 1)) {
1578 Op->dropFlags(Mask: SDNodeFlags::Disjoint);
1579 return true;
1580 }
1581
1582 // If all of the demanded bits are known zero on one side, return the other.
1583 // These bits cannot contribute to the result of the 'or'.
1584 if (DemandedBits.isSubsetOf(RHS: Known2.One | Known.Zero))
1585 return TLO.CombineTo(O: Op, N: Op0);
1586 if (DemandedBits.isSubsetOf(RHS: Known.One | Known2.Zero))
1587 return TLO.CombineTo(O: Op, N: Op1);
1588 // If the RHS is a constant, see if we can simplify it.
1589 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1590 return true;
1591 // If the operation can be done in a smaller type, do so.
1592 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1593 return true;
1594
1595 // Attempt to avoid multi-use ops if we don't need anything from them.
1596 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1597 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1598 Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1599 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1600 Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1601 if (DemandedOp0 || DemandedOp1) {
1602 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1603 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1604 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1605 return TLO.CombineTo(O: Op, N: NewOp);
1606 }
1607 }
1608
1609 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1610 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1611 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1612 Op0->hasOneUse() && Op1->hasOneUse()) {
1613 // Attempt to match all commutations - m_c_Or would've been useful!
1614 for (int I = 0; I != 2; ++I) {
1615 SDValue X = Op.getOperand(i: I).getOperand(i: 0);
1616 SDValue C1 = Op.getOperand(i: I).getOperand(i: 1);
1617 SDValue Alt = Op.getOperand(i: 1 - I).getOperand(i: 0);
1618 SDValue C2 = Op.getOperand(i: 1 - I).getOperand(i: 1);
1619 if (Alt.getOpcode() == ISD::OR) {
1620 for (int J = 0; J != 2; ++J) {
1621 if (X == Alt.getOperand(i: J)) {
1622 SDValue Y = Alt.getOperand(i: 1 - J);
1623 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(Opcode: ISD::OR, DL: dl, VT,
1624 Ops: {C1, C2})) {
1625 SDValue MaskX = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: X, N2: C12);
1626 SDValue MaskY = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Y, N2: C2);
1627 return TLO.CombineTo(
1628 O: Op, N: TLO.DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: MaskX, N2: MaskY));
1629 }
1630 }
1631 }
1632 }
1633 }
1634 }
1635
1636 Known |= Known2;
1637 break;
1638 }
1639 case ISD::XOR: {
1640 SDValue Op0 = Op.getOperand(i: 0);
1641 SDValue Op1 = Op.getOperand(i: 1);
1642
1643 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1644 Depth: Depth + 1))
1645 return true;
1646 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
1647 Depth: Depth + 1))
1648 return true;
1649
1650 // If all of the demanded bits are known zero on one side, return the other.
1651 // These bits cannot contribute to the result of the 'xor'.
1652 if (DemandedBits.isSubsetOf(RHS: Known.Zero))
1653 return TLO.CombineTo(O: Op, N: Op0);
1654 if (DemandedBits.isSubsetOf(RHS: Known2.Zero))
1655 return TLO.CombineTo(O: Op, N: Op1);
1656 // If the operation can be done in a smaller type, do so.
1657 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1658 return true;
1659
1660 // If every demanded bit is known to be zero on one side or the other,
1661 // turn this into an *inclusive* or.
1662 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1663 if (DemandedBits.isSubsetOf(RHS: Known.Zero | Known2.Zero))
1664 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Op0, N2: Op1));
1665
1666 ConstantSDNode *C = isConstOrConstSplat(N: Op1, DemandedElts);
1667 if (C) {
1668 // If one side is a constant, and all of the set bits in the constant are
1669 // also known set on the other side, turn this into an AND, as we know
1670 // the bits will be cleared.
1671 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1672 // NB: it is okay if more bits are known than are requested
1673 if (C->getAPIntValue() == Known2.One) {
1674 SDValue ANDC =
1675 TLO.DAG.getConstant(Val: ~C->getAPIntValue() & DemandedBits, DL: dl, VT);
1676 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op0, N2: ANDC));
1677 }
1678
1679 // If the RHS is a constant, see if we can change it. Don't alter a -1
1680 // constant because that's a 'not' op, and that is better for combining
1681 // and codegen.
1682 if (!C->isAllOnes() && DemandedBits.isSubsetOf(RHS: C->getAPIntValue())) {
1683 // We're flipping all demanded bits. Flip the undemanded bits too.
1684 SDValue New = TLO.DAG.getNOT(DL: dl, Val: Op0, VT);
1685 return TLO.CombineTo(O: Op, N: New);
1686 }
1687
1688 unsigned Op0Opcode = Op0.getOpcode();
1689 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1690 if (ConstantSDNode *ShiftC =
1691 isConstOrConstSplat(N: Op0.getOperand(i: 1), DemandedElts)) {
1692 // Don't crash on an oversized shift. We can not guarantee that a
1693 // bogus shift has been simplified to undef.
1694 if (ShiftC->getAPIntValue().ult(RHS: BitWidth)) {
1695 uint64_t ShiftAmt = ShiftC->getZExtValue();
1696 APInt Ones = APInt::getAllOnes(numBits: BitWidth);
1697 Ones = Op0Opcode == ISD::SHL ? Ones.shl(shiftAmt: ShiftAmt)
1698 : Ones.lshr(shiftAmt: ShiftAmt);
1699 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1700 isDesirableToCommuteXorWithShift(N: Op.getNode())) {
1701 // If the xor constant is a demanded mask, do a 'not' before the
1702 // shift:
1703 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1704 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1705 SDValue Not = TLO.DAG.getNOT(DL: dl, Val: Op0.getOperand(i: 0), VT);
1706 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op0Opcode, DL: dl, VT, N1: Not,
1707 N2: Op0.getOperand(i: 1)));
1708 }
1709 }
1710 }
1711 }
1712 }
1713
1714 // If we can't turn this into a 'not', try to shrink the constant.
1715 if (!C || !C->isAllOnes())
1716 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1717 return true;
1718
1719 // Attempt to avoid multi-use ops if we don't need anything from them.
1720 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1721 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1722 Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1723 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1724 Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1725 if (DemandedOp0 || DemandedOp1) {
1726 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1727 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1728 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1729 return TLO.CombineTo(O: Op, N: NewOp);
1730 }
1731 }
1732
1733 Known ^= Known2;
1734 break;
1735 }
1736 case ISD::SELECT:
1737 if (SimplifyDemandedBits(Op: Op.getOperand(i: 2), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1738 Known, TLO, Depth: Depth + 1))
1739 return true;
1740 if (SimplifyDemandedBits(Op: Op.getOperand(i: 1), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1741 Known&: Known2, TLO, Depth: Depth + 1))
1742 return true;
1743
1744 // If the operands are constants, see if we can simplify them.
1745 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1746 return true;
1747
1748 // Only known if known in both the LHS and RHS.
1749 Known = Known.intersectWith(RHS: Known2);
1750 break;
1751 case ISD::VSELECT:
1752 if (SimplifyDemandedBits(Op: Op.getOperand(i: 2), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1753 Known, TLO, Depth: Depth + 1))
1754 return true;
1755 if (SimplifyDemandedBits(Op: Op.getOperand(i: 1), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1756 Known&: Known2, TLO, Depth: Depth + 1))
1757 return true;
1758
1759 // Only known if known in both the LHS and RHS.
1760 Known = Known.intersectWith(RHS: Known2);
1761 break;
1762 case ISD::SELECT_CC:
1763 if (SimplifyDemandedBits(Op: Op.getOperand(i: 3), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1764 Known, TLO, Depth: Depth + 1))
1765 return true;
1766 if (SimplifyDemandedBits(Op: Op.getOperand(i: 2), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1767 Known&: Known2, TLO, Depth: Depth + 1))
1768 return true;
1769
1770 // If the operands are constants, see if we can simplify them.
1771 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1772 return true;
1773
1774 // Only known if known in both the LHS and RHS.
1775 Known = Known.intersectWith(RHS: Known2);
1776 break;
1777 case ISD::SETCC: {
1778 SDValue Op0 = Op.getOperand(i: 0);
1779 SDValue Op1 = Op.getOperand(i: 1);
1780 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get();
1781 // If we're testing X < 0, X >= 0, X <= -1 or X > -1
1782 // (X is of integer type) then we only need the sign bit of X, so demand
1783 // just the sign mask from Op0.
1784 if (Op1.getValueType().isInteger() &&
1785 (((CC == ISD::SETLT || CC == ISD::SETGE) && isNullOrNullSplat(V: Op1)) ||
1786 ((CC == ISD::SETLE || CC == ISD::SETGT) &&
1787 isAllOnesOrAllOnesSplat(V: Op1)))) {
1788 KnownBits KnownOp0;
1789 if (SimplifyDemandedBits(
1790 Op: Op0, OriginalDemandedBits: APInt::getSignMask(BitWidth: Op0.getScalarValueSizeInBits()),
1791 OriginalDemandedElts: DemandedElts, Known&: KnownOp0, TLO, Depth: Depth + 1))
1792 return true;
1793 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1794 // width as the setcc result, and (3) the result of a setcc conforms to 0
1795 // or -1, we may be able to bypass the setcc.
1796 if (DemandedBits.isSignMask() &&
1797 Op0.getScalarValueSizeInBits() == BitWidth &&
1798 getBooleanContents(Type: Op0.getValueType()) ==
1799 BooleanContent::ZeroOrNegativeOneBooleanContent) {
1800 // If we remove a >= 0 or > -1 (for integers), we need to introduce a
1801 // NOT Operation
1802 if (CC == ISD::SETGE || CC == ISD::SETGT) {
1803 SDLoc DL(Op);
1804 EVT VT = Op0.getValueType();
1805 SDValue NotOp0 = TLO.DAG.getNOT(DL, Val: Op0, VT);
1806 return TLO.CombineTo(O: Op, N: NotOp0);
1807 }
1808 return TLO.CombineTo(O: Op, N: Op0);
1809 }
1810 }
1811 if (getBooleanContents(Type: Op0.getValueType()) ==
1812 TargetLowering::ZeroOrOneBooleanContent &&
1813 BitWidth > 1)
1814 Known.Zero.setBitsFrom(1);
1815 break;
1816 }
1817 case ISD::SHL: {
1818 SDValue Op0 = Op.getOperand(i: 0);
1819 SDValue Op1 = Op.getOperand(i: 1);
1820 EVT ShiftVT = Op1.getValueType();
1821
1822 if (std::optional<unsigned> KnownSA =
1823 TLO.DAG.getValidShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
1824 unsigned ShAmt = *KnownSA;
1825 if (ShAmt == 0)
1826 return TLO.CombineTo(O: Op, N: Op0);
1827
1828 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1829 // single shift. We can do this if the bottom bits (which are shifted
1830 // out) are never demanded.
1831 // TODO - support non-uniform vector amounts.
1832 if (Op0.getOpcode() == ISD::SRL) {
1833 if (!DemandedBits.intersects(RHS: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ShAmt))) {
1834 if (std::optional<unsigned> InnerSA =
1835 TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + 2)) {
1836 unsigned C1 = *InnerSA;
1837 unsigned Opc = ISD::SHL;
1838 int Diff = ShAmt - C1;
1839 if (Diff < 0) {
1840 Diff = -Diff;
1841 Opc = ISD::SRL;
1842 }
1843 SDValue NewSA = TLO.DAG.getConstant(Val: Diff, DL: dl, VT: ShiftVT);
1844 return TLO.CombineTo(
1845 O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, N1: Op0.getOperand(i: 0), N2: NewSA));
1846 }
1847 }
1848 }
1849
1850 // Convert (shl (anyext x), c) to (anyext (shl x, c)) if the high bits
1851 // are not demanded. This will likely allow the anyext to be folded away.
1852 // TODO - support non-uniform vector amounts.
1853 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1854 SDValue InnerOp = Op0.getOperand(i: 0);
1855 EVT InnerVT = InnerOp.getValueType();
1856 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1857 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1858 isTypeDesirableForOp(ISD::SHL, VT: InnerVT)) {
1859 SDValue NarrowShl = TLO.DAG.getNode(
1860 Opcode: ISD::SHL, DL: dl, VT: InnerVT, N1: InnerOp,
1861 N2: TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: InnerVT, DL: dl));
1862 return TLO.CombineTo(
1863 O: Op, N: TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: NarrowShl));
1864 }
1865
1866 // Repeat the SHL optimization above in cases where an extension
1867 // intervenes: (shl (anyext (shr x, c1)), c2) to
1868 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1869 // aren't demanded (as above) and that the shifted upper c1 bits of
1870 // x aren't demanded.
1871 // TODO - support non-uniform vector amounts.
1872 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1873 InnerOp.hasOneUse()) {
1874 if (std::optional<unsigned> SA2 = TLO.DAG.getValidShiftAmount(
1875 V: InnerOp, DemandedElts, Depth: Depth + 2)) {
1876 unsigned InnerShAmt = *SA2;
1877 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1878 DemandedBits.getActiveBits() <=
1879 (InnerBits - InnerShAmt + ShAmt) &&
1880 DemandedBits.countr_zero() >= ShAmt) {
1881 SDValue NewSA =
1882 TLO.DAG.getConstant(Val: ShAmt - InnerShAmt, DL: dl, VT: ShiftVT);
1883 SDValue NewExt = TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT,
1884 Operand: InnerOp.getOperand(i: 0));
1885 return TLO.CombineTo(
1886 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: NewExt, N2: NewSA));
1887 }
1888 }
1889 }
1890 }
1891
1892 APInt InDemandedMask = DemandedBits.lshr(shiftAmt: ShAmt);
1893 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
1894 Depth: Depth + 1)) {
1895 // Disable the nsw and nuw flags. We can no longer guarantee that we
1896 // won't wrap after simplification.
1897 Op->dropFlags(Mask: SDNodeFlags::NoWrap);
1898 return true;
1899 }
1900 Known <<= ShAmt;
1901 // Low bits known zero.
1902 Known.Zero.setLowBits(ShAmt);
1903
1904 // Attempt to avoid multi-use ops if we don't need anything from them.
1905 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1906 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1907 Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1908 if (DemandedOp0) {
1909 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: DemandedOp0, N2: Op1);
1910 return TLO.CombineTo(O: Op, N: NewOp);
1911 }
1912 }
1913
1914 // TODO: Can we merge this fold with the one below?
1915 // Try shrinking the operation as long as the shift amount will still be
1916 // in range.
1917 if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1918 Op.getNode()->hasOneUse()) {
1919 // Search for the smallest integer type with free casts to and from
1920 // Op's type. For expedience, just check power-of-2 integer types.
1921 unsigned DemandedSize = DemandedBits.getActiveBits();
1922 for (unsigned SmallVTBits = llvm::bit_ceil(Value: DemandedSize);
1923 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(A: SmallVTBits)) {
1924 EVT SmallVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: SmallVTBits);
1925 if (isNarrowingProfitable(N: Op.getNode(), SrcVT: VT, DestVT: SmallVT) &&
1926 isTypeDesirableForOp(ISD::SHL, VT: SmallVT) &&
1927 isTruncateFree(FromVT: VT, ToVT: SmallVT) && isZExtFree(FromTy: SmallVT, ToTy: VT) &&
1928 (!TLO.LegalOperations() || isOperationLegal(Op: ISD::SHL, VT: SmallVT))) {
1929 assert(DemandedSize <= SmallVTBits &&
1930 "Narrowed below demanded bits?");
1931 // We found a type with free casts.
1932 SDValue NarrowShl = TLO.DAG.getNode(
1933 Opcode: ISD::SHL, DL: dl, VT: SmallVT,
1934 N1: TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SmallVT, Operand: Op.getOperand(i: 0)),
1935 N2: TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: SmallVT, DL: dl));
1936 return TLO.CombineTo(
1937 O: Op, N: TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: NarrowShl));
1938 }
1939 }
1940 }
1941
1942 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1943 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1944 // Only do this if we demand the upper half so the knownbits are correct.
1945 unsigned HalfWidth = BitWidth / 2;
1946 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1947 DemandedBits.countLeadingOnes() >= HalfWidth) {
1948 EVT HalfVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: HalfWidth);
1949 if (isNarrowingProfitable(N: Op.getNode(), SrcVT: VT, DestVT: HalfVT) &&
1950 isTypeDesirableForOp(ISD::SHL, VT: HalfVT) &&
1951 isTruncateFree(FromVT: VT, ToVT: HalfVT) && isZExtFree(FromTy: HalfVT, ToTy: VT) &&
1952 (!TLO.LegalOperations() || isOperationLegal(Op: ISD::SHL, VT: HalfVT))) {
1953 // If we're demanding the upper bits at all, we must ensure
1954 // that the upper bits of the shift result are known to be zero,
1955 // which is equivalent to the narrow shift being NUW.
1956 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1957 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1958 SDNodeFlags Flags;
1959 Flags.setNoSignedWrap(IsNSW);
1960 Flags.setNoUnsignedWrap(IsNUW);
1961 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HalfVT, Operand: Op0);
1962 SDValue NewShiftAmt =
1963 TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: HalfVT, DL: dl);
1964 SDValue NewShift = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HalfVT, N1: NewOp,
1965 N2: NewShiftAmt, Flags);
1966 SDValue NewExt =
1967 TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: NewShift);
1968 return TLO.CombineTo(O: Op, N: NewExt);
1969 }
1970 }
1971 }
1972 } else {
1973 // This is a variable shift, so we can't shift the demand mask by a known
1974 // amount. But if we are not demanding high bits, then we are not
1975 // demanding those bits from the pre-shifted operand either.
1976 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1977 APInt DemandedFromOp(APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - CTLZ));
1978 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedFromOp, OriginalDemandedElts: DemandedElts, Known, TLO,
1979 Depth: Depth + 1)) {
1980 // Disable the nsw and nuw flags. We can no longer guarantee that we
1981 // won't wrap after simplification.
1982 Op->dropFlags(Mask: SDNodeFlags::NoWrap);
1983 return true;
1984 }
1985 Known.resetAll();
1986 }
1987 }
1988
1989 // If we are only demanding sign bits then we can use the shift source
1990 // directly.
1991 if (std::optional<unsigned> MaxSA =
1992 TLO.DAG.getValidMaximumShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
1993 unsigned ShAmt = *MaxSA;
1994 unsigned NumSignBits =
1995 TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1);
1996 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1997 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1998 return TLO.CombineTo(O: Op, N: Op0);
1999 }
2000 break;
2001 }
2002 case ISD::SRL: {
2003 SDValue Op0 = Op.getOperand(i: 0);
2004 SDValue Op1 = Op.getOperand(i: 1);
2005 EVT ShiftVT = Op1.getValueType();
2006
2007 if (std::optional<unsigned> KnownSA =
2008 TLO.DAG.getValidShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
2009 unsigned ShAmt = *KnownSA;
2010 if (ShAmt == 0)
2011 return TLO.CombineTo(O: Op, N: Op0);
2012
2013 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
2014 // single shift. We can do this if the top bits (which are shifted out)
2015 // are never demanded.
2016 // TODO - support non-uniform vector amounts.
2017 if (Op0.getOpcode() == ISD::SHL) {
2018 if (!DemandedBits.intersects(RHS: APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: ShAmt))) {
2019 if (std::optional<unsigned> InnerSA =
2020 TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + 2)) {
2021 unsigned C1 = *InnerSA;
2022 unsigned Opc = ISD::SRL;
2023 int Diff = ShAmt - C1;
2024 if (Diff < 0) {
2025 Diff = -Diff;
2026 Opc = ISD::SHL;
2027 }
2028 SDValue NewSA = TLO.DAG.getConstant(Val: Diff, DL: dl, VT: ShiftVT);
2029 return TLO.CombineTo(
2030 O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, N1: Op0.getOperand(i: 0), N2: NewSA));
2031 }
2032 }
2033 }
2034
2035 // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
2036 // single sra. We can do this if the top bits are never demanded.
2037 if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
2038 if (!DemandedBits.intersects(RHS: APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: ShAmt))) {
2039 if (std::optional<unsigned> InnerSA =
2040 TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + 2)) {
2041 unsigned C1 = *InnerSA;
2042 // Clamp the combined shift amount to BitWidth - 1 so it stays in range.
2043 unsigned Combined = std::min(a: C1 + ShAmt, b: BitWidth - 1);
2044 SDValue NewSA = TLO.DAG.getConstant(Val: Combined, DL: dl, VT: ShiftVT);
2045 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRA, DL: dl, VT,
2046 N1: Op0.getOperand(i: 0), N2: NewSA));
2047 }
2048 }
2049 }
2050
2051 APInt InDemandedMask = (DemandedBits << ShAmt);
2052
2053 // If the shift is exact, then it does demand the low bits (and knows that
2054 // they are zero).
2055 if (Op->getFlags().hasExact())
2056 InDemandedMask.setLowBits(ShAmt);
2057
2058 // Narrow shift to lower half - similar to ShrinkDemandedOp.
2059 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
2060 if ((BitWidth % 2) == 0 && !VT.isVector()) {
2061 APInt HiBits = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth / 2);
2062 EVT HalfVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: BitWidth / 2);
2063 if (isNarrowingProfitable(N: Op.getNode(), SrcVT: VT, DestVT: HalfVT) &&
2064 isTypeDesirableForOp(ISD::SRL, VT: HalfVT) &&
2065 isTruncateFree(FromVT: VT, ToVT: HalfVT) && isZExtFree(FromTy: HalfVT, ToTy: VT) &&
2066 (!TLO.LegalOperations() || isOperationLegal(Op: ISD::SRL, VT: HalfVT)) &&
2067 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
2068 TLO.DAG.MaskedValueIsZero(Op: Op0, Mask: HiBits))) {
2069 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HalfVT, Operand: Op0);
2070 SDValue NewShiftAmt =
2071 TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: HalfVT, DL: dl);
2072 SDValue NewShift =
2073 TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HalfVT, N1: NewOp, N2: NewShiftAmt);
2074 return TLO.CombineTo(
2075 O: Op, N: TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: NewShift));
2076 }
2077 }
2078
2079 // Compute the new bits that are at the top now.
2080 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
2081 Depth: Depth + 1))
2082 return true;
2083 Known >>= ShAmt;
2084 // High bits known zero.
2085 Known.Zero.setHighBits(ShAmt);
2086
2087 // Attempt to avoid multi-use ops if we don't need anything from them.
2088 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2089 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2090 Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2091 if (DemandedOp0) {
2092 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: DemandedOp0, N2: Op1);
2093 return TLO.CombineTo(O: Op, N: NewOp);
2094 }
2095 }
2096 } else {
2097 // Use generic knownbits computation as it has support for non-uniform
2098 // shift amounts.
2099 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2100 }
2101
2102 // If we are only demanding sign bits then we can use the shift source
2103 // directly.
2104 if (std::optional<unsigned> MaxSA =
2105 TLO.DAG.getValidMaximumShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
2106 unsigned ShAmt = *MaxSA;
2107 // Must already be signbits in DemandedBits bounds, and can't demand any
2108 // shifted in zeroes.
2109 if (DemandedBits.countl_zero() >= ShAmt) {
2110 unsigned NumSignBits =
2111 TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1);
2112 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
2113 return TLO.CombineTo(O: Op, N: Op0);
2114 }
2115 }
2116
2117 // Try to match AVG patterns (after shift simplification).
2118 if (SDValue AVG = combineShiftToAVG(Op, TLO, TLI: *this, DemandedBits,
2119 DemandedElts, Depth: Depth + 1))
2120 return TLO.CombineTo(O: Op, N: AVG);
2121
2122 break;
2123 }
2124 case ISD::SRA: {
2125 SDValue Op0 = Op.getOperand(i: 0);
2126 SDValue Op1 = Op.getOperand(i: 1);
2127 EVT ShiftVT = Op1.getValueType();
2128
2129 // If we only want bits that already match the signbit then we don't need
2130 // to shift.
2131 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2132 if (TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1) >=
2133 NumHiDemandedBits)
2134 return TLO.CombineTo(O: Op, N: Op0);
2135
2136 // If this is an arithmetic shift right and only the low bit is demanded, we
2137 // can always convert this into a logical shr, even if the shift amount is
2138 // variable. The low bit of the result can only be a shifted-in sign copy if
2139 // the shift amount is >= the size of the datatype, which is undefined.
2140 if (DemandedBits.isOne())
2141 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1));
2142
2143 if (std::optional<unsigned> KnownSA =
2144 TLO.DAG.getValidShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
2145 unsigned ShAmt = *KnownSA;
2146 if (ShAmt == 0)
2147 return TLO.CombineTo(O: Op, N: Op0);
2148
2149 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 if the target
2150 // supports sext_inreg.
2151 if (Op0.getOpcode() == ISD::SHL) {
2152 if (std::optional<unsigned> InnerSA =
2153 TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + 2)) {
2154 unsigned LowBits = BitWidth - ShAmt;
2155 EVT ExtVT = VT.changeElementType(
2156 Context&: *TLO.DAG.getContext(),
2157 EltVT: EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: LowBits));
2158
2159 if (*InnerSA == ShAmt) {
2160 if (!TLO.LegalOperations() ||
2161 getOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: ExtVT) == Legal)
2162 return TLO.CombineTo(
2163 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT,
2164 N1: Op0.getOperand(i: 0),
2165 N2: TLO.DAG.getValueType(ExtVT)));
2166
2167 // Even if we can't convert to sext_inreg, we might be able to
2168 // remove this shift pair if the input is already sign extended.
2169 unsigned NumSignBits =
2170 TLO.DAG.ComputeNumSignBits(Op: Op0.getOperand(i: 0), DemandedElts);
2171 if (NumSignBits > ShAmt)
2172 return TLO.CombineTo(O: Op, N: Op0.getOperand(i: 0));
2173 }
2174 }
2175 }
2176
2177 APInt InDemandedMask = (DemandedBits << ShAmt);
2178
2179 // If the shift is exact, then it does demand the low bits (and knows that
2180 // they are zero).
2181 if (Op->getFlags().hasExact())
2182 InDemandedMask.setLowBits(ShAmt);
2183
2184 // If any of the demanded bits are produced by the sign extension, we also
2185 // demand the input sign bit.
2186 if (DemandedBits.countl_zero() < ShAmt)
2187 InDemandedMask.setSignBit();
2188
2189 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
2190 Depth: Depth + 1))
2191 return true;
2192 Known >>= ShAmt;
2193
2194 // If the input sign bit is known to be zero, or if none of the top bits
2195 // are demanded, turn this into an unsigned shift right.
2196 if (Known.Zero[BitWidth - ShAmt - 1] ||
2197 DemandedBits.countl_zero() >= ShAmt) {
2198 SDNodeFlags Flags;
2199 Flags.setExact(Op->getFlags().hasExact());
2200 return TLO.CombineTo(
2201 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1, Flags));
2202 }
2203
2204 int Log2 = DemandedBits.exactLogBase2();
2205 if (Log2 >= 0) {
2206 // The bit must come from the sign.
2207 SDValue NewSA = TLO.DAG.getConstant(Val: BitWidth - 1 - Log2, DL: dl, VT: ShiftVT);
2208 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: NewSA));
2209 }
2210
2211 if (Known.One[BitWidth - ShAmt - 1])
2212 // New bits are known one.
2213 Known.One.setHighBits(ShAmt);
2214
2215 // Attempt to avoid multi-use ops if we don't need anything from them.
2216 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2217 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2218 Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2219 if (DemandedOp0) {
2220 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: DemandedOp0, N2: Op1);
2221 return TLO.CombineTo(O: Op, N: NewOp);
2222 }
2223 }
2224 }
2225
2226 // Try to match AVG patterns (after shift simplification).
2227 if (SDValue AVG = combineShiftToAVG(Op, TLO, TLI: *this, DemandedBits,
2228 DemandedElts, Depth: Depth + 1))
2229 return TLO.CombineTo(O: Op, N: AVG);
2230
2231 break;
2232 }
2233 case ISD::FSHL:
2234 case ISD::FSHR: {
2235 SDValue Op0 = Op.getOperand(i: 0);
2236 SDValue Op1 = Op.getOperand(i: 1);
2237 SDValue Op2 = Op.getOperand(i: 2);
2238 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2239
2240 if (ConstantSDNode *SA = isConstOrConstSplat(N: Op2, DemandedElts)) {
2241 unsigned Amt = SA->getAPIntValue().urem(RHS: BitWidth);
2242
2243 // For fshl, 0-shift returns the 1st arg.
2244 // For fshr, 0-shift returns the 2nd arg.
2245 if (Amt == 0) {
2246 if (SimplifyDemandedBits(Op: IsFSHL ? Op0 : Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
2247 Known, TLO, Depth: Depth + 1))
2248 return true;
2249 break;
2250 }
2251
2252 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2253 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2254 APInt Demanded0 = DemandedBits.lshr(shiftAmt: IsFSHL ? Amt : (BitWidth - Amt));
2255 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2256 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: Demanded0, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2257 Depth: Depth + 1))
2258 return true;
2259 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: Demanded1, OriginalDemandedElts: DemandedElts, Known, TLO,
2260 Depth: Depth + 1))
2261 return true;
2262
2263 Known2 <<= (IsFSHL ? Amt : (BitWidth - Amt));
2264 Known >>= (IsFSHL ? (BitWidth - Amt) : Amt);
2265 Known = Known.unionWith(RHS: Known2);
2266
2267 // Attempt to avoid multi-use ops if we don't need anything from them.
2268 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2269 !DemandedElts.isAllOnes()) {
2270 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2271 Op: Op0, DemandedBits: Demanded0, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2272 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2273 Op: Op1, DemandedBits: Demanded1, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2274 if (DemandedOp0 || DemandedOp1) {
2275 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2276 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2277 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedOp0,
2278 N2: DemandedOp1, N3: Op2);
2279 return TLO.CombineTo(O: Op, N: NewOp);
2280 }
2281 }
2282 }
2283
2284 if (isPowerOf2_32(Value: BitWidth)) {
2285 // Fold FSHR(Op0,Op1,Op2) -> SRL(Op1,Op2)
2286 // iff we're guaranteed not to use Op0.
2287 // TODO: Add FSHL equivalent?
2288 if (!IsFSHL && !DemandedBits.isAllOnes() &&
2289 (!TLO.LegalOperations() || isOperationLegal(Op: ISD::SRL, VT))) {
2290 KnownBits KnownAmt =
2291 TLO.DAG.computeKnownBits(Op: Op2, DemandedElts, Depth: Depth + 1);
2292 unsigned MaxShiftAmt =
2293 KnownAmt.getMaxValue().getLimitedValue(Limit: BitWidth - 1);
2294 // Check we don't demand any shifted bits outside Op1.
2295 if (DemandedBits.countl_zero() >= MaxShiftAmt) {
2296 EVT AmtVT = Op2.getValueType();
2297 SDValue NewAmt =
2298 TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT: AmtVT, N1: Op2,
2299 N2: TLO.DAG.getConstant(Val: BitWidth - 1, DL: dl, VT: AmtVT));
2300 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op1, N2: NewAmt);
2301 return TLO.CombineTo(O: Op, N: NewOp);
2302 }
2303 }
2304
2305 // For pow-2 bitwidths we only demand the low log2(BitWidth) bits of the amt.
2306 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2307 if (SimplifyDemandedBits(Op: Op2, OriginalDemandedBits: DemandedAmtBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2308 Depth: Depth + 1))
2309 return true;
2310 }
2311 break;
2312 }
2313 case ISD::ROTL:
2314 case ISD::ROTR: {
2315 SDValue Op0 = Op.getOperand(i: 0);
2316 SDValue Op1 = Op.getOperand(i: 1);
2317 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2318
2319 // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
2320 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1))
2321 return TLO.CombineTo(O: Op, N: Op0);
2322
2323 if (ConstantSDNode *SA = isConstOrConstSplat(N: Op1, DemandedElts)) {
2324 unsigned Amt = SA->getAPIntValue().urem(RHS: BitWidth);
2325 unsigned RevAmt = BitWidth - Amt;
2326
2327 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2328 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2329 APInt Demanded0 = DemandedBits.rotr(rotateAmt: IsROTL ? Amt : RevAmt);
2330 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: Demanded0, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2331 Depth: Depth + 1))
2332 return true;
2333
2334 // rot*(x, 0) --> x
2335 if (Amt == 0)
2336 return TLO.CombineTo(O: Op, N: Op0);
2337
2338 // See if we don't demand either half of the rotated bits.
2339 if ((!TLO.LegalOperations() || isOperationLegal(Op: ISD::SHL, VT)) &&
2340 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2341 Op1 = TLO.DAG.getConstant(Val: IsROTL ? Amt : RevAmt, DL: dl, VT: Op1.getValueType());
2342 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op0, N2: Op1));
2343 }
2344 if ((!TLO.LegalOperations() || isOperationLegal(Op: ISD::SRL, VT)) &&
2345 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2346 Op1 = TLO.DAG.getConstant(Val: IsROTL ? RevAmt : Amt, DL: dl, VT: Op1.getValueType());
2347 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1));
2348 }
2349 }
2350
2351 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2352 if (isPowerOf2_32(Value: BitWidth)) {
2353 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2354 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedAmtBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2355 Depth: Depth + 1))
2356 return true;
2357 }
2358 break;
2359 }
2360 case ISD::SMIN:
2361 case ISD::SMAX:
2362 case ISD::UMIN:
2363 case ISD::UMAX: {
2364 unsigned Opc = Op.getOpcode();
2365 SDValue Op0 = Op.getOperand(i: 0);
2366 SDValue Op1 = Op.getOperand(i: 1);
2367
2368 // If we're only demanding signbits, then we can simplify to OR/AND node.
2369 unsigned BitOp =
2370 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2371 unsigned NumSignBits =
2372 std::min(a: TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1),
2373 b: TLO.DAG.ComputeNumSignBits(Op: Op1, DemandedElts, Depth: Depth + 1));
2374 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2375 if (NumSignBits >= NumDemandedUpperBits)
2376 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: BitOp, DL: SDLoc(Op), VT, N1: Op0, N2: Op1));
2377
2378 // Check if one arg is always less/greater than (or equal) to the other arg.
2379 KnownBits Known0 = TLO.DAG.computeKnownBits(Op: Op0, DemandedElts, Depth: Depth + 1);
2380 KnownBits Known1 = TLO.DAG.computeKnownBits(Op: Op1, DemandedElts, Depth: Depth + 1);
2381 switch (Opc) {
2382 case ISD::SMIN:
2383 if (std::optional<bool> IsSLE = KnownBits::sle(LHS: Known0, RHS: Known1))
2384 return TLO.CombineTo(O: Op, N: *IsSLE ? Op0 : Op1);
2385 if (std::optional<bool> IsSLT = KnownBits::slt(LHS: Known0, RHS: Known1))
2386 return TLO.CombineTo(O: Op, N: *IsSLT ? Op0 : Op1);
2387 Known = KnownBits::smin(LHS: Known0, RHS: Known1);
2388 break;
2389 case ISD::SMAX:
2390 if (std::optional<bool> IsSGE = KnownBits::sge(LHS: Known0, RHS: Known1))
2391 return TLO.CombineTo(O: Op, N: *IsSGE ? Op0 : Op1);
2392 if (std::optional<bool> IsSGT = KnownBits::sgt(LHS: Known0, RHS: Known1))
2393 return TLO.CombineTo(O: Op, N: *IsSGT ? Op0 : Op1);
2394 Known = KnownBits::smax(LHS: Known0, RHS: Known1);
2395 break;
2396 case ISD::UMIN:
2397 if (std::optional<bool> IsULE = KnownBits::ule(LHS: Known0, RHS: Known1))
2398 return TLO.CombineTo(O: Op, N: *IsULE ? Op0 : Op1);
2399 if (std::optional<bool> IsULT = KnownBits::ult(LHS: Known0, RHS: Known1))
2400 return TLO.CombineTo(O: Op, N: *IsULT ? Op0 : Op1);
2401 Known = KnownBits::umin(LHS: Known0, RHS: Known1);
2402 break;
2403 case ISD::UMAX:
2404 if (std::optional<bool> IsUGE = KnownBits::uge(LHS: Known0, RHS: Known1))
2405 return TLO.CombineTo(O: Op, N: *IsUGE ? Op0 : Op1);
2406 if (std::optional<bool> IsUGT = KnownBits::ugt(LHS: Known0, RHS: Known1))
2407 return TLO.CombineTo(O: Op, N: *IsUGT ? Op0 : Op1);
2408 Known = KnownBits::umax(LHS: Known0, RHS: Known1);
2409 break;
2410 }
2411 break;
2412 }
2413 case ISD::BITREVERSE: {
2414 SDValue Src = Op.getOperand(i: 0);
2415 APInt DemandedSrcBits = DemandedBits.reverseBits();
2416 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2417 Depth: Depth + 1))
2418 return true;
2419 Known = Known2.reverseBits();
2420 break;
2421 }
2422 case ISD::BSWAP: {
2423 SDValue Src = Op.getOperand(i: 0);
2424
2425 // If the only bits demanded come from one byte of the bswap result,
2426 // just shift the input byte into position to eliminate the bswap.
2427 unsigned NLZ = DemandedBits.countl_zero();
2428 unsigned NTZ = DemandedBits.countr_zero();
2429
2430 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2431 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2432 // have 14 leading zeros, round to 8.
2433 NLZ = alignDown(Value: NLZ, Align: 8);
2434 NTZ = alignDown(Value: NTZ, Align: 8);
2435 // If we need exactly one byte, we can do this transformation.
2436 if (BitWidth - NLZ - NTZ == 8) {
2437 // Replace this with either a left or right shift to get the byte into
2438 // the right place.
2439 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2440 if (!TLO.LegalOperations() || isOperationLegal(Op: ShiftOpcode, VT)) {
2441 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2442 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(Val: ShiftAmount, VT, DL: dl);
2443 SDValue NewOp = TLO.DAG.getNode(Opcode: ShiftOpcode, DL: dl, VT, N1: Src, N2: ShAmt);
2444 return TLO.CombineTo(O: Op, N: NewOp);
2445 }
2446 }
2447
2448 APInt DemandedSrcBits = DemandedBits.byteSwap();
2449 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2450 Depth: Depth + 1))
2451 return true;
2452 Known = Known2.byteSwap();
2453 break;
2454 }
2455 case ISD::CTPOP: {
2456 // If only 1 bit is demanded, replace with PARITY as long as we're before
2457 // op legalization.
2458 // FIXME: Limit to scalars for now.
2459 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2460 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::PARITY, DL: dl, VT,
2461 Operand: Op.getOperand(i: 0)));
2462
2463 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2464 break;
2465 }
2466 case ISD::PDEP: {
2467 SDValue Op0 = Op.getOperand(i: 0);
2468 SDValue Op1 = Op.getOperand(i: 1);
2469
2470 unsigned DemandedBitsLZ = OriginalDemandedBits.countl_zero();
2471 APInt LoMask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - DemandedBitsLZ);
2472
2473 // If the demanded bits have leading zeroes, we don't demand those from the
2474 // mask.
2475 if (SimplifyDemandedBits(Op: Op1, DemandedBits: LoMask, Known, TLO, Depth: Depth + 1))
2476 return true;
2477
2478 // The number of possible 1s in the mask determines the number of LSBs of
2479 // operand 0 used. Undemanded bits from the mask don't matter so filter
2480 // them before counting.
2481 KnownBits Known2;
2482 uint64_t Count = (~Known.Zero & LoMask).popcount();
2483 APInt DemandedMask(APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: Count));
2484 if (SimplifyDemandedBits(Op: Op0, DemandedBits: DemandedMask, Known&: Known2, TLO, Depth: Depth + 1))
2485 return true;
2486
2487 // Zeroes are retained from the mask, but not ones.
2488 Known.One.clearAllBits();
2489 // The result will have at least as many trailing zeros as the non-mask
2490 // operand since bits can only map to the same or higher bit position.
2491 Known.Zero.setLowBits(Known2.countMinTrailingZeros());
2492 break;
2493 }
2494 case ISD::SIGN_EXTEND_INREG: {
2495 SDValue Op0 = Op.getOperand(i: 0);
2496 EVT ExVT = cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT();
2497 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2498
2499 // If we only care about the highest bit, don't bother shifting right.
2500 if (DemandedBits.isSignMask()) {
2501 unsigned MinSignedBits =
2502 TLO.DAG.ComputeMaxSignificantBits(Op: Op0, DemandedElts, Depth: Depth + 1);
2503 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2504 // However if the input is already sign extended we expect the sign
2505 // extension to be dropped altogether later and do not simplify.
2506 if (!AlreadySignExtended) {
2507 // Compute the correct shift amount type, which must be getShiftAmountTy
2508 // for scalar types after legalization.
2509 SDValue ShiftAmt =
2510 TLO.DAG.getShiftAmountConstant(Val: BitWidth - ExVTBits, VT, DL: dl);
2511 return TLO.CombineTo(O: Op,
2512 N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op0, N2: ShiftAmt));
2513 }
2514 }
2515
2516 // If none of the extended bits are demanded, eliminate the sextinreg.
2517 if (DemandedBits.getActiveBits() <= ExVTBits)
2518 return TLO.CombineTo(O: Op, N: Op0);
2519
2520 APInt InputDemandedBits = DemandedBits.getLoBits(numBits: ExVTBits);
2521
2522 // Since the sign extended bits are demanded, we know that the sign
2523 // bit is demanded.
2524 InputDemandedBits.setBit(ExVTBits - 1);
2525
2526 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InputDemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
2527 Depth: Depth + 1))
2528 return true;
2529
2530 // If the sign bit of the input is known set or clear, then we know the
2531 // top bits of the result.
2532
2533 // If the input sign bit is known zero, convert this into a zero extension.
2534 if (Known.Zero[ExVTBits - 1])
2535 return TLO.CombineTo(O: Op, N: TLO.DAG.getZeroExtendInReg(Op: Op0, DL: dl, VT: ExVT));
2536
2537 APInt Mask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ExVTBits);
2538 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2539 Known.One.setBitsFrom(ExVTBits);
2540 Known.Zero &= Mask;
2541 } else { // Input sign bit unknown
2542 Known.Zero &= Mask;
2543 Known.One &= Mask;
2544 }
2545 break;
2546 }
2547 case ISD::BUILD_PAIR: {
2548 EVT HalfVT = Op.getOperand(i: 0).getValueType();
2549 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2550
2551 APInt MaskLo = DemandedBits.getLoBits(numBits: HalfBitWidth).trunc(width: HalfBitWidth);
2552 APInt MaskHi = DemandedBits.getHiBits(numBits: HalfBitWidth).trunc(width: HalfBitWidth);
2553
2554 KnownBits KnownLo, KnownHi;
2555
2556 if (SimplifyDemandedBits(Op: Op.getOperand(i: 0), DemandedBits: MaskLo, Known&: KnownLo, TLO, Depth: Depth + 1))
2557 return true;
2558
2559 if (SimplifyDemandedBits(Op: Op.getOperand(i: 1), DemandedBits: MaskHi, Known&: KnownHi, TLO, Depth: Depth + 1))
2560 return true;
2561
2562 Known = KnownHi.concat(Lo: KnownLo);
2563 break;
2564 }
2565 case ISD::ZERO_EXTEND_VECTOR_INREG:
2566 if (VT.isScalableVector())
2567 return false;
2568 [[fallthrough]];
2569 case ISD::ZERO_EXTEND: {
2570 SDValue Src = Op.getOperand(i: 0);
2571 EVT SrcVT = Src.getValueType();
2572 unsigned InBits = SrcVT.getScalarSizeInBits();
2573 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2574 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2575
2576 // If none of the top bits are demanded, convert this into an any_extend.
2577 if (DemandedBits.getActiveBits() <= InBits) {
2578 // If we only need the non-extended bits of the bottom element
2579 // then we can just bitcast to the result.
2580 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2581 VT.getSizeInBits() == SrcVT.getSizeInBits())
2582 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2583
2584 unsigned Opc =
2585 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2586 if (!TLO.LegalOperations() || isOperationLegal(Op: Opc, VT))
2587 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src));
2588 }
2589
2590 APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2591 APInt InDemandedElts = DemandedElts.zext(width: InElts);
2592 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2593 Depth: Depth + 1)) {
2594 Op->dropFlags(Mask: SDNodeFlags::NonNeg);
2595 return true;
2596 }
2597 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2598 Known = Known.zext(BitWidth);
2599
2600 // Attempt to avoid multi-use ops if we don't need anything from them.
2601 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2602 Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
2603 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2604 break;
2605 }
2606 case ISD::SIGN_EXTEND_VECTOR_INREG:
2607 if (VT.isScalableVector())
2608 return false;
2609 [[fallthrough]];
2610 case ISD::SIGN_EXTEND: {
2611 SDValue Src = Op.getOperand(i: 0);
2612 EVT SrcVT = Src.getValueType();
2613 unsigned InBits = SrcVT.getScalarSizeInBits();
2614 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2615 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2616
2617 APInt InDemandedElts = DemandedElts.zext(width: InElts);
2618 APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2619
2620 // Since some of the sign extended bits are demanded, we know that the sign
2621 // bit is demanded.
2622 InDemandedBits.setBit(InBits - 1);
2623
2624 // If none of the top bits are demanded, convert this into an any_extend.
2625 if (DemandedBits.getActiveBits() <= InBits) {
2626 // If we only need the non-extended bits of the bottom element
2627 // then we can just bitcast to the result.
2628 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2629 VT.getSizeInBits() == SrcVT.getSizeInBits())
2630 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2631
2632 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2633 if (getBooleanContents(Type: VT) != ZeroOrNegativeOneBooleanContent ||
2634 TLO.DAG.ComputeNumSignBits(Op: Src, DemandedElts: InDemandedElts, Depth: Depth + 1) !=
2635 InBits) {
2636 unsigned Opc =
2637 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2638 if (!TLO.LegalOperations() || isOperationLegal(Op: Opc, VT))
2639 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src));
2640 }
2641 }
2642
2643 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2644 Depth: Depth + 1))
2645 return true;
2646 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2647
2648 // If the sign bit is known one, the top bits match.
2649 Known = Known.sext(BitWidth);
2650
2651 // If the sign bit is known zero, convert this to a zero extend.
2652 if (Known.isNonNegative()) {
2653 unsigned Opc =
2654 IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2655 if (!TLO.LegalOperations() || isOperationLegal(Op: Opc, VT)) {
2656 SDNodeFlags Flags;
2657 if (!IsVecInReg)
2658 Flags |= SDNodeFlags::NonNeg;
2659 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src, Flags));
2660 }
2661 }
2662
2663 // Attempt to avoid multi-use ops if we don't need anything from them.
2664 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2665 Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
2666 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2667 break;
2668 }
2669 case ISD::ANY_EXTEND_VECTOR_INREG:
2670 if (VT.isScalableVector())
2671 return false;
2672 [[fallthrough]];
2673 case ISD::ANY_EXTEND: {
2674 SDValue Src = Op.getOperand(i: 0);
2675 EVT SrcVT = Src.getValueType();
2676 unsigned InBits = SrcVT.getScalarSizeInBits();
2677 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2678 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2679
2680 // If we only need the bottom element then we can just bitcast.
2681 // TODO: Handle ANY_EXTEND?
2682 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2683 VT.getSizeInBits() == SrcVT.getSizeInBits())
2684 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2685
2686 APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2687 APInt InDemandedElts = DemandedElts.zext(width: InElts);
2688 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2689 Depth: Depth + 1))
2690 return true;
2691 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2692 Known = Known.anyext(BitWidth);
2693
2694 // Attempt to avoid multi-use ops if we don't need anything from them.
2695 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2696 Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
2697 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2698 break;
2699 }
2700 case ISD::TRUNCATE: {
2701 SDValue Src = Op.getOperand(i: 0);
2702
2703 // Simplify the input, using demanded bit information, and compute the known
2704 // zero/one bits live out.
2705 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2706 APInt TruncMask = DemandedBits.zext(width: OperandBitWidth);
2707 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: TruncMask, OriginalDemandedElts: DemandedElts, Known, TLO,
2708 Depth: Depth + 1)) {
2709 // Disable the nsw and nuw flags. We can no longer guarantee that we
2710 // won't wrap after simplification.
2711 Op->dropFlags(Mask: SDNodeFlags::NoWrap);
2712 return true;
2713 }
2714 Known = Known.trunc(BitWidth);
2715
2716 // Attempt to avoid multi-use ops if we don't need anything from them.
2717 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2718 Op: Src, DemandedBits: TruncMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
2719 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: NewSrc));
2720
2721 // If the input is only used by this truncate, see if we can shrink it based
2722 // on the known demanded bits.
2723 switch (Src.getOpcode()) {
2724 default:
2725 break;
2726 case ISD::SRL:
2727 // Shrink SRL by a constant if none of the high bits shifted in are
2728 // demanded.
2729 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2730 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2731 // undesirable.
2732 break;
2733
2734 if (Src.getNode()->hasOneUse()) {
2735 if (isTruncateFree(Val: Src, VT2: VT) &&
2736 !isTruncateFree(FromVT: Src.getValueType(), ToVT: VT)) {
2737 // If truncate is only free at trunc(srl), do not turn it into
2738 // srl(trunc). The check first tests that the truncate is free at
2739 // Src's opcode (srl), then tests that the truncate is not done by
2740 // referencing a sub-register. In testing, if the truncates in both
2741 // trunc(srl) and srl(trunc) are free, srl(trunc) performs better. If
2742 // only trunc(srl)'s truncate is free, trunc(srl) is better.
2743 break;
2744 }
2745
2746 std::optional<unsigned> ShAmtC =
2747 TLO.DAG.getValidShiftAmount(V: Src, DemandedElts, Depth: Depth + 2);
2748 if (!ShAmtC || *ShAmtC >= BitWidth)
2749 break;
2750 unsigned ShVal = *ShAmtC;
2751
2752 APInt HighBits =
2753 APInt::getHighBitsSet(numBits: OperandBitWidth, hiBitsSet: OperandBitWidth - BitWidth);
2754 HighBits.lshrInPlace(ShiftAmt: ShVal);
2755 HighBits = HighBits.trunc(width: BitWidth);
2756 if (!(HighBits & DemandedBits)) {
2757 // None of the shifted in bits are needed. Add a truncate of the
2758 // shift input, then shift it.
2759 SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(Val: ShVal, VT, DL: dl);
2760 SDValue NewTrunc =
2761 TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Src.getOperand(i: 0));
2762 return TLO.CombineTo(
2763 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: NewTrunc, N2: NewShAmt));
2764 }
2765 }
2766 break;
2767 }
2768
2769 break;
2770 }
2771 case ISD::AssertZext: {
2772 // AssertZext demands all of the high bits, plus any of the low bits
2773 // demanded by its users.
2774 EVT ZVT = cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT();
2775 APInt InMask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ZVT.getSizeInBits());
2776 if (SimplifyDemandedBits(Op: Op.getOperand(i: 0), DemandedBits: ~InMask | DemandedBits, Known,
2777 TLO, Depth: Depth + 1))
2778 return true;
2779
2780 Known.Zero |= ~InMask;
2781 Known.One &= (~Known.Zero);
2782 break;
2783 }
2784 case ISD::EXTRACT_VECTOR_ELT: {
2785 SDValue Src = Op.getOperand(i: 0);
2786 SDValue Idx = Op.getOperand(i: 1);
2787 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2788 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2789
2790 if (SrcEltCnt.isScalable())
2791 return false;
2792
2793 // Demand the bits from every vector element without a constant index.
2794 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2795 APInt DemandedSrcElts = APInt::getAllOnes(numBits: NumSrcElts);
2796 if (auto *CIdx = dyn_cast<ConstantSDNode>(Val&: Idx))
2797 if (CIdx->getAPIntValue().ult(RHS: NumSrcElts))
2798 DemandedSrcElts = APInt::getOneBitSet(numBits: NumSrcElts, BitNo: CIdx->getZExtValue());
2799
2800 // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2801 // anything about the extended bits.
2802 APInt DemandedSrcBits = DemandedBits;
2803 if (BitWidth > EltBitWidth)
2804 DemandedSrcBits = DemandedSrcBits.trunc(width: EltBitWidth);
2805
2806 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts, Known&: Known2, TLO,
2807 Depth: Depth + 1))
2808 return true;
2809
2810 // Attempt to avoid multi-use ops if we don't need anything from them.
2811 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2812 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2813 Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1)) {
2814 SDValue NewOp =
2815 TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedSrc, N2: Idx);
2816 return TLO.CombineTo(O: Op, N: NewOp);
2817 }
2818 }
2819
2820 Known = Known2;
2821 if (BitWidth > EltBitWidth)
2822 Known = Known.anyext(BitWidth);
2823 break;
2824 }
2825 case ISD::BITCAST: {
2826 if (VT.isScalableVector())
2827 return false;
2828 SDValue Src = Op.getOperand(i: 0);
2829 EVT SrcVT = Src.getValueType();
2830 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2831
2832 // If this is an FP->Int bitcast and if the sign bit is the only
2833 // thing demanded, turn this into a FGETSIGN.
2834 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2835 DemandedBits == APInt::getSignMask(BitWidth: Op.getValueSizeInBits()) &&
2836 SrcVT.isFloatingPoint()) {
2837 if (isOperationLegalOrCustom(Op: ISD::FGETSIGN, VT)) {
2838 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2839 // place. We expect the SHL to be eliminated by other optimizations.
2840 SDValue Sign = TLO.DAG.getNode(Opcode: ISD::FGETSIGN, DL: dl, VT, Operand: Src);
2841 unsigned ShVal = Op.getValueSizeInBits() - 1;
2842 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(Val: ShVal, VT, DL: dl);
2843 return TLO.CombineTo(O: Op,
2844 N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Sign, N2: ShAmt));
2845 }
2846 }
2847
2848 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2849 // Demand the elt/bit if any of the original elts/bits are demanded.
2850 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2851 unsigned Scale = BitWidth / NumSrcEltBits;
2852 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2853 APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
2854 for (unsigned i = 0; i != Scale; ++i) {
2855 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2856 unsigned BitOffset = EltOffset * NumSrcEltBits;
2857 DemandedSrcBits |= DemandedBits.extractBits(numBits: NumSrcEltBits, bitPosition: BitOffset);
2858 }
2859 // Recursive calls below may turn non-demanded elements into poison, so we
2860 // need to demand all smaller source elements that map to a demanded
2861 // destination element.
2862 APInt DemandedSrcElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
2863
2864 APInt KnownSrcUndef, KnownSrcZero;
2865 if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedSrcElts, KnownUndef&: KnownSrcUndef,
2866 KnownZero&: KnownSrcZero, TLO, Depth: Depth + 1))
2867 return true;
2868
2869 KnownBits KnownSrcBits;
2870 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts,
2871 Known&: KnownSrcBits, TLO, Depth: Depth + 1))
2872 return true;
2873 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2874 // TODO - bigendian once we have test coverage.
2875 unsigned Scale = NumSrcEltBits / BitWidth;
2876 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2877 APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
2878 APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
2879 for (unsigned i = 0; i != NumElts; ++i)
2880 if (DemandedElts[i]) {
2881 unsigned Offset = (i % Scale) * BitWidth;
2882 DemandedSrcBits.insertBits(SubBits: DemandedBits, bitPosition: Offset);
2883 DemandedSrcElts.setBit(i / Scale);
2884 }
2885
2886 if (SrcVT.isVector()) {
2887 APInt KnownSrcUndef, KnownSrcZero;
2888 if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedSrcElts, KnownUndef&: KnownSrcUndef,
2889 KnownZero&: KnownSrcZero, TLO, Depth: Depth + 1))
2890 return true;
2891 }
2892
2893 KnownBits KnownSrcBits;
2894 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts,
2895 Known&: KnownSrcBits, TLO, Depth: Depth + 1))
2896 return true;
2897
2898 // Attempt to avoid multi-use ops if we don't need anything from them.
2899 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2900 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2901 Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1)) {
2902 SDValue NewOp = TLO.DAG.getBitcast(VT, V: DemandedSrc);
2903 return TLO.CombineTo(O: Op, N: NewOp);
2904 }
2905 }
2906 }
2907
2908 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2909 // recursive call where Known may be useful to the caller.
2910 if (Depth > 0) {
2911 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2912 return false;
2913 }
2914 break;
2915 }
2916 case ISD::MUL:
2917 if (DemandedBits.isPowerOf2()) {
2918 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2919 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2920 // odd (has LSB set), then the left-shifted low bit of X is the answer.
2921 unsigned CTZ = DemandedBits.countr_zero();
2922 ConstantSDNode *C = isConstOrConstSplat(N: Op.getOperand(i: 1), DemandedElts);
2923 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2924 SDValue AmtC = TLO.DAG.getShiftAmountConstant(Val: CTZ, VT, DL: dl);
2925 SDValue Shl = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op.getOperand(i: 0), N2: AmtC);
2926 return TLO.CombineTo(O: Op, N: Shl);
2927 }
2928 }
2929 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2930 // X * X is odd iff X is odd.
2931 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2932 if (Op.getOperand(i: 0) == Op.getOperand(i: 1) && DemandedBits.ult(RHS: 4)) {
2933 SDValue One = TLO.DAG.getConstant(Val: 1, DL: dl, VT);
2934 SDValue And1 = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op.getOperand(i: 0), N2: One);
2935 return TLO.CombineTo(O: Op, N: And1);
2936 }
2937 [[fallthrough]];
2938 case ISD::PTRADD:
2939 if (Op.getOperand(i: 0).getValueType() != Op.getOperand(i: 1).getValueType())
2940 break;
2941 // PTRADD behaves like ADD if pointers are represented as integers.
2942 [[fallthrough]];
2943 case ISD::ADD:
2944 case ISD::SUB: {
2945 // Add, Sub, and Mul don't demand any bits in positions beyond that
2946 // of the highest bit demanded of them.
2947 SDValue Op0 = Op.getOperand(i: 0), Op1 = Op.getOperand(i: 1);
2948 SDNodeFlags Flags = Op.getNode()->getFlags();
2949 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2950 APInt LoMask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - DemandedBitsLZ);
2951 KnownBits KnownOp0, KnownOp1;
2952 auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2953 const KnownBits &KnownRHS) {
2954 if (Op.getOpcode() == ISD::MUL)
2955 Demanded.clearHighBits(hiBits: KnownRHS.countMinTrailingZeros());
2956 return Demanded;
2957 };
2958 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: LoMask, OriginalDemandedElts: DemandedElts, Known&: KnownOp1, TLO,
2959 Depth: Depth + 1) ||
2960 SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: GetDemandedBitsLHSMask(LoMask, KnownOp1),
2961 OriginalDemandedElts: DemandedElts, Known&: KnownOp0, TLO, Depth: Depth + 1) ||
2962 // See if the operation should be performed at a smaller bit width.
2963 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2964 // Disable the nsw and nuw flags. We can no longer guarantee that we
2965 // won't wrap after simplification.
2966 Op->dropFlags(Mask: SDNodeFlags::NoWrap);
2967 return true;
2968 }
2969
2970 // neg x with only low bit demanded is simply x.
2971 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2972 isNullConstant(V: Op0))
2973 return TLO.CombineTo(O: Op, N: Op1);
2974
2975 // Attempt to avoid multi-use ops if we don't need anything from them.
2976 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2977 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2978 Op: Op0, DemandedBits: LoMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2979 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2980 Op: Op1, DemandedBits: LoMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2981 if (DemandedOp0 || DemandedOp1) {
2982 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2983 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2984 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1,
2985 Flags: Flags & ~SDNodeFlags::NoWrap);
2986 return TLO.CombineTo(O: Op, N: NewOp);
2987 }
2988 }
2989
2990 // If we have a constant operand, we may be able to turn it into -1 if we
2991 // do not demand the high bits. This can make the constant smaller to
2992 // encode, allow more general folding, or match specialized instruction
2993 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2994 // is probably not useful (and could be detrimental).
2995 ConstantSDNode *C = isConstOrConstSplat(N: Op1);
2996 APInt HighMask = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: DemandedBitsLZ);
2997 if (C && !C->isAllOnes() && !C->isOne() &&
2998 (C->getAPIntValue() | HighMask).isAllOnes()) {
2999 SDValue Neg1 = TLO.DAG.getAllOnesConstant(DL: dl, VT);
3000 // Disable the nsw and nuw flags. We can no longer guarantee that we
3001 // won't wrap after simplification.
3002 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Neg1,
3003 Flags: Flags & ~SDNodeFlags::NoWrap);
3004 return TLO.CombineTo(O: Op, N: NewOp);
3005 }
3006
3007 // Match a multiply with a disguised negated-power-of-2 and convert it to
3008 // an equivalent shift-left amount.
3009 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
3010 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
3011 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
3012 return 0;
3013
3014 // Don't touch opaque constants. Also, ignore zero and power-of-2
3015 // multiplies. Those will get folded later.
3016 ConstantSDNode *MulC = isConstOrConstSplat(N: Mul.getOperand(i: 1));
3017 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
3018 !MulC->getAPIntValue().isPowerOf2()) {
3019 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
3020 if (UnmaskedC.isNegatedPowerOf2())
3021 return (-UnmaskedC).logBase2();
3022 }
3023 return 0;
3024 };
3025
3026 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
3027 unsigned ShlAmt) {
3028 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(Val: ShlAmt, VT, DL: dl);
3029 SDValue Shl = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: X, N2: ShlAmtC);
3030 SDValue Res = TLO.DAG.getNode(Opcode: NT, DL: dl, VT, N1: Y, N2: Shl);
3031 return TLO.CombineTo(O: Op, N: Res);
3032 };
3033
3034 if (isOperationLegalOrCustom(Op: ISD::SHL, VT)) {
3035 if (Op.getOpcode() == ISD::ADD) {
3036 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
3037 if (unsigned ShAmt = getShiftLeftAmt(Op0))
3038 return foldMul(ISD::SUB, Op0.getOperand(i: 0), Op1, ShAmt);
3039 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
3040 if (unsigned ShAmt = getShiftLeftAmt(Op1))
3041 return foldMul(ISD::SUB, Op1.getOperand(i: 0), Op0, ShAmt);
3042 }
3043 if (Op.getOpcode() == ISD::SUB) {
3044 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
3045 if (unsigned ShAmt = getShiftLeftAmt(Op1))
3046 return foldMul(ISD::ADD, Op1.getOperand(i: 0), Op0, ShAmt);
3047 }
3048 }
3049
3050 if (Op.getOpcode() == ISD::MUL) {
3051 Known = KnownBits::mul(LHS: KnownOp0, RHS: KnownOp1);
3052 } else { // Op.getOpcode() is either ISD::ADD, ISD::PTRADD, or ISD::SUB.
3053 Known = KnownBits::computeForAddSub(
3054 Add: Op.getOpcode() != ISD::SUB, NSW: Flags.hasNoSignedWrap(),
3055 NUW: Flags.hasNoUnsignedWrap(), LHS: KnownOp0, RHS: KnownOp1);
3056 }
3057 break;
3058 }
3059 case ISD::FABS: {
3060 SDValue Op0 = Op.getOperand(i: 0);
3061 APInt SignMask = APInt::getSignMask(BitWidth);
3062
3063 if (!DemandedBits.intersects(RHS: SignMask))
3064 return TLO.CombineTo(O: Op, N: Op0);
3065
3066 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
3067 Depth: Depth + 1))
3068 return true;
3069
3070 if (Known.isNonNegative())
3071 return TLO.CombineTo(O: Op, N: Op0);
3072 if (Known.isNegative())
3073 return TLO.CombineTo(
3074 O: Op, N: TLO.DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT, Operand: Op0, Flags: Op->getFlags()));
3075
3076 Known.Zero |= SignMask;
3077 Known.One &= ~SignMask;
3078
3079 break;
3080 }
3081 case ISD::FCOPYSIGN: {
3082 SDValue Op0 = Op.getOperand(i: 0);
3083 SDValue Op1 = Op.getOperand(i: 1);
3084
3085 unsigned BitWidth0 = Op0.getScalarValueSizeInBits();
3086 unsigned BitWidth1 = Op1.getScalarValueSizeInBits();
3087 APInt SignMask0 = APInt::getSignMask(BitWidth: BitWidth0);
3088 APInt SignMask1 = APInt::getSignMask(BitWidth: BitWidth1);
3089
3090 if (!DemandedBits.intersects(RHS: SignMask0))
3091 return TLO.CombineTo(O: Op, N: Op0);
3092
3093 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: ~SignMask0 & DemandedBits, OriginalDemandedElts: DemandedElts,
3094 Known, TLO, Depth: Depth + 1) ||
3095 SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: SignMask1, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
3096 Depth: Depth + 1))
3097 return true;
3098
3099 if (Known2.isNonNegative())
3100 return TLO.CombineTo(
3101 O: Op, N: TLO.DAG.getNode(Opcode: ISD::FABS, DL: dl, VT, Operand: Op0, Flags: Op->getFlags()));
3102
3103 if (Known2.isNegative())
3104 return TLO.CombineTo(
3105 O: Op, N: TLO.DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT,
3106 Operand: TLO.DAG.getNode(Opcode: ISD::FABS, DL: SDLoc(Op0), VT, Operand: Op0)));
3107
3108 Known.Zero &= ~SignMask0;
3109 Known.One &= ~SignMask0;
3110 break;
3111 }
3112 case ISD::FNEG: {
3113 SDValue Op0 = Op.getOperand(i: 0);
3114 APInt SignMask = APInt::getSignMask(BitWidth);
3115
3116 if (!DemandedBits.intersects(RHS: SignMask))
3117 return TLO.CombineTo(O: Op, N: Op0);
3118
3119 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
3120 Depth: Depth + 1))
3121 return true;
3122
3123 if (!Known.isSignUnknown()) {
3124 Known.Zero ^= SignMask;
3125 Known.One ^= SignMask;
3126 }
3127
3128 break;
3129 }
3130 default:
3131 // We also ask the target about intrinsics (which could be specific to it).
3132 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3133 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3134 // TODO: Probably okay to remove after audit; here to reduce change size
3135 // in initial enablement patch for scalable vectors
3136 if (Op.getValueType().isScalableVector())
3137 break;
3138 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
3139 Known, TLO, Depth))
3140 return true;
3141 break;
3142 }
3143
3144 // Just use computeKnownBits to compute output bits.
3145 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
3146 break;
3147 }
3148
3149 // If we know the value of all of the demanded bits, return this as a
3150 // constant.
3151 if (!isTargetCanonicalConstantNode(Op) &&
3152 DemandedBits.isSubsetOf(RHS: Known.Zero | Known.One)) {
3153 // Avoid folding to a constant if any OpaqueConstant is involved.
3154 if (llvm::any_of(Range: Op->ops(), P: [](SDValue V) {
3155 auto *C = dyn_cast<ConstantSDNode>(Val&: V);
3156 return C && C->isOpaque();
3157 }))
3158 return false;
3159 if (VT.isInteger())
3160 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: Known.One, DL: dl, VT));
3161 if (VT.isFloatingPoint())
3162 return TLO.CombineTo(
3163 O: Op, N: TLO.DAG.getConstantFP(Val: APFloat(VT.getFltSemantics(), Known.One),
3164 DL: dl, VT));
3165 }
3166
3167 // A multi use 'all demanded elts' simplify failed to find any knownbits.
3168 // Try again just for the original demanded elts.
3169 // Ensure we do this AFTER constant folding above.
3170 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3171 Known = TLO.DAG.computeKnownBits(Op, DemandedElts: OriginalDemandedElts, Depth);
3172
3173 return false;
3174}
3175
3176bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
3177 const APInt &DemandedElts,
3178 DAGCombinerInfo &DCI) const {
3179 SelectionDAG &DAG = DCI.DAG;
3180 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3181 !DCI.isBeforeLegalizeOps());
3182
3183 APInt KnownUndef, KnownZero;
3184 bool Simplified =
3185 SimplifyDemandedVectorElts(Op, DemandedEltMask: DemandedElts, KnownUndef, KnownZero, TLO);
3186 if (Simplified) {
3187 DCI.AddToWorklist(N: Op.getNode());
3188 DCI.CommitTargetLoweringOpt(TLO);
3189 }
3190
3191 return Simplified;
3192}
3193
3194/// Given a vector binary operation and known undefined elements for each input
3195/// operand, compute whether each element of the output is undefined.
3196static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
3197 const APInt &UndefOp0,
3198 const APInt &UndefOp1) {
3199 EVT VT = BO.getValueType();
3200 assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
3201 "Vector binop only");
3202
3203 EVT EltVT = VT.getVectorElementType();
3204 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3205 assert(UndefOp0.getBitWidth() == NumElts &&
3206 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3207
3208 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3209 const APInt &UndefVals) {
3210 if (UndefVals[Index])
3211 return DAG.getUNDEF(VT: EltVT);
3212
3213 if (auto *BV = dyn_cast<BuildVectorSDNode>(Val&: V)) {
3214 // Try hard to make sure that the getNode() call is not creating temporary
3215 // nodes. Ignore opaque integers because they do not constant fold.
3216 SDValue Elt = BV->getOperand(Num: Index);
3217 auto *C = dyn_cast<ConstantSDNode>(Val&: Elt);
3218 if (isa<ConstantFPSDNode>(Val: Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3219 return Elt;
3220 }
3221
3222 return SDValue();
3223 };
3224
3225 APInt KnownUndef = APInt::getZero(numBits: NumElts);
3226 for (unsigned i = 0; i != NumElts; ++i) {
3227 // If both inputs for this element are either constant or undef and match
3228 // the element type, compute the constant/undef result for this element of
3229 // the vector.
3230 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3231 // not handle FP constants. The code within getNode() should be refactored
3232 // to avoid the danger of creating a bogus temporary node here.
3233 SDValue C0 = getUndefOrConstantElt(BO.getOperand(i: 0), i, UndefOp0);
3234 SDValue C1 = getUndefOrConstantElt(BO.getOperand(i: 1), i, UndefOp1);
3235 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3236 if (DAG.getNode(Opcode: BO.getOpcode(), DL: SDLoc(BO), VT: EltVT, N1: C0, N2: C1).isUndef())
3237 KnownUndef.setBit(i);
3238 }
3239 return KnownUndef;
3240}
3241
3242bool TargetLowering::SimplifyDemandedVectorElts(
3243 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3244 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3245 bool AssumeSingleUse) const {
3246 EVT VT = Op.getValueType();
3247 unsigned Opcode = Op.getOpcode();
3248 APInt DemandedElts = OriginalDemandedElts;
3249 unsigned NumElts = DemandedElts.getBitWidth();
3250 assert(VT.isVector() && "Expected vector op");
3251
3252 KnownUndef = KnownZero = APInt::getZero(numBits: NumElts);
3253
3254 if (!shouldSimplifyDemandedVectorElts(Op, TLO))
3255 return false;
3256
3257 // TODO: For now we assume we know nothing about scalable vectors.
3258 if (VT.isScalableVector())
3259 return false;
3260
3261 assert(VT.getVectorNumElements() == NumElts &&
3262 "Mask size mismatches value type element count!");
3263
3264 // Undef operand.
3265 if (Op.isUndef()) {
3266 KnownUndef.setAllBits();
3267 return false;
3268 }
3269
3270 // If Op has other users, assume that all elements are needed.
3271 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3272 DemandedElts.setAllBits();
3273
3274 // Not demanding any elements from Op.
3275 if (DemandedElts == 0) {
3276 KnownUndef.setAllBits();
3277 return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3278 }
3279
3280 // Limit search depth.
3281 if (Depth >= SelectionDAG::MaxRecursionDepth)
3282 return false;
3283
3284 SDLoc DL(Op);
3285 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3286 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3287
3288 // Helper for demanding the specified elements and all the bits of both binary
3289 // operands.
3290 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3291 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op: Op0, DemandedElts,
3292 DAG&: TLO.DAG, Depth: Depth + 1);
3293 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op: Op1, DemandedElts,
3294 DAG&: TLO.DAG, Depth: Depth + 1);
3295 if (NewOp0 || NewOp1) {
3296 SDValue NewOp =
3297 TLO.DAG.getNode(Opcode, DL: SDLoc(Op), VT, N1: NewOp0 ? NewOp0 : Op0,
3298 N2: NewOp1 ? NewOp1 : Op1, Flags: Op->getFlags());
3299 return TLO.CombineTo(O: Op, N: NewOp);
3300 }
3301 return false;
3302 };
3303
3304 switch (Opcode) {
3305 case ISD::SCALAR_TO_VECTOR: {
3306 if (!DemandedElts[0]) {
3307 KnownUndef.setAllBits();
3308 return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3309 }
3310 KnownUndef.setHighBits(NumElts - 1);
3311 break;
3312 }
3313 case ISD::BITCAST: {
3314 SDValue Src = Op.getOperand(i: 0);
3315 EVT SrcVT = Src.getValueType();
3316
3317 if (!SrcVT.isVector()) {
3318 // TODO - bigendian once we have test coverage.
3319 if (IsLE) {
3320 APInt DemandedSrcBits = APInt::getZero(numBits: SrcVT.getSizeInBits());
3321 unsigned EltSize = VT.getScalarSizeInBits();
3322 for (unsigned I = 0; I != NumElts; ++I) {
3323 if (DemandedElts[I]) {
3324 unsigned Offset = I * EltSize;
3325 DemandedSrcBits.setBits(loBit: Offset, hiBit: Offset + EltSize);
3326 }
3327 }
3328 KnownBits Known;
3329 if (SimplifyDemandedBits(Op: Src, DemandedBits: DemandedSrcBits, Known, TLO, Depth: Depth + 1))
3330 return true;
3331 }
3332 break;
3333 }
3334
3335 // Fast handling of 'identity' bitcasts.
3336 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3337 if (NumSrcElts == NumElts)
3338 return SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedElts, KnownUndef,
3339 KnownZero, TLO, Depth: Depth + 1);
3340
3341 APInt SrcDemandedElts, SrcZero, SrcUndef;
3342
3343 // Bitcast from 'large element' src vector to 'small element' vector, we
3344 // must demand a source element if any DemandedElt maps to it.
3345 if ((NumElts % NumSrcElts) == 0) {
3346 unsigned Scale = NumElts / NumSrcElts;
3347 SrcDemandedElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
3348 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
3349 TLO, Depth: Depth + 1))
3350 return true;
3351
3352 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3353 // of the large element.
3354 // TODO - bigendian once we have test coverage.
3355 if (IsLE) {
3356 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3357 APInt SrcDemandedBits = APInt::getZero(numBits: SrcEltSizeInBits);
3358 for (unsigned i = 0; i != NumElts; ++i)
3359 if (DemandedElts[i]) {
3360 unsigned Ofs = (i % Scale) * EltSizeInBits;
3361 SrcDemandedBits.setBits(loBit: Ofs, hiBit: Ofs + EltSizeInBits);
3362 }
3363
3364 KnownBits Known;
3365 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: SrcDemandedBits, OriginalDemandedElts: SrcDemandedElts, Known,
3366 TLO, Depth: Depth + 1))
3367 return true;
3368
3369 // The bitcast has split each wide element into a number of
3370 // narrow subelements. We have just computed the Known bits
3371 // for wide elements. See if element splitting results in
3372 // some subelements being zero. Only for demanded elements!
3373 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3374 if (!Known.Zero.extractBits(numBits: EltSizeInBits, bitPosition: SubElt * EltSizeInBits)
3375 .isAllOnes())
3376 continue;
3377 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3378 unsigned Elt = Scale * SrcElt + SubElt;
3379 if (DemandedElts[Elt])
3380 KnownZero.setBit(Elt);
3381 }
3382 }
3383 }
3384
3385 // If the src element is zero/undef then all the output elements will be -
3386 // only demanded elements are guaranteed to be correct.
3387 for (unsigned i = 0; i != NumSrcElts; ++i) {
3388 if (SrcDemandedElts[i]) {
3389 if (SrcZero[i])
3390 KnownZero.setBits(loBit: i * Scale, hiBit: (i + 1) * Scale);
3391 if (SrcUndef[i])
3392 KnownUndef.setBits(loBit: i * Scale, hiBit: (i + 1) * Scale);
3393 }
3394 }
3395 }
3396
3397 // Bitcast from 'small element' src vector to 'large element' vector, we
3398 // demand all smaller source elements covered by the larger demanded element
3399 // of this vector.
3400 if ((NumSrcElts % NumElts) == 0) {
3401 unsigned Scale = NumSrcElts / NumElts;
3402 SrcDemandedElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
3403 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
3404 TLO, Depth: Depth + 1))
3405 return true;
3406
3407 // If all the src elements covering an output element are zero/undef, then
3408 // the output element will be as well, assuming it was demanded.
3409 for (unsigned i = 0; i != NumElts; ++i) {
3410 if (DemandedElts[i]) {
3411 if (SrcZero.extractBits(numBits: Scale, bitPosition: i * Scale).isAllOnes())
3412 KnownZero.setBit(i);
3413 if (SrcUndef.extractBits(numBits: Scale, bitPosition: i * Scale).isAllOnes())
3414 KnownUndef.setBit(i);
3415 }
3416 }
3417 }
3418 break;
3419 }
3420 case ISD::FREEZE: {
3421 SDValue N0 = Op.getOperand(i: 0);
3422 if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(
3423 Op: N0, DemandedElts, Kind: UndefPoisonKind::UndefOrPoison, Depth: Depth + 1))
3424 return TLO.CombineTo(O: Op, N: N0);
3425
3426 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3427 // freeze(op(x, ...)) -> op(freeze(x), ...).
3428 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3429 return TLO.CombineTo(
3430 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT,
3431 Operand: TLO.DAG.getFreeze(V: N0.getOperand(i: 0))));
3432 break;
3433 }
3434 case ISD::BUILD_VECTOR: {
3435 // Check all elements and simplify any unused elements with UNDEF.
3436 if (!DemandedElts.isAllOnes()) {
3437 // Don't simplify BROADCASTS.
3438 if (llvm::any_of(Range: Op->op_values(),
3439 P: [&](SDValue Elt) { return Op.getOperand(i: 0) != Elt; })) {
3440 SmallVector<SDValue, 32> Ops(Op->ops());
3441 bool Updated = false;
3442 for (unsigned i = 0; i != NumElts; ++i) {
3443 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3444 Ops[i] = TLO.DAG.getUNDEF(VT: Ops[0].getValueType());
3445 KnownUndef.setBit(i);
3446 Updated = true;
3447 }
3448 }
3449 if (Updated)
3450 return TLO.CombineTo(O: Op, N: TLO.DAG.getBuildVector(VT, DL, Ops));
3451 }
3452 }
3453 for (unsigned i = 0; i != NumElts; ++i) {
3454 SDValue SrcOp = Op.getOperand(i);
3455 if (SrcOp.isUndef()) {
3456 KnownUndef.setBit(i);
3457 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3458 (isNullConstant(V: SrcOp) || isNullFPConstant(V: SrcOp))) {
3459 KnownZero.setBit(i);
3460 }
3461 }
3462 break;
3463 }
3464 case ISD::CONCAT_VECTORS: {
3465 EVT SubVT = Op.getOperand(i: 0).getValueType();
3466 unsigned NumSubVecs = Op.getNumOperands();
3467 unsigned NumSubElts = SubVT.getVectorNumElements();
3468 for (unsigned i = 0; i != NumSubVecs; ++i) {
3469 SDValue SubOp = Op.getOperand(i);
3470 APInt SubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
3471 APInt SubUndef, SubZero;
3472 if (SimplifyDemandedVectorElts(Op: SubOp, OriginalDemandedElts: SubElts, KnownUndef&: SubUndef, KnownZero&: SubZero, TLO,
3473 Depth: Depth + 1))
3474 return true;
3475 KnownUndef.insertBits(SubBits: SubUndef, bitPosition: i * NumSubElts);
3476 KnownZero.insertBits(SubBits: SubZero, bitPosition: i * NumSubElts);
3477 }
3478
3479 // Attempt to avoid multi-use ops if we don't need anything from them.
3480 if (!DemandedElts.isAllOnes()) {
3481 bool FoundNewSub = false;
3482 SmallVector<SDValue, 2> DemandedSubOps;
3483 for (unsigned i = 0; i != NumSubVecs; ++i) {
3484 SDValue SubOp = Op.getOperand(i);
3485 APInt SubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
3486 SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3487 Op: SubOp, DemandedElts: SubElts, DAG&: TLO.DAG, Depth: Depth + 1);
3488 DemandedSubOps.push_back(Elt: NewSubOp ? NewSubOp : SubOp);
3489 FoundNewSub = NewSubOp ? true : FoundNewSub;
3490 }
3491 if (FoundNewSub) {
3492 SDValue NewOp =
3493 TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc(Op), VT, Ops: DemandedSubOps);
3494 return TLO.CombineTo(O: Op, N: NewOp);
3495 }
3496 }
3497 break;
3498 }
3499 case ISD::INSERT_SUBVECTOR: {
3500 // Demand any elements from the subvector and the remainder from the src it
3501 // is inserted into.
3502 SDValue Src = Op.getOperand(i: 0);
3503 SDValue Sub = Op.getOperand(i: 1);
3504 uint64_t Idx = Op.getConstantOperandVal(i: 2);
3505 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3506 APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx);
3507 APInt DemandedSrcElts = DemandedElts;
3508 DemandedSrcElts.clearBits(LoBit: Idx, HiBit: Idx + NumSubElts);
3509
3510 // If none of the sub operand elements are demanded, bypass the insert.
3511 if (!DemandedSubElts)
3512 return TLO.CombineTo(O: Op, N: Src);
3513
3514 APInt SubUndef, SubZero;
3515 if (SimplifyDemandedVectorElts(Op: Sub, OriginalDemandedElts: DemandedSubElts, KnownUndef&: SubUndef, KnownZero&: SubZero, TLO,
3516 Depth: Depth + 1))
3517 return true;
3518
3519 // If none of the src operand elements are demanded, replace it with undef.
3520 if (!DemandedSrcElts && !Src.isUndef())
3521 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT,
3522 N1: TLO.DAG.getUNDEF(VT), N2: Sub,
3523 N3: Op.getOperand(i: 2)));
3524
3525 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef, KnownZero,
3526 TLO, Depth: Depth + 1))
3527 return true;
3528 KnownUndef.insertBits(SubBits: SubUndef, bitPosition: Idx);
3529 KnownZero.insertBits(SubBits: SubZero, bitPosition: Idx);
3530
3531 // Attempt to avoid multi-use ops if we don't need anything from them.
3532 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3533 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3534 Op: Src, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1);
3535 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3536 Op: Sub, DemandedElts: DemandedSubElts, DAG&: TLO.DAG, Depth: Depth + 1);
3537 if (NewSrc || NewSub) {
3538 NewSrc = NewSrc ? NewSrc : Src;
3539 NewSub = NewSub ? NewSub : Sub;
3540 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc(Op), VT, N1: NewSrc,
3541 N2: NewSub, N3: Op.getOperand(i: 2));
3542 return TLO.CombineTo(O: Op, N: NewOp);
3543 }
3544 }
3545 break;
3546 }
3547 case ISD::EXTRACT_SUBVECTOR: {
3548 // Offset the demanded elts by the subvector index.
3549 SDValue Src = Op.getOperand(i: 0);
3550 if (Src.getValueType().isScalableVector())
3551 break;
3552 uint64_t Idx = Op.getConstantOperandVal(i: 1);
3553 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3554 APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts).shl(shiftAmt: Idx);
3555
3556 APInt SrcUndef, SrcZero;
3557 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3558 Depth: Depth + 1))
3559 return true;
3560 KnownUndef = SrcUndef.extractBits(numBits: NumElts, bitPosition: Idx);
3561 KnownZero = SrcZero.extractBits(numBits: NumElts, bitPosition: Idx);
3562
3563 // Attempt to avoid multi-use ops if we don't need anything from them.
3564 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(Op: Src, DemandedElts: DemandedSrcElts,
3565 DAG&: TLO.DAG, Depth: Depth + 1);
3566 if (NewSrc) {
3567 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc(Op), VT, N1: NewSrc,
3568 N2: Op.getOperand(i: 1));
3569 return TLO.CombineTo(O: Op, N: NewOp);
3570 }
3571 break;
3572 }
3573 case ISD::INSERT_VECTOR_ELT: {
3574 SDValue Vec = Op.getOperand(i: 0);
3575 SDValue Scl = Op.getOperand(i: 1);
3576 auto *CIdx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));
3577
3578 // For a legal, constant insertion index, if we don't need this insertion
3579 // then strip it, else remove it from the demanded elts.
3580 if (CIdx && CIdx->getAPIntValue().ult(RHS: NumElts)) {
3581 unsigned Idx = CIdx->getZExtValue();
3582 if (!DemandedElts[Idx])
3583 return TLO.CombineTo(O: Op, N: Vec);
3584
3585 APInt DemandedVecElts(DemandedElts);
3586 DemandedVecElts.clearBit(BitPosition: Idx);
3587 if (SimplifyDemandedVectorElts(Op: Vec, OriginalDemandedElts: DemandedVecElts, KnownUndef,
3588 KnownZero, TLO, Depth: Depth + 1))
3589 return true;
3590
3591 KnownUndef.setBitVal(BitPosition: Idx, BitValue: Scl.isUndef());
3592
3593 KnownZero.setBitVal(BitPosition: Idx, BitValue: isNullConstant(V: Scl) || isNullFPConstant(V: Scl));
3594 break;
3595 }
3596
3597 APInt VecUndef, VecZero;
3598 if (SimplifyDemandedVectorElts(Op: Vec, OriginalDemandedElts: DemandedElts, KnownUndef&: VecUndef, KnownZero&: VecZero, TLO,
3599 Depth: Depth + 1))
3600 return true;
3601 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3602 break;
3603 }
3604 case ISD::VSELECT: {
3605 SDValue Sel = Op.getOperand(i: 0);
3606 SDValue LHS = Op.getOperand(i: 1);
3607 SDValue RHS = Op.getOperand(i: 2);
3608
3609 // Try to transform the select condition based on the current demanded
3610 // elements.
3611 APInt UndefSel, ZeroSel;
3612 if (SimplifyDemandedVectorElts(Op: Sel, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefSel, KnownZero&: ZeroSel, TLO,
3613 Depth: Depth + 1))
3614 return true;
3615
3616 // See if we can simplify either vselect operand.
3617 APInt DemandedLHS(DemandedElts);
3618 APInt DemandedRHS(DemandedElts);
3619 APInt UndefLHS, ZeroLHS;
3620 APInt UndefRHS, ZeroRHS;
3621 if (SimplifyDemandedVectorElts(Op: LHS, OriginalDemandedElts: DemandedLHS, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3622 Depth: Depth + 1))
3623 return true;
3624 if (SimplifyDemandedVectorElts(Op: RHS, OriginalDemandedElts: DemandedRHS, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3625 Depth: Depth + 1))
3626 return true;
3627
3628 KnownUndef = UndefLHS & UndefRHS;
3629 KnownZero = ZeroLHS & ZeroRHS;
3630
3631 // If we know that the selected element is always zero, we don't need the
3632 // select value element.
3633 APInt DemandedSel = DemandedElts & ~KnownZero;
3634 if (DemandedSel != DemandedElts)
3635 if (SimplifyDemandedVectorElts(Op: Sel, OriginalDemandedElts: DemandedSel, KnownUndef&: UndefSel, KnownZero&: ZeroSel, TLO,
3636 Depth: Depth + 1))
3637 return true;
3638
3639 break;
3640 }
3641 case ISD::VECTOR_SHUFFLE: {
3642 SDValue LHS = Op.getOperand(i: 0);
3643 SDValue RHS = Op.getOperand(i: 1);
3644 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Val&: Op)->getMask();
3645
3646 // Collect demanded elements from shuffle operands..
3647 APInt DemandedLHS(NumElts, 0);
3648 APInt DemandedRHS(NumElts, 0);
3649 for (unsigned i = 0; i != NumElts; ++i) {
3650 int M = ShuffleMask[i];
3651 if (M < 0 || !DemandedElts[i])
3652 continue;
3653 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3654 if (M < (int)NumElts)
3655 DemandedLHS.setBit(M);
3656 else
3657 DemandedRHS.setBit(M - NumElts);
3658 }
3659
3660 // If either side isn't demanded, replace it by UNDEF. We handle this
3661 // explicitly here to also simplify in case of multiple uses (on the
3662 // contrary to the SimplifyDemandedVectorElts calls below).
3663 bool FoldLHS = !DemandedLHS && !LHS.isUndef();
3664 bool FoldRHS = !DemandedRHS && !RHS.isUndef();
3665 if (FoldLHS || FoldRHS) {
3666 LHS = FoldLHS ? TLO.DAG.getUNDEF(VT: LHS.getValueType()) : LHS;
3667 RHS = FoldRHS ? TLO.DAG.getUNDEF(VT: RHS.getValueType()) : RHS;
3668 SDValue NewOp =
3669 TLO.DAG.getVectorShuffle(VT, dl: SDLoc(Op), N1: LHS, N2: RHS, Mask: ShuffleMask);
3670 return TLO.CombineTo(O: Op, N: NewOp);
3671 }
3672
3673 // See if we can simplify either shuffle operand.
3674 APInt UndefLHS, ZeroLHS;
3675 APInt UndefRHS, ZeroRHS;
3676 if (SimplifyDemandedVectorElts(Op: LHS, OriginalDemandedElts: DemandedLHS, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3677 Depth: Depth + 1))
3678 return true;
3679 if (SimplifyDemandedVectorElts(Op: RHS, OriginalDemandedElts: DemandedRHS, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3680 Depth: Depth + 1))
3681 return true;
3682
3683 // Simplify mask using undef elements from LHS/RHS.
3684 bool Updated = false;
3685 bool IdentityLHS = true, IdentityRHS = true;
3686 SmallVector<int, 32> NewMask(ShuffleMask);
3687 for (unsigned i = 0; i != NumElts; ++i) {
3688 int &M = NewMask[i];
3689 if (M < 0)
3690 continue;
3691 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3692 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3693 Updated = true;
3694 M = -1;
3695 }
3696 IdentityLHS &= (M < 0) || (M == (int)i);
3697 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3698 }
3699
3700 // Update legal shuffle masks based on demanded elements if it won't reduce
3701 // to Identity which can cause premature removal of the shuffle mask.
3702 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3703 SDValue LegalShuffle =
3704 buildLegalVectorShuffle(VT, DL, N0: LHS, N1: RHS, Mask: NewMask, DAG&: TLO.DAG);
3705 if (LegalShuffle)
3706 return TLO.CombineTo(O: Op, N: LegalShuffle);
3707 }
3708
3709 // Propagate undef/zero elements from LHS/RHS.
3710 for (unsigned i = 0; i != NumElts; ++i) {
3711 int M = ShuffleMask[i];
3712 if (M < 0) {
3713 KnownUndef.setBit(i);
3714 } else if (M < (int)NumElts) {
3715 if (UndefLHS[M])
3716 KnownUndef.setBit(i);
3717 if (ZeroLHS[M])
3718 KnownZero.setBit(i);
3719 } else {
3720 if (UndefRHS[M - NumElts])
3721 KnownUndef.setBit(i);
3722 if (ZeroRHS[M - NumElts])
3723 KnownZero.setBit(i);
3724 }
3725 }
3726 break;
3727 }
3728 case ISD::ANY_EXTEND_VECTOR_INREG:
3729 case ISD::SIGN_EXTEND_VECTOR_INREG:
3730 case ISD::ZERO_EXTEND_VECTOR_INREG: {
3731 APInt SrcUndef, SrcZero;
3732 SDValue Src = Op.getOperand(i: 0);
3733 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3734 APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts);
3735 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3736 Depth: Depth + 1))
3737 return true;
3738 KnownZero = SrcZero.zextOrTrunc(width: NumElts);
3739 KnownUndef = SrcUndef.zextOrTrunc(width: NumElts);
3740
3741 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3742 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3743 DemandedSrcElts == 1) {
3744 // aext - if we just need the bottom element then we can bitcast.
3745 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
3746 }
3747
3748 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3749 // zext(undef) upper bits are guaranteed to be zero.
3750 if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3751 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: SDLoc(Op), VT));
3752 KnownUndef.clearAllBits();
3753
3754 // zext - if we just need the bottom element then we can mask:
3755 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3756 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3757 Op->isOnlyUserOf(N: Src.getNode()) &&
3758 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3759 SDLoc DL(Op);
3760 EVT SrcVT = Src.getValueType();
3761 EVT SrcSVT = SrcVT.getScalarType();
3762
3763 // If we're after type legalization and SrcSVT is not legal, use the
3764 // promoted type for creating constants to avoid creating nodes with
3765 // illegal types.
3766 if (TLO.LegalTypes())
3767 SrcSVT = getLegalTypeToTransformTo(Context&: *TLO.DAG.getContext(), VT: SrcSVT);
3768
3769 SmallVector<SDValue> MaskElts;
3770 MaskElts.push_back(Elt: TLO.DAG.getAllOnesConstant(DL, VT: SrcSVT));
3771 MaskElts.append(NumInputs: NumSrcElts - 1, Elt: TLO.DAG.getConstant(Val: 0, DL, VT: SrcSVT));
3772 SDValue Mask = TLO.DAG.getBuildVector(VT: SrcVT, DL, Ops: MaskElts);
3773 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3774 Opcode: ISD::AND, DL, VT: SrcVT, Ops: {Src.getOperand(i: 1), Mask})) {
3775 Fold = TLO.DAG.getNode(Opcode: ISD::AND, DL, VT: SrcVT, N1: Src.getOperand(i: 0), N2: Fold);
3776 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Fold));
3777 }
3778 }
3779 }
3780 break;
3781 }
3782
3783 // TODO: There are more binop opcodes that could be handled here - MIN,
3784 // MAX, saturated math, etc.
3785 case ISD::ADD: {
3786 SDValue Op0 = Op.getOperand(i: 0);
3787 SDValue Op1 = Op.getOperand(i: 1);
3788 if (Op0 == Op1 && Op->isOnlyUserOf(N: Op0.getNode())) {
3789 APInt UndefLHS, ZeroLHS;
3790 if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3791 Depth: Depth + 1, /*AssumeSingleUse*/ true))
3792 return true;
3793 }
3794 [[fallthrough]];
3795 }
3796 case ISD::AVGCEILS:
3797 case ISD::AVGCEILU:
3798 case ISD::AVGFLOORS:
3799 case ISD::AVGFLOORU:
3800 case ISD::OR:
3801 case ISD::XOR:
3802 case ISD::SUB:
3803 case ISD::FADD:
3804 case ISD::FSUB:
3805 case ISD::FMUL:
3806 case ISD::FDIV:
3807 case ISD::FREM: {
3808 SDValue Op0 = Op.getOperand(i: 0);
3809 SDValue Op1 = Op.getOperand(i: 1);
3810
3811 APInt UndefRHS, ZeroRHS;
3812 if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3813 Depth: Depth + 1))
3814 return true;
3815 APInt UndefLHS, ZeroLHS;
3816 if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3817 Depth: Depth + 1))
3818 return true;
3819
3820 KnownZero = ZeroLHS & ZeroRHS;
3821 KnownUndef = getKnownUndefForVectorBinop(BO: Op, DAG&: TLO.DAG, UndefOp0: UndefLHS, UndefOp1: UndefRHS);
3822
3823 // Attempt to avoid multi-use ops if we don't need anything from them.
3824 // TODO - use KnownUndef to relax the demandedelts?
3825 if (!DemandedElts.isAllOnes())
3826 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3827 return true;
3828 break;
3829 }
3830 case ISD::SHL:
3831 case ISD::SRL:
3832 case ISD::SRA:
3833 case ISD::ROTL:
3834 case ISD::ROTR: {
3835 SDValue Op0 = Op.getOperand(i: 0);
3836 SDValue Op1 = Op.getOperand(i: 1);
3837
3838 APInt UndefRHS, ZeroRHS;
3839 if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3840 Depth: Depth + 1))
3841 return true;
3842 APInt UndefLHS, ZeroLHS;
3843 if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3844 Depth: Depth + 1))
3845 return true;
3846
3847 KnownZero = ZeroLHS;
3848 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3849
3850 // Attempt to avoid multi-use ops if we don't need anything from them.
3851 // TODO - use KnownUndef to relax the demandedelts?
3852 if (!DemandedElts.isAllOnes())
3853 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3854 return true;
3855 break;
3856 }
3857 case ISD::MUL:
3858 case ISD::MULHU:
3859 case ISD::MULHS:
3860 case ISD::AND: {
3861 SDValue Op0 = Op.getOperand(i: 0);
3862 SDValue Op1 = Op.getOperand(i: 1);
3863
3864 APInt SrcUndef, SrcZero;
3865 if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3866 Depth: Depth + 1))
3867 return true;
3868 // FIXME: If we know that a demanded element was zero in Op1 we don't need
3869 // to demand it in Op0 - its guaranteed to be zero. There is however a
3870 // restriction, as we must not make any of the originally demanded elements
3871 // more poisonous. We could reduce amount of elements demanded, but then we
3872 // also need a to inform SimplifyDemandedVectorElts that some elements must
3873 // not be made more poisonous.
3874 if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef, KnownZero,
3875 TLO, Depth: Depth + 1))
3876 return true;
3877
3878 KnownUndef &= DemandedElts;
3879 KnownZero &= DemandedElts;
3880
3881 // If every element pair has a zero/undef/poison then just fold to zero.
3882 // fold (and x, undef/poison) -> 0 / (and x, 0) -> 0
3883 // fold (mul x, undef/poison) -> 0 / (mul x, 0) -> 0
3884 if (DemandedElts.isSubsetOf(RHS: SrcZero | KnownZero | SrcUndef | KnownUndef))
3885 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: SDLoc(Op), VT));
3886
3887 // If either side has a zero element, then the result element is zero, even
3888 // if the other is an UNDEF.
3889 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3890 // and then handle 'and' nodes with the rest of the binop opcodes.
3891 KnownZero |= SrcZero;
3892 KnownUndef &= SrcUndef;
3893 KnownUndef &= ~KnownZero;
3894
3895 // Attempt to avoid multi-use ops if we don't need anything from them.
3896 if (!DemandedElts.isAllOnes())
3897 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3898 return true;
3899 break;
3900 }
3901 case ISD::TRUNCATE:
3902 case ISD::SIGN_EXTEND:
3903 case ISD::ZERO_EXTEND:
3904 if (SimplifyDemandedVectorElts(Op: Op.getOperand(i: 0), OriginalDemandedElts: DemandedElts, KnownUndef,
3905 KnownZero, TLO, Depth: Depth + 1))
3906 return true;
3907
3908 if (!DemandedElts.isAllOnes())
3909 if (SDValue NewOp = SimplifyMultipleUseDemandedVectorElts(
3910 Op: Op.getOperand(i: 0), DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
3911 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode, DL: SDLoc(Op), VT, Operand: NewOp));
3912
3913 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3914 // zext(undef) upper bits are guaranteed to be zero.
3915 if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3916 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: SDLoc(Op), VT));
3917 KnownUndef.clearAllBits();
3918 }
3919 break;
3920 case ISD::SINT_TO_FP:
3921 case ISD::UINT_TO_FP:
3922 case ISD::FP_TO_SINT:
3923 case ISD::FP_TO_UINT:
3924 if (SimplifyDemandedVectorElts(Op: Op.getOperand(i: 0), OriginalDemandedElts: DemandedElts, KnownUndef,
3925 KnownZero, TLO, Depth: Depth + 1))
3926 return true;
3927 // Don't fall through to generic undef -> undef handling.
3928 return false;
3929 default: {
3930 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3931 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3932 KnownZero, TLO, Depth))
3933 return true;
3934 } else {
3935 KnownBits Known;
3936 APInt DemandedBits = APInt::getAllOnes(numBits: EltSizeInBits);
3937 if (SimplifyDemandedBits(Op, OriginalDemandedBits: DemandedBits, OriginalDemandedElts, Known,
3938 TLO, Depth, AssumeSingleUse))
3939 return true;
3940 }
3941 break;
3942 }
3943 }
3944 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3945
3946 // Constant fold all undef cases.
3947 // TODO: Handle zero cases as well.
3948 if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3949 return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3950
3951 return false;
3952}
3953
3954/// Determine which of the bits specified in Mask are known to be either zero or
3955/// one and return them in the Known.
3956void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3957 KnownBits &Known,
3958 const APInt &DemandedElts,
3959 const SelectionDAG &DAG,
3960 unsigned Depth) const {
3961 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3962 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3963 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3964 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3965 "Should use MaskedValueIsZero if you don't know whether Op"
3966 " is a target node!");
3967 Known.resetAll();
3968}
3969
3970void TargetLowering::computeKnownBitsForTargetInstr(
3971 GISelValueTracking &Analysis, Register R, KnownBits &Known,
3972 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3973 unsigned Depth) const {
3974 Known.resetAll();
3975}
3976
3977void TargetLowering::computeKnownFPClassForTargetInstr(
3978 GISelValueTracking &Analysis, Register R, KnownFPClass &Known,
3979 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3980 unsigned Depth) const {
3981 Known.resetAll();
3982}
3983
3984void TargetLowering::computeKnownBitsForFrameIndex(
3985 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3986 // The low bits are known zero if the pointer is aligned.
3987 Known.Zero.setLowBits(Log2(A: MF.getFrameInfo().getObjectAlign(ObjectIdx: FrameIdx)));
3988}
3989
3990Align TargetLowering::computeKnownAlignForTargetInstr(
3991 GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI,
3992 unsigned Depth) const {
3993 return Align(1);
3994}
3995
3996/// This method can be implemented by targets that want to expose additional
3997/// information about sign bits to the DAG Combiner.
3998unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3999 const APInt &,
4000 const SelectionDAG &,
4001 unsigned Depth) const {
4002 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4003 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4004 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4005 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4006 "Should use ComputeNumSignBits if you don't know whether Op"
4007 " is a target node!");
4008 return 1;
4009}
4010
4011unsigned TargetLowering::computeNumSignBitsForTargetInstr(
4012 GISelValueTracking &Analysis, Register R, const APInt &DemandedElts,
4013 const MachineRegisterInfo &MRI, unsigned Depth) const {
4014 return 1;
4015}
4016
4017bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
4018 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
4019 TargetLoweringOpt &TLO, unsigned Depth) const {
4020 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4021 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4022 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4023 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4024 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
4025 " is a target node!");
4026 return false;
4027}
4028
4029bool TargetLowering::SimplifyDemandedBitsForTargetNode(
4030 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
4031 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
4032 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4033 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4034 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4035 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4036 "Should use SimplifyDemandedBits if you don't know whether Op"
4037 " is a target node!");
4038 computeKnownBitsForTargetNode(Op, Known, DemandedElts, DAG: TLO.DAG, Depth);
4039 return false;
4040}
4041
4042SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
4043 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
4044 SelectionDAG &DAG, unsigned Depth) const {
4045 assert(
4046 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4047 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4048 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4049 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4050 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
4051 " is a target node!");
4052 return SDValue();
4053}
4054
4055SDValue
4056TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
4057 SDValue N1, MutableArrayRef<int> Mask,
4058 SelectionDAG &DAG) const {
4059 bool LegalMask = isShuffleMaskLegal(Mask, VT);
4060 if (!LegalMask) {
4061 std::swap(a&: N0, b&: N1);
4062 ShuffleVectorSDNode::commuteMask(Mask);
4063 LegalMask = isShuffleMaskLegal(Mask, VT);
4064 }
4065
4066 if (!LegalMask)
4067 return SDValue();
4068
4069 return DAG.getVectorShuffle(VT, dl: DL, N1: N0, N2: N1, Mask);
4070}
4071
4072const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
4073 return nullptr;
4074}
4075
4076bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
4077 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4078 UndefPoisonKind Kind, unsigned Depth) const {
4079 assert(
4080 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4081 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4082 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4083 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4084 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
4085 " is a target node!");
4086
4087 // If Op can't create undef/poison and none of its operands are undef/poison
4088 // then Op is never undef/poison.
4089 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, Kind,
4090 /*ConsiderFlags*/ true, Depth) &&
4091 all_of(Range: Op->ops(), P: [&](SDValue V) {
4092 return DAG.isGuaranteedNotToBeUndefOrPoison(Op: V, Kind, Depth: Depth + 1);
4093 });
4094}
4095
4096bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
4097 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4098 UndefPoisonKind Kind, bool ConsiderFlags, unsigned Depth) const {
4099 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4100 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4101 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4102 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4103 "Should use canCreateUndefOrPoison if you don't know whether Op"
4104 " is a target node!");
4105 // Be conservative and return true.
4106 return true;
4107}
4108
4109void TargetLowering::computeKnownFPClassForTargetNode(const SDValue Op,
4110 KnownFPClass &Known,
4111 const APInt &DemandedElts,
4112 const SelectionDAG &DAG,
4113 unsigned Depth) const {
4114 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4115 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4116 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4117 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4118 "Should use computeKnownFPClass if you don't know whether Op"
4119 " is a target node!");
4120}
4121
4122bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
4123 const APInt &DemandedElts,
4124 const SelectionDAG &DAG,
4125 bool SNaN,
4126 unsigned Depth) const {
4127 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4128 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4129 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4130 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4131 "Should use isKnownNeverNaN if you don't know whether Op"
4132 " is a target node!");
4133 return false;
4134}
4135
4136bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
4137 const APInt &DemandedElts,
4138 APInt &UndefElts,
4139 const SelectionDAG &DAG,
4140 unsigned Depth) const {
4141 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4142 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4143 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4144 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4145 "Should use isSplatValue if you don't know whether Op"
4146 " is a target node!");
4147 return false;
4148}
4149
4150// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
4151// work with truncating build vectors and vectors with elements of less than
4152// 8 bits.
4153bool TargetLowering::isConstTrueVal(SDValue N) const {
4154 if (!N)
4155 return false;
4156
4157 unsigned EltWidth;
4158 APInt CVal;
4159 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
4160 /*AllowTruncation=*/true)) {
4161 CVal = CN->getAPIntValue();
4162 EltWidth = N.getValueType().getScalarSizeInBits();
4163 } else
4164 return false;
4165
4166 // If this is a truncating splat, truncate the splat value.
4167 // Otherwise, we may fail to match the expected values below.
4168 if (EltWidth < CVal.getBitWidth())
4169 CVal = CVal.trunc(width: EltWidth);
4170
4171 switch (getBooleanContents(Type: N.getValueType())) {
4172 case UndefinedBooleanContent:
4173 return CVal[0];
4174 case ZeroOrOneBooleanContent:
4175 return CVal.isOne();
4176 case ZeroOrNegativeOneBooleanContent:
4177 return CVal.isAllOnes();
4178 }
4179
4180 llvm_unreachable("Invalid boolean contents");
4181}
4182
4183bool TargetLowering::isConstFalseVal(SDValue N) const {
4184 if (!N)
4185 return false;
4186
4187 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: N);
4188 if (!CN) {
4189 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Val&: N);
4190 if (!BV)
4191 return false;
4192
4193 // Only interested in constant splats, we don't care about undef
4194 // elements in identifying boolean constants and getConstantSplatNode
4195 // returns NULL if all ops are undef;
4196 CN = BV->getConstantSplatNode();
4197 if (!CN)
4198 return false;
4199 }
4200
4201 if (getBooleanContents(Type: N->getValueType(ResNo: 0)) == UndefinedBooleanContent)
4202 return !CN->getAPIntValue()[0];
4203
4204 return CN->isZero();
4205}
4206
4207bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
4208 bool SExt) const {
4209 if (VT == MVT::i1)
4210 return N->isOne();
4211
4212 TargetLowering::BooleanContent Cnt = getBooleanContents(Type: VT);
4213 switch (Cnt) {
4214 case TargetLowering::ZeroOrOneBooleanContent:
4215 // An extended value of 1 is always true, unless its original type is i1,
4216 // in which case it will be sign extended to -1.
4217 return (N->isOne() && !SExt) || (SExt && (N->getValueType(ResNo: 0) != MVT::i1));
4218 case TargetLowering::UndefinedBooleanContent:
4219 case TargetLowering::ZeroOrNegativeOneBooleanContent:
4220 return N->isAllOnes() && SExt;
4221 }
4222 llvm_unreachable("Unexpected enumeration.");
4223}
4224
4225/// This helper function of SimplifySetCC tries to optimize the comparison when
4226/// either operand of the SetCC node is a bitwise-and instruction.
4227SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
4228 ISD::CondCode Cond, const SDLoc &DL,
4229 DAGCombinerInfo &DCI) const {
4230 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
4231 std::swap(a&: N0, b&: N1);
4232
4233 SelectionDAG &DAG = DCI.DAG;
4234 EVT OpVT = N0.getValueType();
4235 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
4236 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4237 return SDValue();
4238
4239 // (X & Y) != 0 --> zextOrTrunc(X & Y)
4240 // iff everything but LSB is known zero:
4241 if (Cond == ISD::SETNE && isNullConstant(V: N1) &&
4242 (getBooleanContents(Type: OpVT) == TargetLowering::UndefinedBooleanContent ||
4243 getBooleanContents(Type: OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
4244 unsigned NumEltBits = OpVT.getScalarSizeInBits();
4245 APInt UpperBits = APInt::getHighBitsSet(numBits: NumEltBits, hiBitsSet: NumEltBits - 1);
4246 if (DAG.MaskedValueIsZero(Op: N0, Mask: UpperBits))
4247 return DAG.getBoolExtOrTrunc(Op: N0, SL: DL, VT, OpVT);
4248 }
4249
4250 // Try to eliminate a power-of-2 mask constant by converting to a signbit
4251 // test in a narrow type that we can truncate to with no cost. Examples:
4252 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
4253 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
4254 // TODO: This conservatively checks for type legality on the source and
4255 // destination types. That may inhibit optimizations, but it also
4256 // allows setcc->shift transforms that may be more beneficial.
4257 auto *AndC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
4258 if (AndC && isNullConstant(V: N1) && AndC->getAPIntValue().isPowerOf2() &&
4259 isTypeLegal(VT: OpVT) && N0.hasOneUse()) {
4260 EVT NarrowVT = EVT::getIntegerVT(Context&: *DAG.getContext(),
4261 BitWidth: AndC->getAPIntValue().getActiveBits());
4262 if (isTruncateFree(FromVT: OpVT, ToVT: NarrowVT) && isTypeLegal(VT: NarrowVT)) {
4263 SDValue Trunc = DAG.getZExtOrTrunc(Op: N0.getOperand(i: 0), DL, VT: NarrowVT);
4264 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: NarrowVT);
4265 return DAG.getSetCC(DL, VT, LHS: Trunc, RHS: Zero,
4266 Cond: Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
4267 }
4268 }
4269
4270 // Match these patterns in any of their permutations:
4271 // (X & Y) == Y
4272 // (X & Y) != Y
4273 SDValue X, Y;
4274 if (N0.getOperand(i: 0) == N1) {
4275 X = N0.getOperand(i: 1);
4276 Y = N0.getOperand(i: 0);
4277 } else if (N0.getOperand(i: 1) == N1) {
4278 X = N0.getOperand(i: 0);
4279 Y = N0.getOperand(i: 1);
4280 } else {
4281 return SDValue();
4282 }
4283
4284 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4285 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
4286 // its liable to create and infinite loop.
4287 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: OpVT);
4288 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
4289 DAG.isKnownToBeAPowerOfTwo(Val: Y)) {
4290 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4291 // Note that where Y is variable and is known to have at most one bit set
4292 // (for example, if it is Z & 1) we cannot do this; the expressions are not
4293 // equivalent when Y == 0.
4294 assert(OpVT.isInteger());
4295 Cond = ISD::getSetCCInverse(Operation: Cond, Type: OpVT);
4296 if (DCI.isBeforeLegalizeOps() ||
4297 isCondCodeLegal(CC: Cond, VT: N0.getSimpleValueType()))
4298 return DAG.getSetCC(DL, VT, LHS: N0, RHS: Zero, Cond);
4299 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
4300 // If the target supports an 'and-not' or 'and-complement' logic operation,
4301 // try to use that to make a comparison operation more efficient.
4302 // But don't do this transform if the mask is a single bit because there are
4303 // more efficient ways to deal with that case (for example, 'bt' on x86 or
4304 // 'rlwinm' on PPC).
4305
4306 // Bail out if the compare operand that we want to turn into a zero is
4307 // already a zero (otherwise, infinite loop).
4308 if (isNullConstant(V: Y))
4309 return SDValue();
4310
4311 // Transform this into: ~X & Y == 0.
4312 SDValue NotX = DAG.getNOT(DL: SDLoc(X), Val: X, VT: OpVT);
4313 SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(N0), VT: OpVT, N1: NotX, N2: Y);
4314 return DAG.getSetCC(DL, VT, LHS: NewAnd, RHS: Zero, Cond);
4315 }
4316
4317 return SDValue();
4318}
4319
4320/// This helper function of SimplifySetCC tries to optimize the comparison when
4321/// either operand of the SetCC node is a bitwise-or instruction.
4322/// For now, this just transforms (X | Y) ==/!= Y into X & ~Y ==/!= 0.
4323SDValue TargetLowering::foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1,
4324 ISD::CondCode Cond, const SDLoc &DL,
4325 DAGCombinerInfo &DCI) const {
4326 if (N1.getOpcode() == ISD::OR && N0.getOpcode() != ISD::OR)
4327 std::swap(a&: N0, b&: N1);
4328
4329 SelectionDAG &DAG = DCI.DAG;
4330 EVT OpVT = N0.getValueType();
4331 if (!N0.hasOneUse() || !OpVT.isInteger() ||
4332 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4333 return SDValue();
4334
4335 // (X | Y) == Y
4336 // (X | Y) != Y
4337 SDValue X;
4338 if (sd_match(N: N0, P: m_Or(L: m_Value(N&: X), R: m_Specific(N: N1))) && hasAndNotCompare(Y: X)) {
4339 // If the target supports an 'and-not' or 'and-complement' logic operation,
4340 // try to use that to make a comparison operation more efficient.
4341
4342 // Bail out if the compare operand that we want to turn into a zero is
4343 // already a zero (otherwise, infinite loop).
4344 if (isNullConstant(V: N1))
4345 return SDValue();
4346
4347 // Transform this into: X & ~Y ==/!= 0.
4348 SDValue NotY = DAG.getNOT(DL: SDLoc(N1), Val: N1, VT: OpVT);
4349 SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(N0), VT: OpVT, N1: X, N2: NotY);
4350 return DAG.getSetCC(DL, VT, LHS: NewAnd, RHS: DAG.getConstant(Val: 0, DL, VT: OpVT), Cond);
4351 }
4352
4353 return SDValue();
4354}
4355
4356/// There are multiple IR patterns that could be checking whether certain
4357/// truncation of a signed number would be lossy or not. The pattern which is
4358/// best at IR level, may not lower optimally. Thus, we want to unfold it.
4359/// We are looking for the following pattern: (KeptBits is a constant)
4360/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4361/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4362/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
4363/// We will unfold it into the natural trunc+sext pattern:
4364/// ((%x << C) a>> C) dstcond %x
4365/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
4366SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4367 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4368 const SDLoc &DL) const {
4369 // We must be comparing with a constant.
4370 ConstantSDNode *C1;
4371 if (!(C1 = dyn_cast<ConstantSDNode>(Val&: N1)))
4372 return SDValue();
4373
4374 // N0 should be: add %x, (1 << (KeptBits-1))
4375 if (N0->getOpcode() != ISD::ADD)
4376 return SDValue();
4377
4378 // And we must be 'add'ing a constant.
4379 ConstantSDNode *C01;
4380 if (!(C01 = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1))))
4381 return SDValue();
4382
4383 SDValue X = N0->getOperand(Num: 0);
4384 EVT XVT = X.getValueType();
4385
4386 // Validate constants ...
4387
4388 APInt I1 = C1->getAPIntValue();
4389
4390 ISD::CondCode NewCond;
4391 if (Cond == ISD::CondCode::SETULT) {
4392 NewCond = ISD::CondCode::SETEQ;
4393 } else if (Cond == ISD::CondCode::SETULE) {
4394 NewCond = ISD::CondCode::SETEQ;
4395 // But need to 'canonicalize' the constant.
4396 I1 += 1;
4397 } else if (Cond == ISD::CondCode::SETUGT) {
4398 NewCond = ISD::CondCode::SETNE;
4399 // But need to 'canonicalize' the constant.
4400 I1 += 1;
4401 } else if (Cond == ISD::CondCode::SETUGE) {
4402 NewCond = ISD::CondCode::SETNE;
4403 } else
4404 return SDValue();
4405
4406 APInt I01 = C01->getAPIntValue();
4407
4408 auto checkConstants = [&I1, &I01]() -> bool {
4409 // Both of them must be power-of-two, and the constant from setcc is bigger.
4410 return I1.ugt(RHS: I01) && I1.isPowerOf2() && I01.isPowerOf2();
4411 };
4412
4413 if (checkConstants()) {
4414 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4415 } else {
4416 // What if we invert constants? (and the target predicate)
4417 I1.negate();
4418 I01.negate();
4419 assert(XVT.isInteger());
4420 NewCond = getSetCCInverse(Operation: NewCond, Type: XVT);
4421 if (!checkConstants())
4422 return SDValue();
4423 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4424 }
4425
4426 // They are power-of-two, so which bit is set?
4427 const unsigned KeptBits = I1.logBase2();
4428 const unsigned KeptBitsMinusOne = I01.logBase2();
4429
4430 // Magic!
4431 if (KeptBits != (KeptBitsMinusOne + 1))
4432 return SDValue();
4433 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4434
4435 // We don't want to do this in every single case.
4436 SelectionDAG &DAG = DCI.DAG;
4437 if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
4438 return SDValue();
4439
4440 // Unfold into: sext_inreg(%x) cond %x
4441 // Where 'cond' will be either 'eq' or 'ne'.
4442 SDValue SExtInReg = DAG.getNode(
4443 Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: XVT, N1: X,
4444 N2: DAG.getValueType(EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: KeptBits)));
4445 return DAG.getSetCC(DL, VT: SCCVT, LHS: SExtInReg, RHS: X, Cond: NewCond);
4446}
4447
4448// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
4449SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4450 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4451 DAGCombinerInfo &DCI, const SDLoc &DL) const {
4452 assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() &&
4453 "Should be a comparison with 0.");
4454 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4455 "Valid only for [in]equality comparisons.");
4456
4457 unsigned NewShiftOpcode;
4458 SDValue X, C, Y;
4459
4460 SelectionDAG &DAG = DCI.DAG;
4461
4462 // Look for '(C l>>/<< Y)'.
4463 auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
4464 // The shift should be one-use.
4465 if (!V.hasOneUse())
4466 return false;
4467 unsigned OldShiftOpcode = V.getOpcode();
4468 switch (OldShiftOpcode) {
4469 case ISD::SHL:
4470 NewShiftOpcode = ISD::SRL;
4471 break;
4472 case ISD::SRL:
4473 NewShiftOpcode = ISD::SHL;
4474 break;
4475 default:
4476 return false; // must be a logical shift.
4477 }
4478 // We should be shifting a constant.
4479 // FIXME: best to use isConstantOrConstantVector().
4480 C = V.getOperand(i: 0);
4481 ConstantSDNode *CC =
4482 isConstOrConstSplat(N: C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4483 if (!CC)
4484 return false;
4485 Y = V.getOperand(i: 1);
4486
4487 ConstantSDNode *XC =
4488 isConstOrConstSplat(N: X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4489 return shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
4490 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4491 };
4492
4493 // LHS of comparison should be an one-use 'and'.
4494 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4495 return SDValue();
4496
4497 X = N0.getOperand(i: 0);
4498 SDValue Mask = N0.getOperand(i: 1);
4499
4500 // 'and' is commutative!
4501 if (!Match(Mask)) {
4502 std::swap(a&: X, b&: Mask);
4503 if (!Match(Mask))
4504 return SDValue();
4505 }
4506
4507 EVT VT = X.getValueType();
4508
4509 // Produce:
4510 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4511 SDValue T0 = DAG.getNode(Opcode: NewShiftOpcode, DL, VT, N1: X, N2: Y);
4512 SDValue T1 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: T0, N2: C);
4513 SDValue T2 = DAG.getSetCC(DL, VT: SCCVT, LHS: T1, RHS: N1C, Cond);
4514 return T2;
4515}
4516
4517/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4518/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4519/// handle the commuted versions of these patterns.
4520SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4521 ISD::CondCode Cond, const SDLoc &DL,
4522 DAGCombinerInfo &DCI) const {
4523 unsigned BOpcode = N0.getOpcode();
4524 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4525 "Unexpected binop");
4526 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4527
4528 // (X + Y) == X --> Y == 0
4529 // (X - Y) == X --> Y == 0
4530 // (X ^ Y) == X --> Y == 0
4531 SelectionDAG &DAG = DCI.DAG;
4532 EVT OpVT = N0.getValueType();
4533 SDValue X = N0.getOperand(i: 0);
4534 SDValue Y = N0.getOperand(i: 1);
4535 if (X == N1)
4536 return DAG.getSetCC(DL, VT, LHS: Y, RHS: DAG.getConstant(Val: 0, DL, VT: OpVT), Cond);
4537
4538 if (Y != N1)
4539 return SDValue();
4540
4541 // (X + Y) == Y --> X == 0
4542 // (X ^ Y) == Y --> X == 0
4543 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4544 return DAG.getSetCC(DL, VT, LHS: X, RHS: DAG.getConstant(Val: 0, DL, VT: OpVT), Cond);
4545
4546 // The shift would not be valid if the operands are boolean (i1).
4547 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4548 return SDValue();
4549
4550 // (X - Y) == Y --> X == Y << 1
4551 SDValue One = DAG.getShiftAmountConstant(Val: 1, VT: OpVT, DL);
4552 SDValue YShl1 = DAG.getNode(Opcode: ISD::SHL, DL, VT: N1.getValueType(), N1: Y, N2: One);
4553 if (!DCI.isCalledByLegalizer())
4554 DCI.AddToWorklist(N: YShl1.getNode());
4555 return DAG.getSetCC(DL, VT, LHS: X, RHS: YShl1, Cond);
4556}
4557
4558static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
4559 SDValue N0, const APInt &C1,
4560 ISD::CondCode Cond, const SDLoc &dl,
4561 SelectionDAG &DAG) {
4562 // Look through truncs that don't change the value of a ctpop.
4563 // FIXME: Add vector support? Need to be careful with setcc result type below.
4564 SDValue CTPOP = N0;
4565 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4566 N0.getScalarValueSizeInBits() > Log2_32(Value: N0.getOperand(i: 0).getScalarValueSizeInBits()))
4567 CTPOP = N0.getOperand(i: 0);
4568
4569 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4570 return SDValue();
4571
4572 EVT CTVT = CTPOP.getValueType();
4573 SDValue CTOp = CTPOP.getOperand(i: 0);
4574
4575 // Expand a power-of-2-or-zero comparison based on ctpop:
4576 // (ctpop x) u< 2 -> (x & x-1) == 0
4577 // (ctpop x) u> 1 -> (x & x-1) != 0
4578 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4579 // Keep the CTPOP if it is a cheap vector op.
4580 if (CTVT.isVector() && TLI.isCtpopFast(VT: CTVT))
4581 return SDValue();
4582
4583 unsigned CostLimit = TLI.getCustomCtpopCost(VT: CTVT, Cond);
4584 if (C1.ugt(RHS: CostLimit + (Cond == ISD::SETULT)))
4585 return SDValue();
4586 if (C1 == 0 && (Cond == ISD::SETULT))
4587 return SDValue(); // This is handled elsewhere.
4588
4589 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4590
4591 SDValue NegOne = DAG.getAllOnesConstant(DL: dl, VT: CTVT);
4592 SDValue Result = CTOp;
4593 for (unsigned i = 0; i < Passes; i++) {
4594 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: CTVT, N1: Result, N2: NegOne);
4595 Result = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: CTVT, N1: Result, N2: Add);
4596 }
4597 ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
4598 return DAG.getSetCC(DL: dl, VT, LHS: Result, RHS: DAG.getConstant(Val: 0, DL: dl, VT: CTVT), Cond: CC);
4599 }
4600
4601 // Expand a power-of-2 comparison based on ctpop
4602 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4603 // Keep the CTPOP if it is cheap.
4604 if (TLI.isCtpopFast(VT: CTVT))
4605 return SDValue();
4606
4607 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: CTVT);
4608 SDValue NegOne = DAG.getAllOnesConstant(DL: dl, VT: CTVT);
4609 assert(CTVT.isInteger());
4610 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: CTVT, N1: CTOp, N2: NegOne);
4611
4612 // Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4613 // check before emitting a potentially unnecessary op.
4614 if (DAG.isKnownNeverZero(Op: CTOp)) {
4615 // (ctpop x) == 1 --> (x & x-1) == 0
4616 // (ctpop x) != 1 --> (x & x-1) != 0
4617 SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: CTVT, N1: CTOp, N2: Add);
4618 SDValue RHS = DAG.getSetCC(DL: dl, VT, LHS: And, RHS: Zero, Cond);
4619 return RHS;
4620 }
4621
4622 // (ctpop x) == 1 --> (x ^ x-1) > x-1
4623 // (ctpop x) != 1 --> (x ^ x-1) <= x-1
4624 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: CTVT, N1: CTOp, N2: Add);
4625 ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
4626 return DAG.getSetCC(DL: dl, VT, LHS: Xor, RHS: Add, Cond: CmpCond);
4627 }
4628
4629 return SDValue();
4630}
4631
4632static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4633 ISD::CondCode Cond, const SDLoc &dl,
4634 SelectionDAG &DAG) {
4635 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4636 return SDValue();
4637
4638 auto *C1 = isConstOrConstSplat(N: N1, /* AllowUndefs */ true);
4639 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4640 return SDValue();
4641
4642 auto getRotateSource = [](SDValue X) {
4643 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4644 return X.getOperand(i: 0);
4645 return SDValue();
4646 };
4647
4648 // Peek through a rotated value compared against 0 or -1:
4649 // (rot X, Y) == 0/-1 --> X == 0/-1
4650 // (rot X, Y) != 0/-1 --> X != 0/-1
4651 if (SDValue R = getRotateSource(N0))
4652 return DAG.getSetCC(DL: dl, VT, LHS: R, RHS: N1, Cond);
4653
4654 // Peek through an 'or' of a rotated value compared against 0:
4655 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4656 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4657 //
4658 // TODO: Add the 'and' with -1 sibling.
4659 // TODO: Recurse through a series of 'or' ops to find the rotate.
4660 EVT OpVT = N0.getValueType();
4661 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4662 if (SDValue R = getRotateSource(N0.getOperand(i: 0))) {
4663 SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: R, N2: N0.getOperand(i: 1));
4664 return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4665 }
4666 if (SDValue R = getRotateSource(N0.getOperand(i: 1))) {
4667 SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: R, N2: N0.getOperand(i: 0));
4668 return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4669 }
4670 }
4671
4672 return SDValue();
4673}
4674
4675static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
4676 ISD::CondCode Cond, const SDLoc &dl,
4677 SelectionDAG &DAG) {
4678 // If we are testing for all-bits-clear, we might be able to do that with
4679 // less shifting since bit-order does not matter.
4680 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4681 return SDValue();
4682
4683 auto *C1 = isConstOrConstSplat(N: N1, /* AllowUndefs */ true);
4684 if (!C1 || !C1->isZero())
4685 return SDValue();
4686
4687 if (!N0.hasOneUse() ||
4688 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4689 return SDValue();
4690
4691 unsigned BitWidth = N0.getScalarValueSizeInBits();
4692 auto *ShAmtC = isConstOrConstSplat(N: N0.getOperand(i: 2));
4693 if (!ShAmtC)
4694 return SDValue();
4695
4696 uint64_t ShAmt = ShAmtC->getAPIntValue().urem(RHS: BitWidth);
4697 if (ShAmt == 0)
4698 return SDValue();
4699
4700 // Canonicalize fshr as fshl to reduce pattern-matching.
4701 if (N0.getOpcode() == ISD::FSHR)
4702 ShAmt = BitWidth - ShAmt;
4703
4704 // Match an 'or' with a specific operand 'Other' in either commuted variant.
4705 SDValue X, Y;
4706 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4707 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4708 return false;
4709 if (Or.getOperand(i: 0) == Other) {
4710 X = Or.getOperand(i: 0);
4711 Y = Or.getOperand(i: 1);
4712 return true;
4713 }
4714 if (Or.getOperand(i: 1) == Other) {
4715 X = Or.getOperand(i: 1);
4716 Y = Or.getOperand(i: 0);
4717 return true;
4718 }
4719 return false;
4720 };
4721
4722 EVT OpVT = N0.getValueType();
4723 EVT ShAmtVT = N0.getOperand(i: 2).getValueType();
4724 SDValue F0 = N0.getOperand(i: 0);
4725 SDValue F1 = N0.getOperand(i: 1);
4726 if (matchOr(F0, F1)) {
4727 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4728 SDValue NewShAmt = DAG.getConstant(Val: ShAmt, DL: dl, VT: ShAmtVT);
4729 SDValue Shift = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: OpVT, N1: Y, N2: NewShAmt);
4730 SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: Shift, N2: X);
4731 return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4732 }
4733 if (matchOr(F1, F0)) {
4734 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4735 SDValue NewShAmt = DAG.getConstant(Val: BitWidth - ShAmt, DL: dl, VT: ShAmtVT);
4736 SDValue Shift = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: OpVT, N1: Y, N2: NewShAmt);
4737 SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: Shift, N2: X);
4738 return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4739 }
4740
4741 return SDValue();
4742}
4743
4744/// Try to simplify a setcc built with the specified operands and cc. If it is
4745/// unable to simplify it, return a null SDValue.
4746SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4747 ISD::CondCode Cond, bool foldBooleans,
4748 DAGCombinerInfo &DCI,
4749 const SDLoc &dl) const {
4750 SelectionDAG &DAG = DCI.DAG;
4751 const DataLayout &Layout = DAG.getDataLayout();
4752 EVT OpVT = N0.getValueType();
4753 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4754
4755 // Constant fold or commute setcc.
4756 if (SDValue Fold = DAG.FoldSetCC(VT, N1: N0, N2: N1, Cond, dl))
4757 return Fold;
4758
4759 bool N0ConstOrSplat =
4760 isConstOrConstSplat(N: N0, /*AllowUndefs*/ false, /*AllowTruncate*/ AllowTruncation: true);
4761 bool N1ConstOrSplat =
4762 isConstOrConstSplat(N: N1, /*AllowUndefs*/ false, /*AllowTruncate*/ AllowTruncation: true);
4763
4764 // Canonicalize toward having the constant on the RHS.
4765 // TODO: Handle non-splat vector constants. All undef causes trouble.
4766 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4767 // infinite loop here when we encounter one.
4768 ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Operation: Cond);
4769 if (N0ConstOrSplat && !N1ConstOrSplat &&
4770 (DCI.isBeforeLegalizeOps() ||
4771 isCondCodeLegal(CC: SwappedCC, VT: N0.getSimpleValueType())))
4772 return DAG.getSetCC(DL: dl, VT, LHS: N1, RHS: N0, Cond: SwappedCC);
4773
4774 // If we have a subtract with the same 2 non-constant operands as this setcc
4775 // -- but in reverse order -- then try to commute the operands of this setcc
4776 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4777 // instruction on some targets.
4778 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4779 (DCI.isBeforeLegalizeOps() ||
4780 isCondCodeLegal(CC: SwappedCC, VT: N0.getSimpleValueType())) &&
4781 DAG.doesNodeExist(Opcode: ISD::SUB, VTList: DAG.getVTList(VT: OpVT), Ops: {N1, N0}) &&
4782 !DAG.doesNodeExist(Opcode: ISD::SUB, VTList: DAG.getVTList(VT: OpVT), Ops: {N0, N1}))
4783 return DAG.getSetCC(DL: dl, VT, LHS: N1, RHS: N0, Cond: SwappedCC);
4784
4785 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4786 return V;
4787
4788 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4789 return V;
4790
4791 if (auto *N1C = isConstOrConstSplat(N: N1)) {
4792 const APInt &C1 = N1C->getAPIntValue();
4793
4794 // Optimize some CTPOP cases.
4795 if (SDValue V = simplifySetCCWithCTPOP(TLI: *this, VT, N0, C1, Cond, dl, DAG))
4796 return V;
4797
4798 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4799 // X * Y == 0 --> (X == 0) || (Y == 0)
4800 // X * Y != 0 --> (X != 0) && (Y != 0)
4801 // TODO: This bails out if minsize is set, but if the target doesn't have a
4802 // single instruction multiply for this type, it would likely be
4803 // smaller to decompose.
4804 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4805 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4806 (N0->getFlags().hasNoUnsignedWrap() ||
4807 N0->getFlags().hasNoSignedWrap()) &&
4808 !Attr.hasFnAttr(Kind: Attribute::MinSize)) {
4809 SDValue IsXZero = DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1, Cond);
4810 SDValue IsYZero = DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 1), RHS: N1, Cond);
4811 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4812 return DAG.getNode(Opcode: LogicOp, DL: dl, VT, N1: IsXZero, N2: IsYZero);
4813 }
4814
4815 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4816 // equality comparison, then we're just comparing whether X itself is
4817 // zero.
4818 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4819 N0.getOperand(i: 0).getOpcode() == ISD::CTLZ &&
4820 llvm::has_single_bit<uint32_t>(Value: N0.getScalarValueSizeInBits())) {
4821 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N: N0.getOperand(i: 1))) {
4822 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4823 ShAmt->getAPIntValue() == Log2_32(Value: N0.getScalarValueSizeInBits())) {
4824 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4825 // (srl (ctlz x), 5) == 0 -> X != 0
4826 // (srl (ctlz x), 5) != 1 -> X != 0
4827 Cond = ISD::SETNE;
4828 } else {
4829 // (srl (ctlz x), 5) != 0 -> X == 0
4830 // (srl (ctlz x), 5) == 1 -> X == 0
4831 Cond = ISD::SETEQ;
4832 }
4833 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: N0.getValueType());
4834 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0).getOperand(i: 0), RHS: Zero,
4835 Cond);
4836 }
4837 }
4838 }
4839 }
4840
4841 // setcc X, 0, setlt --> X (when X is all sign bits)
4842 // setcc X, 0, setne --> X (when X is all sign bits)
4843 //
4844 // When we know that X has 0 or -1 in each element (or scalar), this
4845 // comparison will produce X. This is only true when boolean contents are
4846 // represented via 0s and -1s.
4847 if (VT == OpVT &&
4848 // Check that the result of setcc is 0 and -1.
4849 getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent &&
4850 // Match only for checks X < 0 and X != 0
4851 (Cond == ISD::SETLT || Cond == ISD::SETNE) && isNullOrNullSplat(V: N1) &&
4852 // The identity holds iff we know all sign bits for all lanes.
4853 DAG.ComputeNumSignBits(Op: N0) == N0.getScalarValueSizeInBits())
4854 return N0;
4855
4856 // FIXME: Support vectors.
4857 if (auto *N1C = dyn_cast<ConstantSDNode>(Val: N1.getNode())) {
4858 const APInt &C1 = N1C->getAPIntValue();
4859
4860 // (zext x) == C --> x == (trunc C)
4861 // (sext x) == C --> x == (trunc C)
4862 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4863 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4864 unsigned MinBits = N0.getValueSizeInBits();
4865 SDValue PreExt;
4866 bool Signed = false;
4867 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4868 // ZExt
4869 MinBits = N0->getOperand(Num: 0).getValueSizeInBits();
4870 PreExt = N0->getOperand(Num: 0);
4871 } else if (N0->getOpcode() == ISD::AND) {
4872 // DAGCombine turns costly ZExts into ANDs
4873 if (auto *C = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1)))
4874 if ((C->getAPIntValue()+1).isPowerOf2()) {
4875 MinBits = C->getAPIntValue().countr_one();
4876 PreExt = N0->getOperand(Num: 0);
4877 }
4878 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4879 // SExt
4880 MinBits = N0->getOperand(Num: 0).getValueSizeInBits();
4881 PreExt = N0->getOperand(Num: 0);
4882 Signed = true;
4883 } else if (auto *LN0 = dyn_cast<LoadSDNode>(Val&: N0)) {
4884 // ZEXTLOAD / SEXTLOAD
4885 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4886 MinBits = LN0->getMemoryVT().getSizeInBits();
4887 PreExt = N0;
4888 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4889 Signed = true;
4890 MinBits = LN0->getMemoryVT().getSizeInBits();
4891 PreExt = N0;
4892 }
4893 }
4894
4895 // Figure out how many bits we need to preserve this constant.
4896 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4897
4898 // Make sure we're not losing bits from the constant.
4899 if (MinBits > 0 &&
4900 MinBits < C1.getBitWidth() &&
4901 MinBits >= ReqdBits) {
4902 EVT MinVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: MinBits);
4903 if (isTypeDesirableForOp(ISD::SETCC, VT: MinVT)) {
4904 // Will get folded away.
4905 SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MinVT, Operand: PreExt);
4906 if (MinBits == 1 && C1 == 1)
4907 // Invert the condition.
4908 return DAG.getSetCC(DL: dl, VT, LHS: Trunc, RHS: DAG.getConstant(Val: 0, DL: dl, VT: MVT::i1),
4909 Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4910 SDValue C = DAG.getConstant(Val: C1.trunc(width: MinBits), DL: dl, VT: MinVT);
4911 return DAG.getSetCC(DL: dl, VT, LHS: Trunc, RHS: C, Cond);
4912 }
4913
4914 // If truncating the setcc operands is not desirable, we can still
4915 // simplify the expression in some cases:
4916 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4917 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4918 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4919 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4920 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4921 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4922 SDValue TopSetCC = N0->getOperand(Num: 0);
4923 unsigned N0Opc = N0->getOpcode();
4924 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4925 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4926 TopSetCC.getOpcode() == ISD::SETCC &&
4927 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4928 (isConstFalseVal(N: N1) ||
4929 isExtendedTrueVal(N: N1C, VT: N0->getValueType(ResNo: 0), SExt))) {
4930
4931 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4932 (!N1C->isZero() && Cond == ISD::SETNE);
4933
4934 if (!Inverse)
4935 return TopSetCC;
4936
4937 ISD::CondCode InvCond = ISD::getSetCCInverse(
4938 Operation: cast<CondCodeSDNode>(Val: TopSetCC.getOperand(i: 2))->get(),
4939 Type: TopSetCC.getOperand(i: 0).getValueType());
4940 return DAG.getSetCC(DL: dl, VT, LHS: TopSetCC.getOperand(i: 0),
4941 RHS: TopSetCC.getOperand(i: 1),
4942 Cond: InvCond);
4943 }
4944 }
4945 }
4946
4947 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4948 // equality or unsigned, and all 1 bits of the const are in the same
4949 // partial word, see if we can shorten the load.
4950 if (DCI.isBeforeLegalize() &&
4951 !ISD::isSignedIntSetCC(Code: Cond) &&
4952 N0.getOpcode() == ISD::AND && C1 == 0 &&
4953 N0.getNode()->hasOneUse() &&
4954 isa<LoadSDNode>(Val: N0.getOperand(i: 0)) &&
4955 N0.getOperand(i: 0).getNode()->hasOneUse() &&
4956 isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
4957 auto *Lod = cast<LoadSDNode>(Val: N0.getOperand(i: 0));
4958 APInt bestMask;
4959 unsigned bestWidth = 0, bestOffset = 0;
4960 if (Lod->isSimple() && Lod->isUnindexed() &&
4961 (Lod->getMemoryVT().isByteSized() ||
4962 isPaddedAtMostSignificantBitsWhenStored(VT: Lod->getMemoryVT()))) {
4963 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4964 unsigned origWidth = N0.getValueSizeInBits();
4965 unsigned maskWidth = origWidth;
4966 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4967 // 8 bits, but have to be careful...
4968 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4969 origWidth = Lod->getMemoryVT().getSizeInBits();
4970 const APInt &Mask = N0.getConstantOperandAPInt(i: 1);
4971 // Only consider power-of-2 widths (and at least one byte) as candidates
4972 // for the narrowed load.
4973 for (unsigned width = 8; width < origWidth; width *= 2) {
4974 EVT newVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: width);
4975 APInt newMask = APInt::getLowBitsSet(numBits: maskWidth, loBitsSet: width);
4976 // Avoid accessing any padding here for now (we could use memWidth
4977 // instead of origWidth here otherwise).
4978 unsigned maxOffset = origWidth - width;
4979 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4980 if (Mask.isSubsetOf(RHS: newMask)) {
4981 unsigned ptrOffset =
4982 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4983 unsigned IsFast = 0;
4984 assert((ptrOffset % 8) == 0 && "Non-Bytealigned pointer offset");
4985 Align NewAlign = commonAlignment(A: Lod->getAlign(), Offset: ptrOffset / 8);
4986 if (shouldReduceLoadWidth(Load: Lod, ExtTy: ISD::NON_EXTLOAD, NewVT: newVT,
4987 ByteOffset: ptrOffset / 8) &&
4988 allowsMemoryAccess(
4989 Context&: *DAG.getContext(), DL: Layout, VT: newVT, AddrSpace: Lod->getAddressSpace(),
4990 Alignment: NewAlign, Flags: Lod->getMemOperand()->getFlags(), Fast: &IsFast) &&
4991 IsFast) {
4992 bestOffset = ptrOffset / 8;
4993 bestMask = Mask.lshr(shiftAmt: offset);
4994 bestWidth = width;
4995 break;
4996 }
4997 }
4998 newMask <<= 8;
4999 }
5000 if (bestWidth)
5001 break;
5002 }
5003 }
5004 if (bestWidth) {
5005 EVT newVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: bestWidth);
5006 SDValue Ptr = Lod->getBasePtr();
5007 if (bestOffset != 0)
5008 Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: bestOffset));
5009 SDValue NewLoad =
5010 DAG.getLoad(VT: newVT, dl, Chain: Lod->getChain(), Ptr,
5011 PtrInfo: Lod->getPointerInfo().getWithOffset(O: bestOffset),
5012 Alignment: Lod->getBaseAlign());
5013 SDValue And =
5014 DAG.getNode(Opcode: ISD::AND, DL: dl, VT: newVT, N1: NewLoad,
5015 N2: DAG.getConstant(Val: bestMask.trunc(width: bestWidth), DL: dl, VT: newVT));
5016 return DAG.getSetCC(DL: dl, VT, LHS: And, RHS: DAG.getConstant(Val: 0LL, DL: dl, VT: newVT), Cond);
5017 }
5018 }
5019
5020 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
5021 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
5022 unsigned InSize = N0.getOperand(i: 0).getValueSizeInBits();
5023
5024 // If the comparison constant has bits in the upper part, the
5025 // zero-extended value could never match.
5026 if (C1.intersects(RHS: APInt::getHighBitsSet(numBits: C1.getBitWidth(),
5027 hiBitsSet: C1.getBitWidth() - InSize))) {
5028 switch (Cond) {
5029 case ISD::SETUGT:
5030 case ISD::SETUGE:
5031 case ISD::SETEQ:
5032 return DAG.getConstant(Val: 0, DL: dl, VT);
5033 case ISD::SETULT:
5034 case ISD::SETULE:
5035 case ISD::SETNE:
5036 return DAG.getConstant(Val: 1, DL: dl, VT);
5037 case ISD::SETGT:
5038 case ISD::SETGE:
5039 // True if the sign bit of C1 is set.
5040 return DAG.getConstant(Val: C1.isNegative(), DL: dl, VT);
5041 case ISD::SETLT:
5042 case ISD::SETLE:
5043 // True if the sign bit of C1 isn't set.
5044 return DAG.getConstant(Val: C1.isNonNegative(), DL: dl, VT);
5045 default:
5046 break;
5047 }
5048 }
5049
5050 // Otherwise, we can perform the comparison with the low bits.
5051 switch (Cond) {
5052 case ISD::SETEQ:
5053 case ISD::SETNE:
5054 case ISD::SETUGT:
5055 case ISD::SETUGE:
5056 case ISD::SETULT:
5057 case ISD::SETULE: {
5058 EVT newVT = N0.getOperand(i: 0).getValueType();
5059 // FIXME: Should use isNarrowingProfitable.
5060 if (DCI.isBeforeLegalizeOps() ||
5061 (isOperationLegal(Op: ISD::SETCC, VT: newVT) &&
5062 isCondCodeLegal(CC: Cond, VT: newVT.getSimpleVT()) &&
5063 isTypeDesirableForOp(ISD::SETCC, VT: newVT))) {
5064 EVT NewSetCCVT = getSetCCResultType(DL: Layout, Context&: *DAG.getContext(), VT: newVT);
5065 SDValue NewConst = DAG.getConstant(Val: C1.trunc(width: InSize), DL: dl, VT: newVT);
5066
5067 SDValue NewSetCC = DAG.getSetCC(DL: dl, VT: NewSetCCVT, LHS: N0.getOperand(i: 0),
5068 RHS: NewConst, Cond);
5069 return DAG.getBoolExtOrTrunc(Op: NewSetCC, SL: dl, VT, OpVT: N0.getValueType());
5070 }
5071 break;
5072 }
5073 default:
5074 break; // todo, be more careful with signed comparisons
5075 }
5076 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
5077 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5078 !isSExtCheaperThanZExt(FromTy: cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT(),
5079 ToTy: OpVT)) {
5080 EVT ExtSrcTy = cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT();
5081 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
5082 EVT ExtDstTy = N0.getValueType();
5083 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
5084
5085 // If the constant doesn't fit into the number of bits for the source of
5086 // the sign extension, it is impossible for both sides to be equal.
5087 if (C1.getSignificantBits() > ExtSrcTyBits)
5088 return DAG.getBoolConstant(V: Cond == ISD::SETNE, DL: dl, VT, OpVT);
5089
5090 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
5091 ExtDstTy != ExtSrcTy && "Unexpected types!");
5092 APInt Imm = APInt::getLowBitsSet(numBits: ExtDstTyBits, loBitsSet: ExtSrcTyBits);
5093 SDValue ZextOp = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ExtDstTy, N1: N0.getOperand(i: 0),
5094 N2: DAG.getConstant(Val: Imm, DL: dl, VT: ExtDstTy));
5095 if (!DCI.isCalledByLegalizer())
5096 DCI.AddToWorklist(N: ZextOp.getNode());
5097 // Otherwise, make this a use of a zext.
5098 return DAG.getSetCC(DL: dl, VT, LHS: ZextOp,
5099 RHS: DAG.getConstant(Val: C1 & Imm, DL: dl, VT: ExtDstTy), Cond);
5100 } else if ((N1C->isZero() || N1C->isOne()) &&
5101 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5102 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
5103 // excluded as they are handled below whilst checking for foldBooleans.
5104 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
5105 isTypeLegal(VT) && VT.bitsLE(VT: N0.getValueType()) &&
5106 (N0.getValueType() == MVT::i1 ||
5107 getBooleanContents(Type: N0.getValueType()) == ZeroOrOneBooleanContent) &&
5108 DAG.MaskedValueIsZero(
5109 Op: N0, Mask: APInt::getBitsSetFrom(numBits: N0.getValueSizeInBits(), loBit: 1))) {
5110 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
5111 if (TrueWhenTrue)
5112 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: N0);
5113 // Invert the condition.
5114 if (N0.getOpcode() == ISD::SETCC) {
5115 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N0.getOperand(i: 2))->get();
5116 CC = ISD::getSetCCInverse(Operation: CC, Type: N0.getOperand(i: 0).getValueType());
5117 if (DCI.isBeforeLegalizeOps() ||
5118 isCondCodeLegal(CC, VT: N0.getOperand(i: 0).getSimpleValueType()))
5119 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N0.getOperand(i: 1), Cond: CC);
5120 }
5121 }
5122
5123 if ((N0.getOpcode() == ISD::XOR ||
5124 (N0.getOpcode() == ISD::AND &&
5125 N0.getOperand(i: 0).getOpcode() == ISD::XOR &&
5126 N0.getOperand(i: 1) == N0.getOperand(i: 0).getOperand(i: 1))) &&
5127 isOneConstant(V: N0.getOperand(i: 1))) {
5128 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
5129 // can only do this if the top bits are known zero.
5130 unsigned BitWidth = N0.getValueSizeInBits();
5131 if (DAG.MaskedValueIsZero(Op: N0,
5132 Mask: APInt::getHighBitsSet(numBits: BitWidth,
5133 hiBitsSet: BitWidth-1))) {
5134 // Okay, get the un-inverted input value.
5135 SDValue Val;
5136 if (N0.getOpcode() == ISD::XOR) {
5137 Val = N0.getOperand(i: 0);
5138 } else {
5139 assert(N0.getOpcode() == ISD::AND &&
5140 N0.getOperand(0).getOpcode() == ISD::XOR);
5141 // ((X^1)&1)^1 -> X & 1
5142 Val = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: N0.getValueType(),
5143 N1: N0.getOperand(i: 0).getOperand(i: 0),
5144 N2: N0.getOperand(i: 1));
5145 }
5146
5147 return DAG.getSetCC(DL: dl, VT, LHS: Val, RHS: N1,
5148 Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5149 }
5150 } else if (N1C->isOne()) {
5151 SDValue Op0 = N0;
5152 if (Op0.getOpcode() == ISD::TRUNCATE)
5153 Op0 = Op0.getOperand(i: 0);
5154
5155 if ((Op0.getOpcode() == ISD::XOR) &&
5156 Op0.getOperand(i: 0).getOpcode() == ISD::SETCC &&
5157 Op0.getOperand(i: 1).getOpcode() == ISD::SETCC) {
5158 SDValue XorLHS = Op0.getOperand(i: 0);
5159 SDValue XorRHS = Op0.getOperand(i: 1);
5160 // Ensure that the input setccs return an i1 type or 0/1 value.
5161 if (Op0.getValueType() == MVT::i1 ||
5162 (getBooleanContents(Type: XorLHS.getOperand(i: 0).getValueType()) ==
5163 ZeroOrOneBooleanContent &&
5164 getBooleanContents(Type: XorRHS.getOperand(i: 0).getValueType()) ==
5165 ZeroOrOneBooleanContent)) {
5166 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
5167 Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
5168 return DAG.getSetCC(DL: dl, VT, LHS: XorLHS, RHS: XorRHS, Cond);
5169 }
5170 }
5171 if (Op0.getOpcode() == ISD::AND && isOneConstant(V: Op0.getOperand(i: 1))) {
5172 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
5173 if (Op0.getValueType().bitsGT(VT))
5174 Op0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
5175 N1: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Op0.getOperand(i: 0)),
5176 N2: DAG.getConstant(Val: 1, DL: dl, VT));
5177 else if (Op0.getValueType().bitsLT(VT))
5178 Op0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
5179 N1: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: Op0.getOperand(i: 0)),
5180 N2: DAG.getConstant(Val: 1, DL: dl, VT));
5181
5182 return DAG.getSetCC(DL: dl, VT, LHS: Op0,
5183 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Op0.getValueType()),
5184 Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5185 }
5186 if (Op0.getOpcode() == ISD::AssertZext &&
5187 cast<VTSDNode>(Val: Op0.getOperand(i: 1))->getVT() == MVT::i1)
5188 return DAG.getSetCC(DL: dl, VT, LHS: Op0,
5189 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Op0.getValueType()),
5190 Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5191 }
5192 }
5193
5194 // Given:
5195 // icmp eq/ne (urem %x, %y), 0
5196 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
5197 // icmp eq/ne %x, 0
5198 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
5199 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5200 KnownBits XKnown = DAG.computeKnownBits(Op: N0.getOperand(i: 0));
5201 KnownBits YKnown = DAG.computeKnownBits(Op: N0.getOperand(i: 1));
5202 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
5203 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1, Cond);
5204 }
5205
5206 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
5207 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
5208 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5209 N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) &&
5210 N0.getConstantOperandAPInt(i: 1) == OpVT.getScalarSizeInBits() - 1 &&
5211 N1C->isAllOnes()) {
5212 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0),
5213 RHS: DAG.getConstant(Val: 0, DL: dl, VT: OpVT),
5214 Cond: Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
5215 }
5216
5217 // fold (setcc (trunc x) c) -> (setcc x c)
5218 if (N0.getOpcode() == ISD::TRUNCATE &&
5219 ((N0->getFlags().hasNoUnsignedWrap() && !ISD::isSignedIntSetCC(Code: Cond)) ||
5220 (N0->getFlags().hasNoSignedWrap() &&
5221 !ISD::isUnsignedIntSetCC(Code: Cond))) &&
5222 isTypeDesirableForOp(ISD::SETCC, VT: N0.getOperand(i: 0).getValueType())) {
5223 EVT NewVT = N0.getOperand(i: 0).getValueType();
5224 SDValue NewConst = DAG.getConstant(
5225 Val: (N0->getFlags().hasNoSignedWrap() && !ISD::isUnsignedIntSetCC(Code: Cond))
5226 ? C1.sext(width: NewVT.getSizeInBits())
5227 : C1.zext(width: NewVT.getSizeInBits()),
5228 DL: dl, VT: NewVT);
5229 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: NewConst, Cond);
5230 }
5231
5232 if (SDValue V =
5233 optimizeSetCCOfSignedTruncationCheck(SCCVT: VT, N0, N1, Cond, DCI, DL: dl))
5234 return V;
5235 }
5236
5237 // These simplifications apply to splat vectors as well.
5238 // TODO: Handle more splat vector cases.
5239 if (auto *N1C = isConstOrConstSplat(N: N1)) {
5240 const APInt &C1 = N1C->getAPIntValue();
5241
5242 APInt MinVal, MaxVal;
5243 unsigned OperandBitSize = N1C->getValueType(ResNo: 0).getScalarSizeInBits();
5244 if (ISD::isSignedIntSetCC(Code: Cond)) {
5245 MinVal = APInt::getSignedMinValue(numBits: OperandBitSize);
5246 MaxVal = APInt::getSignedMaxValue(numBits: OperandBitSize);
5247 } else {
5248 MinVal = APInt::getMinValue(numBits: OperandBitSize);
5249 MaxVal = APInt::getMaxValue(numBits: OperandBitSize);
5250 }
5251
5252 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
5253 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
5254 // X >= MIN --> true
5255 if (C1 == MinVal)
5256 return DAG.getBoolConstant(V: true, DL: dl, VT, OpVT);
5257
5258 if (!VT.isVector()) { // TODO: Support this for vectors.
5259 // X >= C0 --> X > (C0 - 1)
5260 APInt C = C1 - 1;
5261 ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
5262 if ((DCI.isBeforeLegalizeOps() ||
5263 isCondCodeLegal(CC: NewCC, VT: OpVT.getSimpleVT())) &&
5264 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5265 isLegalICmpImmediate(C.getSExtValue())))) {
5266 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5267 RHS: DAG.getConstant(Val: C, DL: dl, VT: N1.getValueType()),
5268 Cond: NewCC);
5269 }
5270 }
5271 }
5272
5273 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
5274 // X <= MAX --> true
5275 if (C1 == MaxVal)
5276 return DAG.getBoolConstant(V: true, DL: dl, VT, OpVT);
5277
5278 // X <= C0 --> X < (C0 + 1)
5279 if (!VT.isVector()) { // TODO: Support this for vectors.
5280 APInt C = C1 + 1;
5281 ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
5282 if ((DCI.isBeforeLegalizeOps() ||
5283 isCondCodeLegal(CC: NewCC, VT: OpVT.getSimpleVT())) &&
5284 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5285 isLegalICmpImmediate(C.getSExtValue())))) {
5286 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5287 RHS: DAG.getConstant(Val: C, DL: dl, VT: N1.getValueType()),
5288 Cond: NewCC);
5289 }
5290 }
5291 }
5292
5293 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
5294 if (C1 == MinVal)
5295 return DAG.getBoolConstant(V: false, DL: dl, VT, OpVT); // X < MIN --> false
5296
5297 // TODO: Support this for vectors after legalize ops.
5298 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5299 // Canonicalize setlt X, Max --> setne X, Max
5300 if (C1 == MaxVal)
5301 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: ISD::SETNE);
5302
5303 // If we have setult X, 1, turn it into seteq X, 0
5304 if (C1 == MinVal+1)
5305 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5306 RHS: DAG.getConstant(Val: MinVal, DL: dl, VT: N0.getValueType()),
5307 Cond: ISD::SETEQ);
5308 }
5309 }
5310
5311 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
5312 if (C1 == MaxVal)
5313 return DAG.getBoolConstant(V: false, DL: dl, VT, OpVT); // X > MAX --> false
5314
5315 // TODO: Support this for vectors after legalize ops.
5316 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5317 // Canonicalize setgt X, Min --> setne X, Min
5318 if (C1 == MinVal)
5319 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: ISD::SETNE);
5320
5321 // If we have setugt X, Max-1, turn it into seteq X, Max
5322 if (C1 == MaxVal-1)
5323 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5324 RHS: DAG.getConstant(Val: MaxVal, DL: dl, VT: N0.getValueType()),
5325 Cond: ISD::SETEQ);
5326 }
5327 }
5328
5329 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5330 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5331 if (C1.isZero())
5332 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5333 SCCVT: VT, N0, N1C: N1, Cond, DCI, DL: dl))
5334 return CC;
5335
5336 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5337 // For example, when high 32-bits of i64 X are known clear:
5338 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
5339 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
5340 bool CmpZero = N1C->isZero();
5341 bool CmpNegOne = N1C->isAllOnes();
5342 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5343 // Match or(lo,shl(hi,bw/2)) pattern.
5344 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5345 unsigned EltBits = V.getScalarValueSizeInBits();
5346 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5347 return false;
5348 SDValue LHS = V.getOperand(i: 0);
5349 SDValue RHS = V.getOperand(i: 1);
5350 APInt HiBits = APInt::getHighBitsSet(numBits: EltBits, hiBitsSet: EltBits / 2);
5351 // Unshifted element must have zero upperbits.
5352 if (RHS.getOpcode() == ISD::SHL &&
5353 isa<ConstantSDNode>(Val: RHS.getOperand(i: 1)) &&
5354 RHS.getConstantOperandAPInt(i: 1) == (EltBits / 2) &&
5355 DAG.MaskedValueIsZero(Op: LHS, Mask: HiBits)) {
5356 Lo = LHS;
5357 Hi = RHS.getOperand(i: 0);
5358 return true;
5359 }
5360 if (LHS.getOpcode() == ISD::SHL &&
5361 isa<ConstantSDNode>(Val: LHS.getOperand(i: 1)) &&
5362 LHS.getConstantOperandAPInt(i: 1) == (EltBits / 2) &&
5363 DAG.MaskedValueIsZero(Op: RHS, Mask: HiBits)) {
5364 Lo = RHS;
5365 Hi = LHS.getOperand(i: 0);
5366 return true;
5367 }
5368 return false;
5369 };
5370
5371 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5372 unsigned EltBits = N0.getScalarValueSizeInBits();
5373 unsigned HalfBits = EltBits / 2;
5374 APInt HiBits = APInt::getHighBitsSet(numBits: EltBits, hiBitsSet: HalfBits);
5375 SDValue LoBits = DAG.getConstant(Val: ~HiBits, DL: dl, VT: OpVT);
5376 SDValue HiMask = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1: Hi, N2: LoBits);
5377 SDValue NewN0 =
5378 DAG.getNode(Opcode: CmpZero ? ISD::OR : ISD::AND, DL: dl, VT: OpVT, N1: Lo, N2: HiMask);
5379 SDValue NewN1 = CmpZero ? DAG.getConstant(Val: 0, DL: dl, VT: OpVT) : LoBits;
5380 return DAG.getSetCC(DL: dl, VT, LHS: NewN0, RHS: NewN1, Cond);
5381 };
5382
5383 SDValue Lo, Hi;
5384 if (IsConcat(N0, Lo, Hi))
5385 return MergeConcat(Lo, Hi);
5386
5387 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5388 SDValue Lo0, Lo1, Hi0, Hi1;
5389 if (IsConcat(N0.getOperand(i: 0), Lo0, Hi0) &&
5390 IsConcat(N0.getOperand(i: 1), Lo1, Hi1)) {
5391 return MergeConcat(DAG.getNode(Opcode: N0.getOpcode(), DL: dl, VT: OpVT, N1: Lo0, N2: Lo1),
5392 DAG.getNode(Opcode: N0.getOpcode(), DL: dl, VT: OpVT, N1: Hi0, N2: Hi1));
5393 }
5394 }
5395 }
5396 }
5397
5398 // If we have "setcc X, C0", check to see if we can shrink the immediate
5399 // by changing cc.
5400 // TODO: Support this for vectors after legalize ops.
5401 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5402 // SETUGT X, SINTMAX -> SETLT X, 0
5403 // SETUGE X, SINTMIN -> SETLT X, 0
5404 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5405 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5406 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5407 RHS: DAG.getConstant(Val: 0, DL: dl, VT: N1.getValueType()),
5408 Cond: ISD::SETLT);
5409
5410 // SETULT X, SINTMIN -> SETGT X, -1
5411 // SETULE X, SINTMAX -> SETGT X, -1
5412 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5413 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5414 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5415 RHS: DAG.getAllOnesConstant(DL: dl, VT: N1.getValueType()),
5416 Cond: ISD::SETGT);
5417 }
5418 }
5419
5420 // Back to non-vector simplifications.
5421 // TODO: Can we do these for vector splats?
5422 if (auto *N1C = dyn_cast<ConstantSDNode>(Val: N1.getNode())) {
5423 const APInt &C1 = N1C->getAPIntValue();
5424 EVT ShValTy = N0.getValueType();
5425
5426 // Fold bit comparisons when we can. This will result in an
5427 // incorrect value when boolean false is negative one, unless
5428 // the bitsize is 1 in which case the false value is the same
5429 // in practice regardless of the representation.
5430 if ((VT.getSizeInBits() == 1 ||
5431 getBooleanContents(Type: N0.getValueType()) == ZeroOrOneBooleanContent) &&
5432 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5433 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(VT: ShValTy))) &&
5434 N0.getOpcode() == ISD::AND) {
5435 if (auto *AndRHS = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
5436 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5437 // Perform the xform if the AND RHS is a single bit.
5438 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5439 if (AndRHS->getAPIntValue().isPowerOf2() &&
5440 !shouldAvoidTransformToShift(VT: ShValTy, Amount: ShCt)) {
5441 return DAG.getNode(
5442 Opcode: ISD::TRUNCATE, DL: dl, VT,
5443 Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5444 N2: DAG.getShiftAmountConstant(Val: ShCt, VT: ShValTy, DL: dl)));
5445 }
5446 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5447 // (X & 8) == 8 --> (X & 8) >> 3
5448 // Perform the xform if C1 is a single bit.
5449 unsigned ShCt = C1.logBase2();
5450 if (C1.isPowerOf2() && !shouldAvoidTransformToShift(VT: ShValTy, Amount: ShCt)) {
5451 return DAG.getNode(
5452 Opcode: ISD::TRUNCATE, DL: dl, VT,
5453 Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5454 N2: DAG.getShiftAmountConstant(Val: ShCt, VT: ShValTy, DL: dl)));
5455 }
5456 }
5457 }
5458 }
5459
5460 if (C1.getSignificantBits() <= 64 &&
5461 !isLegalICmpImmediate(C1.getSExtValue())) {
5462 // (X & -256) == 256 -> (X >> 8) == 1
5463 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5464 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5465 if (auto *AndRHS = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
5466 const APInt &AndRHSC = AndRHS->getAPIntValue();
5467 if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(RHS: AndRHSC)) {
5468 unsigned ShiftBits = AndRHSC.countr_zero();
5469 if (!shouldAvoidTransformToShift(VT: ShValTy, Amount: ShiftBits)) {
5470 // If using an unsigned shift doesn't yield a legal compare
5471 // immediate, try using sra instead.
5472 APInt NewC = C1.lshr(shiftAmt: ShiftBits);
5473 if (NewC.getSignificantBits() <= 64 &&
5474 !isLegalICmpImmediate(NewC.getSExtValue())) {
5475 APInt SignedC = C1.ashr(ShiftAmt: ShiftBits);
5476 if (SignedC.getSignificantBits() <= 64 &&
5477 isLegalICmpImmediate(SignedC.getSExtValue())) {
5478 SDValue Shift = DAG.getNode(
5479 Opcode: ISD::SRA, DL: dl, VT: ShValTy, N1: N0.getOperand(i: 0),
5480 N2: DAG.getShiftAmountConstant(Val: ShiftBits, VT: ShValTy, DL: dl));
5481 SDValue CmpRHS = DAG.getConstant(Val: SignedC, DL: dl, VT: ShValTy);
5482 return DAG.getSetCC(DL: dl, VT, LHS: Shift, RHS: CmpRHS, Cond);
5483 }
5484 }
5485 SDValue Shift = DAG.getNode(
5486 Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0.getOperand(i: 0),
5487 N2: DAG.getShiftAmountConstant(Val: ShiftBits, VT: ShValTy, DL: dl));
5488 SDValue CmpRHS = DAG.getConstant(Val: NewC, DL: dl, VT: ShValTy);
5489 return DAG.getSetCC(DL: dl, VT, LHS: Shift, RHS: CmpRHS, Cond);
5490 }
5491 }
5492 }
5493 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5494 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5495 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5496 // X < 0x100000000 -> (X >> 32) < 1
5497 // X >= 0x100000000 -> (X >> 32) >= 1
5498 // X <= 0x0ffffffff -> (X >> 32) < 1
5499 // X > 0x0ffffffff -> (X >> 32) >= 1
5500 unsigned ShiftBits;
5501 APInt NewC = C1;
5502 ISD::CondCode NewCond = Cond;
5503 if (AdjOne) {
5504 ShiftBits = C1.countr_one();
5505 NewC = NewC + 1;
5506 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5507 } else {
5508 ShiftBits = C1.countr_zero();
5509 }
5510 NewC.lshrInPlace(ShiftAmt: ShiftBits);
5511 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5512 isLegalICmpImmediate(NewC.getSExtValue()) &&
5513 !shouldAvoidTransformToShift(VT: ShValTy, Amount: ShiftBits)) {
5514 SDValue Shift =
5515 DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5516 N2: DAG.getShiftAmountConstant(Val: ShiftBits, VT: ShValTy, DL: dl));
5517 SDValue CmpRHS = DAG.getConstant(Val: NewC, DL: dl, VT: ShValTy);
5518 return DAG.getSetCC(DL: dl, VT, LHS: Shift, RHS: CmpRHS, Cond: NewCond);
5519 }
5520 }
5521 }
5522 }
5523
5524 if (!isa<ConstantFPSDNode>(Val: N0) && isa<ConstantFPSDNode>(Val: N1)) {
5525 auto *CFP = cast<ConstantFPSDNode>(Val&: N1);
5526 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5527
5528 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5529 // constant if knowing that the operand is non-nan is enough. We prefer to
5530 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5531 // materialize 0.0.
5532 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5533 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N0, Cond);
5534
5535 // setcc (fneg x), C -> setcc swap(pred) x, -C
5536 if (N0.getOpcode() == ISD::FNEG) {
5537 ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Operation: Cond);
5538 if (DCI.isBeforeLegalizeOps() ||
5539 isCondCodeLegal(CC: SwapCond, VT: N0.getSimpleValueType())) {
5540 SDValue NegN1 = DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT: N0.getValueType(), Operand: N1);
5541 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: NegN1, Cond: SwapCond);
5542 }
5543 }
5544
5545 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5546 if (isOperationLegalOrCustom(Op: ISD::IS_FPCLASS, VT: N0.getValueType()) &&
5547 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(ResNo: 0))) {
5548 bool IsFabs = N0.getOpcode() == ISD::FABS;
5549 SDValue Op = IsFabs ? N0.getOperand(i: 0) : N0;
5550 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5551 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5552 : (IsFabs ? fcInf : fcPosInf);
5553 if (Cond == ISD::SETUEQ)
5554 Flag |= fcNan;
5555 return DAG.getNode(Opcode: ISD::IS_FPCLASS, DL: dl, VT, N1: Op,
5556 N2: DAG.getTargetConstant(Val: Flag, DL: dl, VT: MVT::i32));
5557 }
5558 }
5559
5560 // If the condition is not legal, see if we can find an equivalent one
5561 // which is legal.
5562 if (!isCondCodeLegal(CC: Cond, VT: N0.getSimpleValueType())) {
5563 // If the comparison was an awkward floating-point == or != and one of
5564 // the comparison operands is infinity or negative infinity, convert the
5565 // condition to a less-awkward <= or >=.
5566 if (CFP->getValueAPF().isInfinity()) {
5567 bool IsNegInf = CFP->getValueAPF().isNegative();
5568 ISD::CondCode NewCond = ISD::SETCC_INVALID;
5569 switch (Cond) {
5570 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5571 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5572 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5573 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5574 default: break;
5575 }
5576 if (NewCond != ISD::SETCC_INVALID &&
5577 isCondCodeLegal(CC: NewCond, VT: N0.getSimpleValueType()))
5578 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: NewCond);
5579 }
5580 }
5581 }
5582
5583 if (N0 == N1) {
5584 // The sext(setcc()) => setcc() optimization relies on the appropriate
5585 // constant being emitted.
5586 assert(!N0.getValueType().isInteger() &&
5587 "Integer types should be handled by FoldSetCC");
5588
5589 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5590 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5591 if (UOF == 2) // FP operators that are undefined on NaNs.
5592 return DAG.getBoolConstant(V: EqTrue, DL: dl, VT, OpVT);
5593 if (UOF == unsigned(EqTrue))
5594 return DAG.getBoolConstant(V: EqTrue, DL: dl, VT, OpVT);
5595 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5596 // if it is not already.
5597 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5598 if (NewCond != Cond &&
5599 (DCI.isBeforeLegalizeOps() ||
5600 isCondCodeLegal(CC: NewCond, VT: N0.getSimpleValueType())))
5601 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: NewCond);
5602 }
5603
5604 // ~X > ~Y --> Y > X
5605 // ~X < ~Y --> Y < X
5606 // ~X < C --> X > ~C
5607 // ~X > C --> X < ~C
5608 if ((isSignedIntSetCC(Code: Cond) || isUnsignedIntSetCC(Code: Cond)) &&
5609 N0.getValueType().isInteger()) {
5610 if (isBitwiseNot(V: N0)) {
5611 if (isBitwiseNot(V: N1))
5612 return DAG.getSetCC(DL: dl, VT, LHS: N1.getOperand(i: 0), RHS: N0.getOperand(i: 0), Cond);
5613
5614 if (DAG.isConstantIntBuildVectorOrConstantInt(N: N1) &&
5615 !DAG.isConstantIntBuildVectorOrConstantInt(N: N0.getOperand(i: 0))) {
5616 SDValue Not = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5617 return DAG.getSetCC(DL: dl, VT, LHS: Not, RHS: N0.getOperand(i: 0), Cond);
5618 }
5619 }
5620 }
5621
5622 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5623 N0.getValueType().isInteger()) {
5624 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5625 N0.getOpcode() == ISD::XOR) {
5626 // Simplify (X+Y) == (X+Z) --> Y == Z
5627 if (N0.getOpcode() == N1.getOpcode()) {
5628 if (N0.getOperand(i: 0) == N1.getOperand(i: 0))
5629 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 1), RHS: N1.getOperand(i: 1), Cond);
5630 if (N0.getOperand(i: 1) == N1.getOperand(i: 1))
5631 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1.getOperand(i: 0), Cond);
5632 if (isCommutativeBinOp(Opcode: N0.getOpcode())) {
5633 // If X op Y == Y op X, try other combinations.
5634 if (N0.getOperand(i: 0) == N1.getOperand(i: 1))
5635 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 1), RHS: N1.getOperand(i: 0),
5636 Cond);
5637 if (N0.getOperand(i: 1) == N1.getOperand(i: 0))
5638 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1.getOperand(i: 1),
5639 Cond);
5640 }
5641 }
5642
5643 // If RHS is a legal immediate value for a compare instruction, we need
5644 // to be careful about increasing register pressure needlessly.
5645 bool LegalRHSImm = false;
5646
5647 if (auto *RHSC = dyn_cast<ConstantSDNode>(Val&: N1)) {
5648 if (auto *LHSR = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
5649 // Turn (X+C1) == C2 --> X == C2-C1
5650 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5651 return DAG.getSetCC(
5652 DL: dl, VT, LHS: N0.getOperand(i: 0),
5653 RHS: DAG.getConstant(Val: RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5654 DL: dl, VT: N0.getValueType()),
5655 Cond);
5656
5657 // Turn (X^C1) == C2 --> X == C1^C2
5658 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5659 return DAG.getSetCC(
5660 DL: dl, VT, LHS: N0.getOperand(i: 0),
5661 RHS: DAG.getConstant(Val: LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5662 DL: dl, VT: N0.getValueType()),
5663 Cond);
5664 }
5665
5666 // Turn (C1-X) == C2 --> X == C1-C2
5667 if (auto *SUBC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 0)))
5668 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5669 return DAG.getSetCC(
5670 DL: dl, VT, LHS: N0.getOperand(i: 1),
5671 RHS: DAG.getConstant(Val: SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5672 DL: dl, VT: N0.getValueType()),
5673 Cond);
5674
5675 // Could RHSC fold directly into a compare?
5676 if (RHSC->getValueType(ResNo: 0).getSizeInBits() <= 64)
5677 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5678 }
5679
5680 // (X+Y) == X --> Y == 0 and similar folds.
5681 // Don't do this if X is an immediate that can fold into a cmp
5682 // instruction and X+Y has other uses. It could be an induction variable
5683 // chain, and the transform would increase register pressure.
5684 if (!LegalRHSImm || N0.hasOneUse())
5685 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, DL: dl, DCI))
5686 return V;
5687 }
5688
5689 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5690 N1.getOpcode() == ISD::XOR)
5691 if (SDValue V = foldSetCCWithBinOp(VT, N0: N1, N1: N0, Cond, DL: dl, DCI))
5692 return V;
5693
5694 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, DL: dl, DCI))
5695 return V;
5696
5697 if (SDValue V = foldSetCCWithOr(VT, N0, N1, Cond, DL: dl, DCI))
5698 return V;
5699 }
5700
5701 // Fold remainder of division by a constant.
5702 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5703 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5704 // When division is cheap or optimizing for minimum size,
5705 // fall through to DIVREM creation by skipping this fold.
5706 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Kind: Attribute::MinSize)) {
5707 if (N0.getOpcode() == ISD::UREM) {
5708 if (SDValue Folded = buildUREMEqFold(SETCCVT: VT, REMNode: N0, CompTargetNode: N1, Cond, DCI, DL: dl))
5709 return Folded;
5710 } else if (N0.getOpcode() == ISD::SREM) {
5711 if (SDValue Folded = buildSREMEqFold(SETCCVT: VT, REMNode: N0, CompTargetNode: N1, Cond, DCI, DL: dl))
5712 return Folded;
5713 }
5714 }
5715 }
5716
5717 // Fold away ALL boolean setcc's.
5718 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5719 SDValue Temp;
5720 switch (Cond) {
5721 default: llvm_unreachable("Unknown integer setcc!");
5722 case ISD::SETEQ: // X == Y -> ~(X^Y)
5723 Temp = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OpVT, N1: N0, N2: N1);
5724 N0 = DAG.getNOT(DL: dl, Val: Temp, VT: OpVT);
5725 if (!DCI.isCalledByLegalizer())
5726 DCI.AddToWorklist(N: Temp.getNode());
5727 break;
5728 case ISD::SETNE: // X != Y --> (X^Y)
5729 N0 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OpVT, N1: N0, N2: N1);
5730 break;
5731 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5732 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5733 Temp = DAG.getNOT(DL: dl, Val: N0, VT: OpVT);
5734 N0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1, N2: Temp);
5735 if (!DCI.isCalledByLegalizer())
5736 DCI.AddToWorklist(N: Temp.getNode());
5737 break;
5738 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5739 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5740 Temp = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5741 N0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1: N0, N2: Temp);
5742 if (!DCI.isCalledByLegalizer())
5743 DCI.AddToWorklist(N: Temp.getNode());
5744 break;
5745 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5746 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5747 Temp = DAG.getNOT(DL: dl, Val: N0, VT: OpVT);
5748 N0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1, N2: Temp);
5749 if (!DCI.isCalledByLegalizer())
5750 DCI.AddToWorklist(N: Temp.getNode());
5751 break;
5752 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5753 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5754 Temp = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5755 N0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: N0, N2: Temp);
5756 break;
5757 }
5758 if (VT.getScalarType() != MVT::i1) {
5759 if (!DCI.isCalledByLegalizer())
5760 DCI.AddToWorklist(N: N0.getNode());
5761 // FIXME: If running after legalize, we probably can't do this.
5762 ISD::NodeType ExtendCode = getExtendForContent(Content: getBooleanContents(Type: OpVT));
5763 N0 = DAG.getNode(Opcode: ExtendCode, DL: dl, VT, Operand: N0);
5764 }
5765 return N0;
5766 }
5767
5768 // Fold (setcc (trunc x) (trunc y)) -> (setcc x y)
5769 if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE &&
5770 N0.getOperand(i: 0).getValueType() == N1.getOperand(i: 0).getValueType() &&
5771 ((!ISD::isSignedIntSetCC(Code: Cond) && N0->getFlags().hasNoUnsignedWrap() &&
5772 N1->getFlags().hasNoUnsignedWrap()) ||
5773 (!ISD::isUnsignedIntSetCC(Code: Cond) && N0->getFlags().hasNoSignedWrap() &&
5774 N1->getFlags().hasNoSignedWrap())) &&
5775 isTypeDesirableForOp(ISD::SETCC, VT: N0.getOperand(i: 0).getValueType())) {
5776 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1.getOperand(i: 0), Cond);
5777 }
5778
5779 // Fold (setcc (sub nsw a, b), zero, s??) -> (setcc a, b, s??)
5780 // TODO: Remove that .isVector() check
5781 if (VT.isVector() && isZeroOrZeroSplat(N: N1) && N0.getOpcode() == ISD::SUB &&
5782 N0->getFlags().hasNoSignedWrap() && ISD::isSignedIntSetCC(Code: Cond)) {
5783 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N0.getOperand(i: 1), Cond);
5784 }
5785
5786 // Could not fold it.
5787 return SDValue();
5788}
5789
5790/// Returns true (and the GlobalValue and the offset) if the node is a
5791/// GlobalAddress + offset.
5792bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
5793 int64_t &Offset) const {
5794
5795 SDNode *N = unwrapAddress(N: SDValue(WN, 0)).getNode();
5796
5797 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(Val: N)) {
5798 GA = GASD->getGlobal();
5799 Offset += GASD->getOffset();
5800 return true;
5801 }
5802
5803 if (N->isAnyAdd()) {
5804 SDValue N1 = N->getOperand(Num: 0);
5805 SDValue N2 = N->getOperand(Num: 1);
5806 if (isGAPlusOffset(WN: N1.getNode(), GA, Offset)) {
5807 if (auto *V = dyn_cast<ConstantSDNode>(Val&: N2)) {
5808 Offset += V->getSExtValue();
5809 return true;
5810 }
5811 } else if (isGAPlusOffset(WN: N2.getNode(), GA, Offset)) {
5812 if (auto *V = dyn_cast<ConstantSDNode>(Val&: N1)) {
5813 Offset += V->getSExtValue();
5814 return true;
5815 }
5816 }
5817 }
5818
5819 return false;
5820}
5821
5822SDValue TargetLowering::PerformDAGCombine(SDNode *N,
5823 DAGCombinerInfo &DCI) const {
5824 // Default implementation: no optimization.
5825 return SDValue();
5826}
5827
5828//===----------------------------------------------------------------------===//
5829// Inline Assembler Implementation Methods
5830//===----------------------------------------------------------------------===//
5831
5832TargetLowering::ConstraintType
5833TargetLowering::getConstraintType(StringRef Constraint) const {
5834 unsigned S = Constraint.size();
5835
5836 if (S == 1) {
5837 switch (Constraint[0]) {
5838 default: break;
5839 case 'r':
5840 return C_RegisterClass;
5841 case 'm': // memory
5842 case 'o': // offsetable
5843 case 'V': // not offsetable
5844 return C_Memory;
5845 case 'p': // Address.
5846 return C_Address;
5847 case 'n': // Simple Integer
5848 case 'E': // Floating Point Constant
5849 case 'F': // Floating Point Constant
5850 return C_Immediate;
5851 case 'i': // Simple Integer or Relocatable Constant
5852 case 's': // Relocatable Constant
5853 case 'X': // Allow ANY value.
5854 case 'I': // Target registers.
5855 case 'J':
5856 case 'K':
5857 case 'L':
5858 case 'M':
5859 case 'N':
5860 case 'O':
5861 case 'P':
5862 case '<':
5863 case '>':
5864 return C_Other;
5865 }
5866 }
5867
5868 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5869 if (S == 8 && Constraint.substr(Start: 1, N: 6) == "memory") // "{memory}"
5870 return C_Memory;
5871 return C_Register;
5872 }
5873 return C_Unknown;
5874}
5875
5876/// Try to replace an X constraint, which matches anything, with another that
5877/// has more specific requirements based on the type of the corresponding
5878/// operand.
5879const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5880 if (ConstraintVT.isInteger())
5881 return "r";
5882 if (ConstraintVT.isFloatingPoint())
5883 return "f"; // works for many targets
5884 return nullptr;
5885}
5886
5887SDValue TargetLowering::LowerAsmOutputForConstraint(
5888 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5889 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5890 return SDValue();
5891}
5892
5893/// Lower the specified operand into the Ops vector.
5894/// If it is invalid, don't add anything to Ops.
5895void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
5896 StringRef Constraint,
5897 std::vector<SDValue> &Ops,
5898 SelectionDAG &DAG) const {
5899
5900 if (Constraint.size() > 1)
5901 return;
5902
5903 char ConstraintLetter = Constraint[0];
5904 switch (ConstraintLetter) {
5905 default: break;
5906 case 'X': // Allows any operand
5907 case 'i': // Simple Integer or Relocatable Constant
5908 case 'n': // Simple Integer
5909 case 's': { // Relocatable Constant
5910
5911 ConstantSDNode *C;
5912 uint64_t Offset = 0;
5913
5914 // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5915 // etc., since getelementpointer is variadic. We can't use
5916 // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5917 // while in this case the GA may be furthest from the root node which is
5918 // likely an ISD::ADD.
5919 while (true) {
5920 if ((C = dyn_cast<ConstantSDNode>(Val&: Op)) && ConstraintLetter != 's') {
5921 // gcc prints these as sign extended. Sign extend value to 64 bits
5922 // now; without this it would get ZExt'd later in
5923 // ScheduleDAGSDNodes::EmitNode, which is very generic.
5924 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5925 BooleanContent BCont = getBooleanContents(Type: MVT::i64);
5926 ISD::NodeType ExtOpc =
5927 IsBool ? getExtendForContent(Content: BCont) : ISD::SIGN_EXTEND;
5928 int64_t ExtVal =
5929 ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5930 Ops.push_back(
5931 x: DAG.getTargetConstant(Val: Offset + ExtVal, DL: SDLoc(C), VT: MVT::i64));
5932 return;
5933 }
5934 if (ConstraintLetter != 'n') {
5935 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Val&: Op)) {
5936 Ops.push_back(x: DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL: SDLoc(Op),
5937 VT: GA->getValueType(ResNo: 0),
5938 offset: Offset + GA->getOffset()));
5939 return;
5940 }
5941 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Val&: Op)) {
5942 Ops.push_back(x: DAG.getTargetBlockAddress(
5943 BA: BA->getBlockAddress(), VT: BA->getValueType(ResNo: 0),
5944 Offset: Offset + BA->getOffset(), TargetFlags: BA->getTargetFlags()));
5945 return;
5946 }
5947 if (isa<BasicBlockSDNode>(Val: Op)) {
5948 Ops.push_back(x: Op);
5949 return;
5950 }
5951 }
5952 const unsigned OpCode = Op.getOpcode();
5953 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5954 if ((C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 0))))
5955 Op = Op.getOperand(i: 1);
5956 // Subtraction is not commutative.
5957 else if (OpCode == ISD::ADD &&
5958 (C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1))))
5959 Op = Op.getOperand(i: 0);
5960 else
5961 return;
5962 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5963 continue;
5964 }
5965 return;
5966 }
5967 break;
5968 }
5969 }
5970}
5971
/// Default implementation: the body is empty, so no operands are appended to
/// \p Ops and neither \p I nor \p DAG is inspected.
/// NOTE(review): presumably an override point for targets that need extra
/// intrinsic operands -- confirm against the declaration in the header.
void TargetLowering::CollectTargetIntrinsicOperands(
    const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
}
5975
5976std::pair<unsigned, const TargetRegisterClass *>
5977TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
5978 StringRef Constraint,
5979 MVT VT) const {
5980 if (!Constraint.starts_with(Prefix: "{"))
5981 return std::make_pair(x: 0u, y: static_cast<TargetRegisterClass *>(nullptr));
5982 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5983
5984 // Remove the braces from around the name.
5985 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5986
5987 std::pair<unsigned, const TargetRegisterClass *> R =
5988 std::make_pair(x: 0u, y: static_cast<const TargetRegisterClass *>(nullptr));
5989
5990 // Figure out which register class contains this reg.
5991 for (const TargetRegisterClass *RC : RI->regclasses()) {
5992 // If none of the value types for this register class are valid, we
5993 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5994 if (!isLegalRC(TRI: *RI, RC: *RC))
5995 continue;
5996
5997 for (const MCPhysReg &PR : *RC) {
5998 if (RegName.equals_insensitive(RHS: RI->getRegAsmName(Reg: PR))) {
5999 std::pair<unsigned, const TargetRegisterClass *> S =
6000 std::make_pair(x: PR, y&: RC);
6001
6002 // If this register class has the requested value type, return it,
6003 // otherwise keep searching and return the first class found
6004 // if no other is found which explicitly has the requested type.
6005 if (RI->isTypeLegalForClass(RC: *RC, T: VT))
6006 return S;
6007 if (!R.second)
6008 R = S;
6009 }
6010 }
6011 }
6012
6013 return R;
6014}
6015
6016//===----------------------------------------------------------------------===//
6017// Constraint Selection.
6018
6019/// Return true of this is an input operand that is a matching constraint like
6020/// "4".
6021bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
6022 assert(!ConstraintCode.empty() && "No known constraint!");
6023 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
6024}
6025
6026/// If this is an input matching constraint, this method returns the output
6027/// operand it matches.
6028unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
6029 assert(!ConstraintCode.empty() && "No known constraint!");
6030 return atoi(nptr: ConstraintCode.c_str());
6031}
6032
6033/// Split up the constraint string from the inline assembly value into the
6034/// specific constraints and their prefixes, and also tie in the associated
6035/// operand values.
6036/// If this returns an empty vector, and if the constraint string itself
6037/// isn't empty, there was an error parsing.
6038TargetLowering::AsmOperandInfoVector
6039TargetLowering::ParseConstraints(const DataLayout &DL,
6040 const TargetRegisterInfo *TRI,
6041 const CallBase &Call) const {
6042 /// Information about all of the constraints.
6043 AsmOperandInfoVector ConstraintOperands;
6044 const InlineAsm *IA = cast<InlineAsm>(Val: Call.getCalledOperand());
6045 unsigned maCount = 0; // Largest number of multiple alternative constraints.
6046
6047 // Do a prepass over the constraints, canonicalizing them, and building up the
6048 // ConstraintOperands list.
6049 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
6050 unsigned ResNo = 0; // ResNo - The result number of the next output.
6051 unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
6052
6053 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
6054 ConstraintOperands.emplace_back(args: std::move(CI));
6055 AsmOperandInfo &OpInfo = ConstraintOperands.back();
6056
6057 // Update multiple alternative constraint count.
6058 if (OpInfo.multipleAlternatives.size() > maCount)
6059 maCount = OpInfo.multipleAlternatives.size();
6060
6061 OpInfo.ConstraintVT = MVT::Other;
6062
6063 // Compute the value type for each operand.
6064 switch (OpInfo.Type) {
6065 case InlineAsm::isOutput: {
6066 // Indirect outputs just consume an argument.
6067 if (OpInfo.isIndirect) {
6068 OpInfo.CallOperandVal = Call.getArgOperand(i: ArgNo);
6069 break;
6070 }
6071
6072 // The return value of the call is this value. As such, there is no
6073 // corresponding argument.
6074 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
6075 EVT VT;
6076 if (auto *STy = dyn_cast<StructType>(Val: Call.getType())) {
6077 VT = getAsmOperandValueType(DL, Ty: STy->getElementType(N: ResNo));
6078 } else {
6079 assert(ResNo == 0 && "Asm only has one result!");
6080 VT = getAsmOperandValueType(DL, Ty: Call.getType());
6081 }
6082 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
6083 ++ResNo;
6084 break;
6085 }
6086 case InlineAsm::isInput:
6087 OpInfo.CallOperandVal = Call.getArgOperand(i: ArgNo);
6088 break;
6089 case InlineAsm::isLabel:
6090 OpInfo.CallOperandVal = cast<CallBrInst>(Val: &Call)->getIndirectDest(i: LabelNo);
6091 ++LabelNo;
6092 continue;
6093 case InlineAsm::isClobber:
6094 // Nothing to do.
6095 break;
6096 }
6097
6098 if (OpInfo.CallOperandVal) {
6099 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
6100 if (OpInfo.isIndirect) {
6101 OpTy = Call.getParamElementType(ArgNo);
6102 assert(OpTy && "Indirect operand must have elementtype attribute");
6103 }
6104
6105 // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
6106 if (StructType *STy = dyn_cast<StructType>(Val: OpTy))
6107 if (STy->getNumElements() == 1)
6108 OpTy = STy->getElementType(N: 0);
6109
6110 // If OpTy is not a single value, it may be a struct/union that we
6111 // can tile with integers.
6112 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
6113 unsigned BitSize = DL.getTypeSizeInBits(Ty: OpTy);
6114 switch (BitSize) {
6115 default: break;
6116 case 1:
6117 case 8:
6118 case 16:
6119 case 32:
6120 case 64:
6121 case 128:
6122 OpTy = IntegerType::get(C&: OpTy->getContext(), NumBits: BitSize);
6123 break;
6124 }
6125 }
6126
6127 EVT VT = getAsmOperandValueType(DL, Ty: OpTy, AllowUnknown: true);
6128 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
6129 ArgNo++;
6130 }
6131 }
6132
6133 // If we have multiple alternative constraints, select the best alternative.
6134 if (!ConstraintOperands.empty()) {
6135 if (maCount) {
6136 unsigned bestMAIndex = 0;
6137 int bestWeight = -1;
6138 // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
6139 int weight = -1;
6140 unsigned maIndex;
6141 // Compute the sums of the weights for each alternative, keeping track
6142 // of the best (highest weight) one so far.
6143 for (maIndex = 0; maIndex < maCount; ++maIndex) {
6144 int weightSum = 0;
6145 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6146 cIndex != eIndex; ++cIndex) {
6147 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6148 if (OpInfo.Type == InlineAsm::isClobber)
6149 continue;
6150
6151 // If this is an output operand with a matching input operand,
6152 // look up the matching input. If their types mismatch, e.g. one
6153 // is an integer, the other is floating point, or their sizes are
6154 // different, flag it as an maCantMatch.
6155 if (OpInfo.hasMatchingInput()) {
6156 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6157 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6158 if ((OpInfo.ConstraintVT.isInteger() !=
6159 Input.ConstraintVT.isInteger()) ||
6160 (OpInfo.ConstraintVT.getSizeInBits() !=
6161 Input.ConstraintVT.getSizeInBits())) {
6162 weightSum = -1; // Can't match.
6163 break;
6164 }
6165 }
6166 }
6167 weight = getMultipleConstraintMatchWeight(info&: OpInfo, maIndex);
6168 if (weight == -1) {
6169 weightSum = -1;
6170 break;
6171 }
6172 weightSum += weight;
6173 }
6174 // Update best.
6175 if (weightSum > bestWeight) {
6176 bestWeight = weightSum;
6177 bestMAIndex = maIndex;
6178 }
6179 }
6180
6181 // Now select chosen alternative in each constraint.
6182 for (AsmOperandInfo &cInfo : ConstraintOperands)
6183 if (cInfo.Type != InlineAsm::isClobber)
6184 cInfo.selectAlternative(index: bestMAIndex);
6185 }
6186 }
6187
6188 // Check and hook up tied operands, choose constraint code to use.
6189 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6190 cIndex != eIndex; ++cIndex) {
6191 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6192
6193 // If this is an output operand with a matching input operand, look up the
6194 // matching input. If their types mismatch, e.g. one is an integer, the
6195 // other is floating point, or their sizes are different, flag it as an
6196 // error.
6197 if (OpInfo.hasMatchingInput()) {
6198 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6199
6200 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6201 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
6202 getRegForInlineAsmConstraint(RI: TRI, Constraint: OpInfo.ConstraintCode,
6203 VT: OpInfo.ConstraintVT);
6204 std::pair<unsigned, const TargetRegisterClass *> InputRC =
6205 getRegForInlineAsmConstraint(RI: TRI, Constraint: Input.ConstraintCode,
6206 VT: Input.ConstraintVT);
6207 const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
6208 OpInfo.ConstraintVT.isFloatingPoint();
6209 const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
6210 Input.ConstraintVT.isFloatingPoint();
6211 if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
6212 (MatchRC.second != InputRC.second)) {
6213 report_fatal_error(reason: "Unsupported asm: input constraint"
6214 " with a matching output constraint of"
6215 " incompatible type!");
6216 }
6217 }
6218 }
6219 }
6220
6221 return ConstraintOperands;
6222}
6223
6224/// Return a number indicating our preference for chosing a type of constraint
6225/// over another, for the purpose of sorting them. Immediates are almost always
6226/// preferrable (when they can be emitted). A higher return value means a
6227/// stronger preference for one constraint type relative to another.
6228/// FIXME: We should prefer registers over memory but doing so may lead to
6229/// unrecoverable register exhaustion later.
6230/// https://github.com/llvm/llvm-project/issues/20571
6231static unsigned getConstraintPiority(TargetLowering::ConstraintType CT) {
6232 switch (CT) {
6233 case TargetLowering::C_Immediate:
6234 case TargetLowering::C_Other:
6235 return 4;
6236 case TargetLowering::C_Memory:
6237 case TargetLowering::C_Address:
6238 return 3;
6239 case TargetLowering::C_RegisterClass:
6240 return 2;
6241 case TargetLowering::C_Register:
6242 return 1;
6243 case TargetLowering::C_Unknown:
6244 return 0;
6245 }
6246 llvm_unreachable("Invalid constraint type");
6247}
6248
6249/// Examine constraint type and operand type and determine a weight value.
6250/// This object must already have been set up with the operand type
6251/// and the current alternative constraint selected.
6252TargetLowering::ConstraintWeight
6253 TargetLowering::getMultipleConstraintMatchWeight(
6254 AsmOperandInfo &info, int maIndex) const {
6255 InlineAsm::ConstraintCodeVector *rCodes;
6256 if (maIndex >= (int)info.multipleAlternatives.size())
6257 rCodes = &info.Codes;
6258 else
6259 rCodes = &info.multipleAlternatives[maIndex].Codes;
6260 ConstraintWeight BestWeight = CW_Invalid;
6261
6262 // Loop over the options, keeping track of the most general one.
6263 for (const std::string &rCode : *rCodes) {
6264 ConstraintWeight weight =
6265 getSingleConstraintMatchWeight(info, constraint: rCode.c_str());
6266 if (weight > BestWeight)
6267 BestWeight = weight;
6268 }
6269
6270 return BestWeight;
6271}
6272
6273/// Examine constraint type and operand type and determine a weight value.
6274/// This object must already have been set up with the operand type
6275/// and the current alternative constraint selected.
6276TargetLowering::ConstraintWeight
6277 TargetLowering::getSingleConstraintMatchWeight(
6278 AsmOperandInfo &info, const char *constraint) const {
6279 ConstraintWeight weight = CW_Invalid;
6280 Value *CallOperandVal = info.CallOperandVal;
6281 // If we don't have a value, we can't do a match,
6282 // but allow it at the lowest weight.
6283 if (!CallOperandVal)
6284 return CW_Default;
6285 // Look at the constraint type.
6286 switch (*constraint) {
6287 case 'i': // immediate integer.
6288 case 'n': // immediate integer with a known value.
6289 if (isa<ConstantInt>(Val: CallOperandVal))
6290 weight = CW_Constant;
6291 break;
6292 case 's': // non-explicit intregal immediate.
6293 if (isa<GlobalValue>(Val: CallOperandVal))
6294 weight = CW_Constant;
6295 break;
6296 case 'E': // immediate float if host format.
6297 case 'F': // immediate float.
6298 if (isa<ConstantFP>(Val: CallOperandVal))
6299 weight = CW_Constant;
6300 break;
6301 case '<': // memory operand with autodecrement.
6302 case '>': // memory operand with autoincrement.
6303 case 'm': // memory operand.
6304 case 'o': // offsettable memory operand
6305 case 'V': // non-offsettable memory operand
6306 weight = CW_Memory;
6307 break;
6308 case 'r': // general register.
6309 case 'g': // general register, memory operand or immediate integer.
6310 // note: Clang converts "g" to "imr".
6311 if (CallOperandVal->getType()->isIntegerTy())
6312 weight = CW_Register;
6313 break;
6314 case 'X': // any operand.
6315 default:
6316 weight = CW_Default;
6317 break;
6318 }
6319 return weight;
6320}
6321
6322/// If there are multiple different constraints that we could pick for this
6323/// operand (e.g. "imr") try to pick the 'best' one.
6324/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
6325/// into seven classes:
6326/// Register -> one specific register
6327/// RegisterClass -> a group of regs
6328/// Memory -> memory
6329/// Address -> a symbolic memory reference
6330/// Immediate -> immediate values
6331/// Other -> magic values (such as "Flag Output Operands")
6332/// Unknown -> something we don't recognize yet and can't handle
6333/// Ideally, we would pick the most specific constraint possible: if we have
6334/// something that fits into a register, we would pick it. The problem here
6335/// is that if we have something that could either be in a register or in
6336/// memory that use of the register could cause selection of *other*
6337/// operands to fail: they might only succeed if we pick memory. Because of
6338/// this the heuristic we use is:
6339///
6340/// 1) If there is an 'other' constraint, and if the operand is valid for
6341/// that constraint, use it. This makes us take advantage of 'i'
6342/// constraints when available.
6343/// 2) Otherwise, pick the most general constraint present. This prefers
6344/// 'm' over 'r', for example.
6345///
6346TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
6347 TargetLowering::AsmOperandInfo &OpInfo) const {
6348 ConstraintGroup Ret;
6349
6350 Ret.reserve(N: OpInfo.Codes.size());
6351 for (StringRef Code : OpInfo.Codes) {
6352 TargetLowering::ConstraintType CType = getConstraintType(Constraint: Code);
6353
6354 // Indirect 'other' or 'immediate' constraints are not allowed.
6355 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
6356 CType == TargetLowering::C_Register ||
6357 CType == TargetLowering::C_RegisterClass))
6358 continue;
6359
6360 // Things with matching constraints can only be registers, per gcc
6361 // documentation. This mainly affects "g" constraints.
6362 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6363 continue;
6364
6365 Ret.emplace_back(Args&: Code, Args&: CType);
6366 }
6367
6368 llvm::stable_sort(Range&: Ret, C: [](ConstraintPair a, ConstraintPair b) {
6369 return getConstraintPiority(CT: a.second) > getConstraintPiority(CT: b.second);
6370 });
6371
6372 return Ret;
6373}
6374
6375/// If we have an immediate, see if we can lower it. Return true if we can,
6376/// false otherwise.
6377static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
6378 SDValue Op, SelectionDAG *DAG,
6379 const TargetLowering &TLI) {
6380
6381 assert((P.second == TargetLowering::C_Other ||
6382 P.second == TargetLowering::C_Immediate) &&
6383 "need immediate or other");
6384
6385 if (!Op.getNode())
6386 return false;
6387
6388 std::vector<SDValue> ResultOps;
6389 TLI.LowerAsmOperandForConstraint(Op, Constraint: P.first, Ops&: ResultOps, DAG&: *DAG);
6390 return !ResultOps.empty();
6391}
6392
6393/// Determines the constraint code and constraint type to use for the specific
6394/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
6395void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
6396 SDValue Op,
6397 SelectionDAG *DAG) const {
6398 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
6399
6400 // Single-letter constraints ('r') are very common.
6401 if (OpInfo.Codes.size() == 1) {
6402 OpInfo.ConstraintCode = OpInfo.Codes[0];
6403 OpInfo.ConstraintType = getConstraintType(Constraint: OpInfo.ConstraintCode);
6404 } else {
6405 ConstraintGroup G = getConstraintPreferences(OpInfo);
6406 if (G.empty())
6407 return;
6408
6409 unsigned BestIdx = 0;
6410 for (const unsigned E = G.size();
6411 BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
6412 G[BestIdx].second == TargetLowering::C_Immediate);
6413 ++BestIdx) {
6414 if (lowerImmediateIfPossible(P&: G[BestIdx], Op, DAG, TLI: *this))
6415 break;
6416 // If we're out of constraints, just pick the first one.
6417 if (BestIdx + 1 == E) {
6418 BestIdx = 0;
6419 break;
6420 }
6421 }
6422
6423 OpInfo.ConstraintCode = G[BestIdx].first;
6424 OpInfo.ConstraintType = G[BestIdx].second;
6425 }
6426
6427 // 'X' matches anything.
6428 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6429 // Constants are handled elsewhere. For Functions, the type here is the
6430 // type of the result, which is not what we want to look at; leave them
6431 // alone.
6432 Value *v = OpInfo.CallOperandVal;
6433 if (isa<ConstantInt>(Val: v) || isa<Function>(Val: v)) {
6434 return;
6435 }
6436
6437 if (isa<BasicBlock>(Val: v) || isa<BlockAddress>(Val: v)) {
6438 OpInfo.ConstraintCode = "i";
6439 return;
6440 }
6441
6442 // Otherwise, try to resolve it to something we know about by looking at
6443 // the actual operand type.
6444 if (const char *Repl = LowerXConstraint(ConstraintVT: OpInfo.ConstraintVT)) {
6445 OpInfo.ConstraintCode = Repl;
6446 OpInfo.ConstraintType = getConstraintType(Constraint: OpInfo.ConstraintCode);
6447 }
6448 }
6449}
6450
6451/// Given an exact SDIV by a constant, create a multiplication
6452/// with the multiplicative inverse of the constant.
6453/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6454static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
6455 const SDLoc &dl, SelectionDAG &DAG,
6456 SmallVectorImpl<SDNode *> &Created) {
6457 SDValue Op0 = N->getOperand(Num: 0);
6458 SDValue Op1 = N->getOperand(Num: 1);
6459 EVT VT = N->getValueType(ResNo: 0);
6460 EVT ShVT = TLI.getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6461 EVT ShSVT = ShVT.getScalarType();
6462
6463 bool UseSRA = false;
6464 SmallVector<SDValue, 16> Shifts, Factors;
6465
6466 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6467 if (C->isZero())
6468 return false;
6469
6470 EVT CT = C->getValueType(ResNo: 0);
6471 APInt Divisor = C->getAPIntValue();
6472 unsigned Shift = Divisor.countr_zero();
6473 if (Shift) {
6474 Divisor.ashrInPlace(ShiftAmt: Shift);
6475 UseSRA = true;
6476 }
6477 APInt Factor = Divisor.multiplicativeInverse();
6478 Shifts.push_back(Elt: DAG.getConstant(Val: Shift, DL: dl, VT: ShSVT));
6479 Factors.push_back(Elt: DAG.getConstant(Val: Factor, DL: dl, VT: CT));
6480 return true;
6481 };
6482
6483 // Collect all magic values from the build vector.
6484 if (!ISD::matchUnaryPredicate(Op: Op1, Match: BuildSDIVPattern))
6485 return SDValue();
6486
6487 SDValue Shift, Factor;
6488 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6489 Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
6490 Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
6491 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6492 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6493 "Expected matchUnaryPredicate to return one element for scalable "
6494 "vectors");
6495 Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts[0]);
6496 Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors[0]);
6497 } else {
6498 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6499 Shift = Shifts[0];
6500 Factor = Factors[0];
6501 }
6502
6503 SDValue Res = Op0;
6504 if (UseSRA) {
6505 Res = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Res, N2: Shift, Flags: SDNodeFlags::Exact);
6506 Created.push_back(Elt: Res.getNode());
6507 }
6508
6509 return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Res, N2: Factor);
6510}
6511
6512/// Given an exact UDIV by a constant, create a multiplication
6513/// with the multiplicative inverse of the constant.
6514/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6515static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
6516 const SDLoc &dl, SelectionDAG &DAG,
6517 SmallVectorImpl<SDNode *> &Created) {
6518 EVT VT = N->getValueType(ResNo: 0);
6519 EVT ShVT = TLI.getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6520 EVT ShSVT = ShVT.getScalarType();
6521
6522 bool UseSRL = false;
6523 SmallVector<SDValue, 16> Shifts, Factors;
6524
6525 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6526 if (C->isZero())
6527 return false;
6528
6529 EVT CT = C->getValueType(ResNo: 0);
6530 APInt Divisor = C->getAPIntValue();
6531 unsigned Shift = Divisor.countr_zero();
6532 if (Shift) {
6533 Divisor.lshrInPlace(ShiftAmt: Shift);
6534 UseSRL = true;
6535 }
6536 // Calculate the multiplicative inverse modulo BW.
6537 APInt Factor = Divisor.multiplicativeInverse();
6538 Shifts.push_back(Elt: DAG.getConstant(Val: Shift, DL: dl, VT: ShSVT));
6539 Factors.push_back(Elt: DAG.getConstant(Val: Factor, DL: dl, VT: CT));
6540 return true;
6541 };
6542
6543 SDValue Op1 = N->getOperand(Num: 1);
6544
6545 // Collect all magic values from the build vector.
6546 if (!ISD::matchUnaryPredicate(Op: Op1, Match: BuildUDIVPattern))
6547 return SDValue();
6548
6549 SDValue Shift, Factor;
6550 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6551 Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
6552 Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
6553 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6554 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6555 "Expected matchUnaryPredicate to return one element for scalable "
6556 "vectors");
6557 Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts[0]);
6558 Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors[0]);
6559 } else {
6560 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6561 Shift = Shifts[0];
6562 Factor = Factors[0];
6563 }
6564
6565 SDValue Res = N->getOperand(Num: 0);
6566 if (UseSRL) {
6567 Res = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Res, N2: Shift, Flags: SDNodeFlags::Exact);
6568 Created.push_back(Elt: Res.getNode());
6569 }
6570
6571 return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Res, N2: Factor);
6572}
6573
6574SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
6575 SelectionDAG &DAG,
6576 SmallVectorImpl<SDNode *> &Created) const {
6577 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6578 if (isIntDivCheap(VT: N->getValueType(ResNo: 0), Attr))
6579 return SDValue(N, 0); // Lower SDIV as SDIV
6580 return SDValue();
6581}
6582
6583SDValue
6584TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
6585 SelectionDAG &DAG,
6586 SmallVectorImpl<SDNode *> &Created) const {
6587 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6588 if (isIntDivCheap(VT: N->getValueType(ResNo: 0), Attr))
6589 return SDValue(N, 0); // Lower SREM as SREM
6590 return SDValue();
6591}
6592
6593/// Build sdiv by power-of-2 with conditional move instructions
6594/// Ref: "Hacker's Delight" by Henry Warren 10-1
6595/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6596/// bgez x, label
6597/// add x, x, 2**k-1
6598/// label:
6599/// sra res, x, k
6600/// neg res, res (when the divisor is negative)
6601SDValue TargetLowering::buildSDIVPow2WithCMov(
6602 SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6603 SmallVectorImpl<SDNode *> &Created) const {
6604 unsigned Lg2 = Divisor.countr_zero();
6605 EVT VT = N->getValueType(ResNo: 0);
6606
6607 SDLoc DL(N);
6608 SDValue N0 = N->getOperand(Num: 0);
6609 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
6610 APInt Lg2Mask = APInt::getLowBitsSet(numBits: VT.getSizeInBits(), loBitsSet: Lg2);
6611 SDValue Pow2MinusOne = DAG.getConstant(Val: Lg2Mask, DL, VT);
6612
6613 // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6614 EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
6615 SDValue Cmp = DAG.getSetCC(DL, VT: CCVT, LHS: N0, RHS: Zero, Cond: ISD::SETLT);
6616 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0, N2: Pow2MinusOne);
6617 SDValue CMov = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: Cmp, N2: Add, N3: N0);
6618
6619 Created.push_back(Elt: Cmp.getNode());
6620 Created.push_back(Elt: Add.getNode());
6621 Created.push_back(Elt: CMov.getNode());
6622
6623 // Divide by pow2.
6624 SDValue SRA = DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: CMov,
6625 N2: DAG.getShiftAmountConstant(Val: Lg2, VT, DL));
6626
6627 // If we're dividing by a positive value, we're done. Otherwise, we must
6628 // negate the result.
6629 if (Divisor.isNonNegative())
6630 return SRA;
6631
6632 Created.push_back(Elt: SRA.getNode());
6633 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Zero, N2: SRA);
6634}
6635
6636/// Given an ISD::SDIV node expressing a divide by constant,
6637/// return a DAG expression to select that will generate the same value by
6638/// multiplying by a magic number.
6639/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6640SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
6641 bool IsAfterLegalization,
6642 bool IsAfterLegalTypes,
6643 SmallVectorImpl<SDNode *> &Created) const {
6644 SDLoc dl(N);
6645
6646 // If the sdiv has an 'exact' bit we can use a simpler lowering.
6647 if (N->getFlags().hasExact())
6648 return BuildExactSDIV(TLI: *this, N, dl, DAG, Created);
6649
6650 EVT VT = N->getValueType(ResNo: 0);
6651 EVT SVT = VT.getScalarType();
6652 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6653 EVT ShSVT = ShVT.getScalarType();
6654 unsigned EltBits = VT.getScalarSizeInBits();
6655 EVT MulVT;
6656
6657 // Check to see if we can do this.
6658 // FIXME: We should be more aggressive here.
6659 EVT QueryVT = VT;
6660 if (VT.isVector()) {
6661 // If the vector type will be legalized to a vector type with the same
6662 // element type, allow the transform before type legalization if MULHS or
6663 // SMUL_LOHI are supported.
6664 QueryVT = getLegalTypeToTransformTo(Context&: *DAG.getContext(), VT);
6665 if (!QueryVT.isVector() ||
6666 QueryVT.getVectorElementType() != VT.getVectorElementType())
6667 return SDValue();
6668 } else if (!isTypeLegal(VT)) {
6669 // Limit this to simple scalars for now.
6670 if (!VT.isSimple())
6671 return SDValue();
6672
6673 // If this type will be promoted to a large enough type with a legal
6674 // multiply operation, we can go ahead and do this transform.
6675 if (getTypeAction(VT: VT.getSimpleVT()) != TypePromoteInteger)
6676 return SDValue();
6677
6678 MulVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT);
6679 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6680 !isOperationLegal(Op: ISD::MUL, VT: MulVT))
6681 return SDValue();
6682 }
6683
6684 bool HasMULHS =
6685 isOperationLegalOrCustom(Op: ISD::MULHS, VT: QueryVT, LegalOnly: IsAfterLegalization);
6686 bool HasSMUL_LOHI =
6687 isOperationLegalOrCustom(Op: ISD::SMUL_LOHI, VT: QueryVT, LegalOnly: IsAfterLegalization);
6688
6689 if (isTypeLegal(VT) && !HasMULHS && !HasSMUL_LOHI && MulVT == EVT()) {
6690 // If type twice as wide legal, widen and use a mul plus a shift.
6691 EVT WideVT = VT.widenIntegerElementType(Context&: *DAG.getContext());
6692 // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
6693 // custom lowered. This is very expensive so avoid it at all costs for
6694 // constant divisors.
6695 if ((!IsAfterLegalTypes && isOperationExpand(Op: ISD::SDIV, VT) &&
6696 isOperationCustom(Op: ISD::SDIVREM, VT: VT.getScalarType())) ||
6697 isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT))
6698 MulVT = WideVT;
6699 }
6700
6701 if (!HasMULHS && !HasSMUL_LOHI && MulVT == EVT())
6702 return SDValue();
6703
6704 // If we're after type legalization and SVT is not legal, use the
6705 // promoted type for creating constants to avoid creating nodes with
6706 // illegal types.
6707 if (IsAfterLegalTypes && VT.isVector()) {
6708 SVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT: SVT);
6709 if (SVT.bitsLT(VT: VT.getScalarType()))
6710 return SDValue();
6711 ShSVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT: ShSVT);
6712 if (ShSVT.bitsLT(VT: ShVT.getScalarType()))
6713 return SDValue();
6714 }
6715 const unsigned SVTBits = SVT.getSizeInBits();
6716
6717 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6718
6719 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6720 if (C->isZero())
6721 return false;
6722 // Truncate the divisor to the target scalar type in case it was promoted
6723 // during type legalization.
6724 APInt Divisor = C->getAPIntValue().trunc(width: EltBits);
6725 SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(D: Divisor);
6726 int NumeratorFactor = 0;
6727 int ShiftMask = -1;
6728
6729 if (Divisor.isOne() || Divisor.isAllOnes()) {
6730 // If d is +1/-1, we just multiply the numerator by +1/-1.
6731 NumeratorFactor = Divisor.getSExtValue();
6732 magics.Magic = 0;
6733 magics.ShiftAmount = 0;
6734 ShiftMask = 0;
6735 } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6736 // If d > 0 and m < 0, add the numerator.
6737 NumeratorFactor = 1;
6738 } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6739 // If d < 0 and m > 0, subtract the numerator.
6740 NumeratorFactor = -1;
6741 }
6742
6743 MagicFactors.push_back(
6744 Elt: DAG.getConstant(Val: magics.Magic.zext(width: SVTBits), DL: dl, VT: SVT));
6745 Factors.push_back(Elt: DAG.getSignedConstant(Val: NumeratorFactor, DL: dl, VT: SVT));
6746 Shifts.push_back(Elt: DAG.getConstant(Val: magics.ShiftAmount, DL: dl, VT: ShSVT));
6747 ShiftMasks.push_back(Elt: DAG.getSignedConstant(Val: ShiftMask, DL: dl, VT: SVT));
6748 return true;
6749 };
6750
6751 SDValue N0 = N->getOperand(Num: 0);
6752 SDValue N1 = N->getOperand(Num: 1);
6753
6754 // Collect the shifts / magic values from each element.
6755 if (!ISD::matchUnaryPredicate(Op: N1, Match: BuildSDIVPattern, /*AllowUndefs=*/false,
6756 /*AllowTruncation=*/true))
6757 return SDValue();
6758
6759 SDValue MagicFactor, Factor, Shift, ShiftMask;
6760 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6761 MagicFactor = DAG.getBuildVector(VT, DL: dl, Ops: MagicFactors);
6762 Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
6763 Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
6764 ShiftMask = DAG.getBuildVector(VT, DL: dl, Ops: ShiftMasks);
6765 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6766 assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6767 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6768 "Expected matchUnaryPredicate to return one element for scalable "
6769 "vectors");
6770 MagicFactor = DAG.getSplatVector(VT, DL: dl, Op: MagicFactors[0]);
6771 Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors[0]);
6772 Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts[0]);
6773 ShiftMask = DAG.getSplatVector(VT, DL: dl, Op: ShiftMasks[0]);
6774 } else {
6775 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6776 MagicFactor = MagicFactors[0];
6777 Factor = Factors[0];
6778 Shift = Shifts[0];
6779 ShiftMask = ShiftMasks[0];
6780 }
6781
6782 // Multiply the numerator (operand 0) by the magic value.
6783 auto GetMULHS = [&](SDValue X, SDValue Y) {
6784 if (HasMULHS)
6785 return DAG.getNode(Opcode: ISD::MULHS, DL: dl, VT, N1: X, N2: Y);
6786 if (HasSMUL_LOHI) {
6787 SDValue LoHi =
6788 DAG.getNode(Opcode: ISD::SMUL_LOHI, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: X, N2: Y);
6789 return LoHi.getValue(R: 1);
6790 }
6791
6792 X = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MulVT, Operand: X);
6793 Y = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MulVT, Operand: Y);
6794 Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MulVT, N1: X, N2: Y);
6795 Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MulVT, N1: Y,
6796 N2: DAG.getShiftAmountConstant(Val: EltBits, VT: MulVT, DL: dl));
6797 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6798 };
6799
6800 SDValue Q = GetMULHS(N0, MagicFactor);
6801 if (!Q)
6802 return SDValue();
6803
6804 Created.push_back(Elt: Q.getNode());
6805
6806 // (Optionally) Add/subtract the numerator using Factor.
6807 Factor = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: N0, N2: Factor);
6808 Created.push_back(Elt: Factor.getNode());
6809 Q = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Q, N2: Factor);
6810 Created.push_back(Elt: Q.getNode());
6811
6812 // Shift right algebraic by shift value.
6813 Q = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Q, N2: Shift);
6814 Created.push_back(Elt: Q.getNode());
6815
6816 // Extract the sign bit, mask it and add it to the quotient.
6817 SDValue SignShift = DAG.getConstant(Val: EltBits - 1, DL: dl, VT: ShVT);
6818 SDValue T = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: SignShift);
6819 Created.push_back(Elt: T.getNode());
6820 T = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: T, N2: ShiftMask);
6821 Created.push_back(Elt: T.getNode());
6822 return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Q, N2: T);
6823}
6824
6825/// Given an ISD::UDIV node expressing a divide by constant,
6826/// return a DAG expression to select that will generate the same value by
6827/// multiplying by a magic number.
6828/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6829SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
6830 bool IsAfterLegalization,
6831 bool IsAfterLegalTypes,
6832 SmallVectorImpl<SDNode *> &Created) const {
6833 SDLoc dl(N);
6834
6835 // If the udiv has an 'exact' bit we can use a simpler lowering.
6836 if (N->getFlags().hasExact())
6837 return BuildExactUDIV(TLI: *this, N, dl, DAG, Created);
6838
6839 EVT VT = N->getValueType(ResNo: 0);
6840 EVT SVT = VT.getScalarType();
6841 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6842 EVT ShSVT = ShVT.getScalarType();
6843 unsigned EltBits = VT.getScalarSizeInBits();
6844 EVT MulVT;
6845
6846 // Check to see if we can do this.
6847 // FIXME: We should be more aggressive here.
6848 EVT QueryVT = VT;
6849 if (VT.isVector()) {
6850 // If the vector type will be legalized to a vector type with the same
6851 // element type, allow the transform before type legalization if MULHU or
6852 // UMUL_LOHI are supported.
6853 QueryVT = getLegalTypeToTransformTo(Context&: *DAG.getContext(), VT);
6854 if (!QueryVT.isVector() ||
6855 QueryVT.getVectorElementType() != VT.getVectorElementType())
6856 return SDValue();
6857 } else if (!isTypeLegal(VT)) {
6858 // Limit this to simple scalars for now.
6859 if (!VT.isSimple())
6860 return SDValue();
6861
6862 // If this type will be promoted to a large enough type with a legal
6863 // multiply operation, we can go ahead and do this transform.
6864 if (getTypeAction(VT: VT.getSimpleVT()) != TypePromoteInteger)
6865 return SDValue();
6866
6867 MulVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT);
6868 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6869 !isOperationLegal(Op: ISD::MUL, VT: MulVT))
6870 return SDValue();
6871 }
6872
6873 bool HasMULHU =
6874 isOperationLegalOrCustom(Op: ISD::MULHU, VT: QueryVT, LegalOnly: IsAfterLegalization);
6875 bool HasUMUL_LOHI =
6876 isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT: QueryVT, LegalOnly: IsAfterLegalization);
6877
6878 if (isTypeLegal(VT) && !HasMULHU && !HasUMUL_LOHI && MulVT == EVT()) {
6879 // If type twice as wide legal, widen and use a mul plus a shift.
6880 EVT WideVT = VT.widenIntegerElementType(Context&: *DAG.getContext());
6881 // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
6882 // custom lowered. This is very expensive so avoid it at all costs for
6883 // constant divisors.
6884 if ((!IsAfterLegalTypes && isOperationExpand(Op: ISD::UDIV, VT) &&
6885 isOperationCustom(Op: ISD::UDIVREM, VT: VT.getScalarType())) ||
6886 isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT))
6887 MulVT = WideVT;
6888 }
6889
6890 if (!HasMULHU && !HasUMUL_LOHI && MulVT == EVT())
6891 return SDValue();
6892
6893 SDValue N0 = N->getOperand(Num: 0);
6894 SDValue N1 = N->getOperand(Num: 1);
6895
6896 // Try to use leading zeros of the dividend to reduce the multiplier and
6897 // avoid expensive fixups.
6898 unsigned KnownLeadingZeros = DAG.computeKnownBits(Op: N0).countMinLeadingZeros();
6899
6900 // If we're after type legalization and SVT is not legal, use the
6901 // promoted type for creating constants to avoid creating nodes with
6902 // illegal types.
6903 if (IsAfterLegalTypes && VT.isVector()) {
6904 SVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT: SVT);
6905 if (SVT.bitsLT(VT: VT.getScalarType()))
6906 return SDValue();
6907 ShSVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT: ShSVT);
6908 if (ShSVT.bitsLT(VT: ShVT.getScalarType()))
6909 return SDValue();
6910 }
6911 const unsigned SVTBits = SVT.getSizeInBits();
6912
6913 // Allow i32 to be widened to i64 for uncooperative divisors if i64 MULHU or
6914 // UMUL_LOHI is supported.
6915 const EVT WideSVT = MVT::i64;
6916 const bool HasWideMULHU =
6917 VT == MVT::i32 &&
6918 isOperationLegalOrCustom(Op: ISD::MULHU, VT: WideSVT, LegalOnly: IsAfterLegalization);
6919 const bool HasWideUMUL_LOHI =
6920 VT == MVT::i32 &&
6921 isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT: WideSVT, LegalOnly: IsAfterLegalization);
6922 const bool AllowWiden = (HasWideMULHU || HasWideUMUL_LOHI);
6923
6924 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6925 bool UseWiden = false;
6926 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6927
6928 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6929 if (C->isZero())
6930 return false;
6931 // Truncate the divisor to the target scalar type in case it was promoted
6932 // during type legalization.
6933 APInt Divisor = C->getAPIntValue().trunc(width: EltBits);
6934
6935 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6936
6937 // Magic algorithm doesn't work for division by 1. We need to emit a select
6938 // at the end.
6939 if (Divisor.isOne()) {
6940 PreShift = PostShift = DAG.getUNDEF(VT: ShSVT);
6941 MagicFactor = NPQFactor = DAG.getUNDEF(VT: SVT);
6942 } else {
6943 UnsignedDivisionByConstantInfo magics =
6944 UnsignedDivisionByConstantInfo::get(
6945 D: Divisor, LeadingZeros: std::min(a: KnownLeadingZeros, b: Divisor.countl_zero()),
6946 /*AllowEvenDivisorOptimization=*/true,
6947 /*AllowWidenOptimization=*/AllowWiden);
6948
6949 if (magics.Widen) {
6950 UseWiden = true;
6951 MagicFactor = DAG.getConstant(Val: magics.Magic, DL: dl, VT: WideSVT);
6952 } else {
6953 MagicFactor = DAG.getConstant(Val: magics.Magic.zext(width: SVTBits), DL: dl, VT: SVT);
6954 }
6955
6956 assert(magics.PreShift < Divisor.getBitWidth() &&
6957 "We shouldn't generate an undefined shift!");
6958 assert(magics.PostShift < Divisor.getBitWidth() &&
6959 "We shouldn't generate an undefined shift!");
6960 assert((!magics.IsAdd || magics.PreShift == 0) &&
6961 "Unexpected pre-shift");
6962 PreShift = DAG.getConstant(Val: magics.PreShift, DL: dl, VT: ShSVT);
6963 PostShift = DAG.getConstant(Val: magics.PostShift, DL: dl, VT: ShSVT);
6964 NPQFactor = DAG.getConstant(
6965 Val: magics.IsAdd ? APInt::getOneBitSet(numBits: SVTBits, BitNo: EltBits - 1)
6966 : APInt::getZero(numBits: SVTBits),
6967 DL: dl, VT: SVT);
6968 UseNPQ |= magics.IsAdd;
6969 UsePreShift |= magics.PreShift != 0;
6970 UsePostShift |= magics.PostShift != 0;
6971 }
6972
6973 PreShifts.push_back(Elt: PreShift);
6974 MagicFactors.push_back(Elt: MagicFactor);
6975 NPQFactors.push_back(Elt: NPQFactor);
6976 PostShifts.push_back(Elt: PostShift);
6977 return true;
6978 };
6979
6980 // Collect the shifts/magic values from each element.
6981 if (!ISD::matchUnaryPredicate(Op: N1, Match: BuildUDIVPattern, /*AllowUndefs=*/false,
6982 /*AllowTruncation=*/true))
6983 return SDValue();
6984
6985 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6986 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6987 PreShift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: PreShifts);
6988 MagicFactor = DAG.getBuildVector(VT, DL: dl, Ops: MagicFactors);
6989 NPQFactor = DAG.getBuildVector(VT, DL: dl, Ops: NPQFactors);
6990 PostShift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: PostShifts);
6991 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6992 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6993 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
6994 "Expected matchUnaryPredicate to return one for scalable vectors");
6995 PreShift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: PreShifts[0]);
6996 MagicFactor = DAG.getSplatVector(VT, DL: dl, Op: MagicFactors[0]);
6997 NPQFactor = DAG.getSplatVector(VT, DL: dl, Op: NPQFactors[0]);
6998 PostShift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: PostShifts[0]);
6999 } else {
7000 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
7001 PreShift = PreShifts[0];
7002 MagicFactor = MagicFactors[0];
7003 PostShift = PostShifts[0];
7004 }
7005
7006 if (UseWiden) {
7007 // Compute: (WideSVT(x) * MagicFactor) >> WideSVTBits.
7008 SDValue WideN0 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: WideSVT, Operand: N0);
7009
7010 // Perform WideSVTxWideSVT -> 2*WideSVT multiplication and extract high
7011 // WideSVT bits
7012 SDValue High;
7013 if (HasWideMULHU) {
7014 High = DAG.getNode(Opcode: ISD::MULHU, DL: dl, VT: WideSVT, N1: WideN0, N2: MagicFactor);
7015 } else {
7016 assert(HasWideUMUL_LOHI);
7017 SDValue LoHi =
7018 DAG.getNode(Opcode: ISD::UMUL_LOHI, DL: dl, VTList: DAG.getVTList(VT1: WideSVT, VT2: WideSVT),
7019 N1: WideN0, N2: MagicFactor);
7020 High = LoHi.getValue(R: 1);
7021 }
7022
7023 Created.push_back(Elt: High.getNode());
7024 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: High);
7025 }
7026
7027 SDValue Q = N0;
7028 if (UsePreShift) {
7029 Q = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: PreShift);
7030 Created.push_back(Elt: Q.getNode());
7031 }
7032
7033 auto GetMULHU = [&](SDValue X, SDValue Y) {
7034 if (HasMULHU)
7035 return DAG.getNode(Opcode: ISD::MULHU, DL: dl, VT, N1: X, N2: Y);
7036 if (HasUMUL_LOHI) {
7037 SDValue LoHi =
7038 DAG.getNode(Opcode: ISD::UMUL_LOHI, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: X, N2: Y);
7039 return LoHi.getValue(R: 1);
7040 }
7041
7042 X = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MulVT, Operand: X);
7043 Y = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MulVT, Operand: Y);
7044 Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MulVT, N1: X, N2: Y);
7045 Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MulVT, N1: Y,
7046 N2: DAG.getShiftAmountConstant(Val: EltBits, VT: MulVT, DL: dl));
7047 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
7048 };
7049
7050 // Multiply the numerator (operand 0) by the magic value.
7051 Q = GetMULHU(Q, MagicFactor);
7052 if (!Q)
7053 return SDValue();
7054
7055 Created.push_back(Elt: Q.getNode());
7056
7057 if (UseNPQ) {
7058 SDValue NPQ = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: N0, N2: Q);
7059 Created.push_back(Elt: NPQ.getNode());
7060
7061 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
7062 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
7063 if (VT.isVector())
7064 NPQ = GetMULHU(NPQ, NPQFactor);
7065 else
7066 NPQ = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: NPQ, N2: DAG.getConstant(Val: 1, DL: dl, VT: ShVT));
7067
7068 Created.push_back(Elt: NPQ.getNode());
7069
7070 Q = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: NPQ, N2: Q);
7071 Created.push_back(Elt: Q.getNode());
7072 }
7073
7074 if (UsePostShift) {
7075 Q = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: PostShift);
7076 Created.push_back(Elt: Q.getNode());
7077 }
7078
7079 EVT SetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
7080
7081 SDValue One = DAG.getConstant(Val: 1, DL: dl, VT);
7082 SDValue IsOne = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: N1, RHS: One, Cond: ISD::SETEQ);
7083 return DAG.getSelect(DL: dl, VT, Cond: IsOne, LHS: N0, RHS: Q);
7084}
7085
7086/// If all values in Values that *don't* match the predicate are same 'splat'
7087/// value, then replace all values with that splat value.
7088/// Else, if AlternativeReplacement was provided, then replace all values that
7089/// do match predicate with AlternativeReplacement value.
7090static void
7091turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
7092 std::function<bool(SDValue)> Predicate,
7093 SDValue AlternativeReplacement = SDValue()) {
7094 SDValue Replacement;
7095 // Is there a value for which the Predicate does *NOT* match? What is it?
7096 auto SplatValue = llvm::find_if_not(Range&: Values, P: Predicate);
7097 if (SplatValue != Values.end()) {
7098 // Does Values consist only of SplatValue's and values matching Predicate?
7099 if (llvm::all_of(Range&: Values, P: [Predicate, SplatValue](SDValue Value) {
7100 return Value == *SplatValue || Predicate(Value);
7101 })) // Then we shall replace values matching predicate with SplatValue.
7102 Replacement = *SplatValue;
7103 }
7104 if (!Replacement) {
7105 // Oops, we did not find the "baseline" splat value.
7106 if (!AlternativeReplacement)
7107 return; // Nothing to do.
7108 // Let's replace with provided value then.
7109 Replacement = AlternativeReplacement;
7110 }
7111 std::replace_if(first: Values.begin(), last: Values.end(), pred: Predicate, new_value: Replacement);
7112}
7113
7114/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
7115/// where the divisor and comparison target are constants,
7116/// return a DAG expression that will generate the same comparison result
7117/// using only multiplications, additions and shifts/rotations.
7118/// Ref: "Hacker's Delight" 10-17.
7119SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
7120 SDValue CompTargetNode,
7121 ISD::CondCode Cond,
7122 DAGCombinerInfo &DCI,
7123 const SDLoc &DL) const {
7124 SmallVector<SDNode *, 5> Built;
7125 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7126 DCI, DL, Created&: Built)) {
7127 for (SDNode *N : Built)
7128 DCI.AddToWorklist(N);
7129 return Folded;
7130 }
7131
7132 return SDValue();
7133}
7134
7135SDValue
7136TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
7137 SDValue CompTargetNode, ISD::CondCode Cond,
7138 DAGCombinerInfo &DCI, const SDLoc &DL,
7139 SmallVectorImpl<SDNode *> &Created) const {
7140 // fold (seteq/ne (urem N, D), C) ->
7141 // (setule/ugt (rotr (mul (sub N, C), P), K), Q)
7142 // - D must be constant, with D = D0 * 2^K where D0 is odd
7143 // - P is the multiplicative inverse of D0 modulo 2^W
7144 // - Q = floor(((2^W) - 1) / D)
7145 // where W is the width of the common type of N and D.
7146 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7147 "Only applicable for (in)equality comparisons.");
7148
7149 SelectionDAG &DAG = DCI.DAG;
7150
7151 EVT VT = REMNode.getValueType();
7152 EVT SVT = VT.getScalarType();
7153 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
7154 EVT ShSVT = ShVT.getScalarType();
7155
7156 // If MUL is unavailable, we cannot proceed in any case.
7157 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::MUL, VT))
7158 return SDValue();
7159
7160 bool ComparingWithAllZeros = true;
7161 bool AllComparisonsWithNonZerosAreTautological = true;
7162 bool HadTautologicalLanes = false;
7163 bool AllLanesAreTautological = true;
7164 bool HadEvenDivisor = false;
7165 bool AllDivisorsArePowerOfTwo = true;
7166 bool HadTautologicalInvertedLanes = false;
7167 SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
7168
7169 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
7170 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7171 if (CDiv->isZero())
7172 return false;
7173
7174 const APInt &D = CDiv->getAPIntValue();
7175 const APInt &Cmp = CCmp->getAPIntValue();
7176
7177 ComparingWithAllZeros &= Cmp.isZero();
7178
7179 // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
7180 // if C2 is not less than C1, the comparison is always false.
7181 // But we will only be able to produce the comparison that will give the
7182 // opposive tautological answer. So this lane would need to be fixed up.
7183 bool TautologicalInvertedLane = D.ule(RHS: Cmp);
7184 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
7185
7186 // If all lanes are tautological (either all divisors are ones, or divisor
7187 // is not greater than the constant we are comparing with),
7188 // we will prefer to avoid the fold.
7189 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
7190 HadTautologicalLanes |= TautologicalLane;
7191 AllLanesAreTautological &= TautologicalLane;
7192
7193 // If we are comparing with non-zero, we need'll need to subtract said
7194 // comparison value from the LHS. But there is no point in doing that if
7195 // every lane where we are comparing with non-zero is tautological..
7196 if (!Cmp.isZero())
7197 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
7198
7199 // Decompose D into D0 * 2^K
7200 unsigned K = D.countr_zero();
7201 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7202 APInt D0 = D.lshr(shiftAmt: K);
7203
7204 // D is even if it has trailing zeros.
7205 HadEvenDivisor |= (K != 0);
7206 // D is a power-of-two if D0 is one.
7207 // If all divisors are power-of-two, we will prefer to avoid the fold.
7208 AllDivisorsArePowerOfTwo &= D0.isOne();
7209
7210 // P = inv(D0, 2^W)
7211 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7212 unsigned W = D.getBitWidth();
7213 APInt P = D0.multiplicativeInverse();
7214 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7215
7216 // Q = floor((2^W - 1) u/ D)
7217 // R = ((2^W - 1) u% D)
7218 APInt Q, R;
7219 APInt::udivrem(LHS: APInt::getAllOnes(numBits: W), RHS: D, Quotient&: Q, Remainder&: R);
7220
7221 // If we are comparing with zero, then that comparison constant is okay,
7222 // else it may need to be one less than that.
7223 if (Cmp.ugt(RHS: R))
7224 Q -= 1;
7225
7226 assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
7227 "We are expecting that K is always less than all-ones for ShSVT");
7228
7229 // If the lane is tautological the result can be constant-folded.
7230 if (TautologicalLane) {
7231 // Set P and K amount to a bogus values so we can try to splat them.
7232 P = 0;
7233 KAmts.push_back(Elt: DAG.getAllOnesConstant(DL, VT: ShSVT));
7234 // And ensure that comparison constant is tautological,
7235 // it will always compare true/false.
7236 Q.setAllBits();
7237 } else {
7238 KAmts.push_back(Elt: DAG.getConstant(Val: K, DL, VT: ShSVT));
7239 }
7240
7241 PAmts.push_back(Elt: DAG.getConstant(Val: P, DL, VT: SVT));
7242 QAmts.push_back(Elt: DAG.getConstant(Val: Q, DL, VT: SVT));
7243 return true;
7244 };
7245
7246 SDValue N = REMNode.getOperand(i: 0);
7247 SDValue D = REMNode.getOperand(i: 1);
7248
7249 // Collect the values from each element.
7250 if (!ISD::matchBinaryPredicate(LHS: D, RHS: CompTargetNode, Match: BuildUREMPattern))
7251 return SDValue();
7252
7253 // If all lanes are tautological, the result can be constant-folded.
7254 if (AllLanesAreTautological)
7255 return SDValue();
7256
7257 // If this is a urem by a powers-of-two, avoid the fold since it can be
7258 // best implemented as a bit test.
7259 if (AllDivisorsArePowerOfTwo)
7260 return SDValue();
7261
7262 SDValue PVal, KVal, QVal;
7263 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7264 if (HadTautologicalLanes) {
7265 // Try to turn PAmts into a splat, since we don't care about the values
7266 // that are currently '0'. If we can't, just keep '0'`s.
7267 turnVectorIntoSplatVector(Values: PAmts, Predicate: isNullConstant);
7268 // Try to turn KAmts into a splat, since we don't care about the values
7269 // that are currently '-1'. If we can't, change them to '0'`s.
7270 turnVectorIntoSplatVector(Values: KAmts, Predicate: isAllOnesConstant,
7271 AlternativeReplacement: DAG.getConstant(Val: 0, DL, VT: ShSVT));
7272 }
7273
7274 PVal = DAG.getBuildVector(VT, DL, Ops: PAmts);
7275 KVal = DAG.getBuildVector(VT: ShVT, DL, Ops: KAmts);
7276 QVal = DAG.getBuildVector(VT, DL, Ops: QAmts);
7277 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7278 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
7279 "Expected matchBinaryPredicate to return one element for "
7280 "SPLAT_VECTORs");
7281 PVal = DAG.getSplatVector(VT, DL, Op: PAmts[0]);
7282 KVal = DAG.getSplatVector(VT: ShVT, DL, Op: KAmts[0]);
7283 QVal = DAG.getSplatVector(VT, DL, Op: QAmts[0]);
7284 } else {
7285 PVal = PAmts[0];
7286 KVal = KAmts[0];
7287 QVal = QAmts[0];
7288 }
7289
7290 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
7291 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::SUB, VT))
7292 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
7293 assert(CompTargetNode.getValueType() == N.getValueType() &&
7294 "Expecting that the types on LHS and RHS of comparisons match.");
7295 N = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: N, N2: CompTargetNode);
7296 }
7297
7298 // (mul N, P)
7299 SDValue Op0 = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N, N2: PVal);
7300 Created.push_back(Elt: Op0.getNode());
7301
7302 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7303 // divisors as a performance improvement, since rotating by 0 is a no-op.
7304 if (HadEvenDivisor) {
7305 // We need ROTR to do this.
7306 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ROTR, VT))
7307 return SDValue();
7308 // UREM: (rotr (mul N, P), K)
7309 Op0 = DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: Op0, N2: KVal);
7310 Created.push_back(Elt: Op0.getNode());
7311 }
7312
7313 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
7314 SDValue NewCC =
7315 DAG.getSetCC(DL, VT: SETCCVT, LHS: Op0, RHS: QVal,
7316 Cond: ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7317 if (!HadTautologicalInvertedLanes)
7318 return NewCC;
7319
7320 // If any lanes previously compared always-false, the NewCC will give
7321 // always-true result for them, so we need to fixup those lanes.
7322 // Or the other way around for inequality predicate.
7323 assert(VT.isVector() && "Can/should only get here for vectors.");
7324 Created.push_back(Elt: NewCC.getNode());
7325
7326 // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
7327 // if C2 is not less than C1, the comparison is always false.
7328 // But we have produced the comparison that will give the
7329 // opposive tautological answer. So these lanes would need to be fixed up.
7330 SDValue TautologicalInvertedChannels =
7331 DAG.getSetCC(DL, VT: SETCCVT, LHS: D, RHS: CompTargetNode, Cond: ISD::SETULE);
7332 Created.push_back(Elt: TautologicalInvertedChannels.getNode());
7333
7334 // NOTE: we avoid letting illegal types through even if we're before legalize
7335 // ops – legalization has a hard time producing good code for this.
7336 if (isOperationLegalOrCustom(Op: ISD::VSELECT, VT: SETCCVT)) {
7337 // If we have a vector select, let's replace the comparison results in the
7338 // affected lanes with the correct tautological result.
7339 SDValue Replacement = DAG.getBoolConstant(V: Cond == ISD::SETEQ ? false : true,
7340 DL, VT: SETCCVT, OpVT: SETCCVT);
7341 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: SETCCVT, N1: TautologicalInvertedChannels,
7342 N2: Replacement, N3: NewCC);
7343 }
7344
7345 // Else, we can just invert the comparison result in the appropriate lanes.
7346 //
7347 // NOTE: see the note above VSELECT above.
7348 if (isOperationLegalOrCustom(Op: ISD::XOR, VT: SETCCVT))
7349 return DAG.getNode(Opcode: ISD::XOR, DL, VT: SETCCVT, N1: NewCC,
7350 N2: TautologicalInvertedChannels);
7351
7352 return SDValue(); // Don't know how to lower.
7353}
7354
7355/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
7356/// where the divisor is constant and the comparison target is zero,
7357/// return a DAG expression that will generate the same comparison result
7358/// using only multiplications, additions and shifts/rotations.
7359/// Ref: "Hacker's Delight" 10-17.
7360SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
7361 SDValue CompTargetNode,
7362 ISD::CondCode Cond,
7363 DAGCombinerInfo &DCI,
7364 const SDLoc &DL) const {
7365 SmallVector<SDNode *, 7> Built;
7366 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7367 DCI, DL, Created&: Built)) {
7368 assert(Built.size() <= 7 && "Max size prediction failed.");
7369 for (SDNode *N : Built)
7370 DCI.AddToWorklist(N);
7371 return Folded;
7372 }
7373
7374 return SDValue();
7375}
7376
7377SDValue
7378TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
7379 SDValue CompTargetNode, ISD::CondCode Cond,
7380 DAGCombinerInfo &DCI, const SDLoc &DL,
7381 SmallVectorImpl<SDNode *> &Created) const {
7382 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
7383 // Fold:
7384 // (seteq/ne (srem N, D), 0)
7385 // To:
7386 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
7387 //
7388 // - D must be constant, with D = D0 * 2^K where D0 is odd
7389 // - P is the multiplicative inverse of D0 modulo 2^W
7390 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
7391 // - Q = floor((2 * A) / (2^K))
7392 // where W is the width of the common type of N and D.
7393 //
7394 // When D is a power of two (and thus D0 is 1), the normal
7395 // formula for A and Q don't apply, because the derivation
7396 // depends on D not dividing 2^(W-1), and thus theorem ZRS
7397 // does not apply. This specifically fails when N = INT_MIN.
7398 //
7399 // Instead, for power-of-two D, we use:
7400 // - A = 0
7401 // | -> No offset needed. We're effectively treating it the same as urem.
7402 // - Q = 2^(W-K) - 1
7403 // |-> Test that the top K bits are zero after rotation
7404 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7405 "Only applicable for (in)equality comparisons.");
7406
7407 SelectionDAG &DAG = DCI.DAG;
7408
7409 EVT VT = REMNode.getValueType();
7410 EVT SVT = VT.getScalarType();
7411 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
7412 EVT ShSVT = ShVT.getScalarType();
7413
7414 // If we are after ops legalization, and MUL is unavailable, we can not
7415 // proceed.
7416 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::MUL, VT))
7417 return SDValue();
7418
7419 // TODO: Could support comparing with non-zero too.
7420 ConstantSDNode *CompTarget = isConstOrConstSplat(N: CompTargetNode);
7421 if (!CompTarget || !CompTarget->isZero())
7422 return SDValue();
7423
7424 bool HadOneDivisor = false;
7425 bool AllDivisorsAreOnes = true;
7426 bool HadEvenDivisor = false;
7427 bool AllDivisorsArePowerOfTwo = true;
7428 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
7429
7430 auto BuildSREMPattern = [&](ConstantSDNode *C) {
7431 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7432 if (C->isZero())
7433 return false;
7434
7435 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
7436
7437 // WARNING: this fold is only valid for positive divisors!
7438 // `rem %X, -C` is equivalent to `rem %X, C`
7439 APInt D = C->getAPIntValue().abs();
7440
7441 // If all divisors are ones, we will prefer to avoid the fold.
7442 HadOneDivisor |= D.isOne();
7443 AllDivisorsAreOnes &= D.isOne();
7444
7445 // Decompose D into D0 * 2^K
7446 unsigned K = D.countr_zero();
7447 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7448 APInt D0 = D.lshr(shiftAmt: K);
7449
7450 // D is even if it has trailing zeros.
7451 HadEvenDivisor |= (K != 0);
7452
7453 // D is a power-of-two if D0 is one. This includes INT_MIN.
7454 // If all divisors are power-of-two, we will prefer to avoid the fold.
7455 AllDivisorsArePowerOfTwo &= D0.isOne();
7456
7457 // P = inv(D0, 2^W)
7458 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7459 unsigned W = D.getBitWidth();
7460 APInt P = D0.multiplicativeInverse();
7461 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7462
7463 // A = floor((2^(W - 1) - 1) / D0) & -2^K
7464 APInt A = APInt::getSignedMaxValue(numBits: W).udiv(RHS: D0);
7465 A.clearLowBits(loBits: K);
7466
7467 // Q = floor((2 * A) / (2^K))
7468 APInt Q = (2 * A).udiv(RHS: APInt::getOneBitSet(numBits: W, BitNo: K));
7469
7470 assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
7471 "We are expecting that A is always less than all-ones for SVT");
7472 assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
7473 "We are expecting that K is always less than all-ones for ShSVT");
7474
7475 // If D was a power of two, apply the alternate constant derivation.
7476 if (D0.isOne()) {
7477 // A = 0
7478 A = APInt(W, 0);
7479 // - Q = 2^(W-K) - 1
7480 Q = APInt::getLowBitsSet(numBits: W, loBitsSet: W - K);
7481 }
7482
7483 // If the divisor is 1 the result can be constant-folded.
7484 if (D.isOne()) {
7485 // Set P, A and K to a bogus values so we can try to splat them.
7486 P = 0;
7487 A.setAllBits();
7488 KAmts.push_back(Elt: DAG.getAllOnesConstant(DL, VT: ShSVT));
7489
7490 // x ?% 1 == 0 <--> true <--> x u<= -1
7491 Q.setAllBits();
7492 } else {
7493 KAmts.push_back(Elt: DAG.getConstant(Val: K, DL, VT: ShSVT));
7494 }
7495
7496 PAmts.push_back(Elt: DAG.getConstant(Val: P, DL, VT: SVT));
7497 AAmts.push_back(Elt: DAG.getConstant(Val: A, DL, VT: SVT));
7498 QAmts.push_back(Elt: DAG.getConstant(Val: Q, DL, VT: SVT));
7499 return true;
7500 };
7501
7502 SDValue N = REMNode.getOperand(i: 0);
7503 SDValue D = REMNode.getOperand(i: 1);
7504
7505 // Collect the values from each element.
7506 if (!ISD::matchUnaryPredicate(Op: D, Match: BuildSREMPattern))
7507 return SDValue();
7508
7509 // If this is a srem by a one, avoid the fold since it can be constant-folded.
7510 if (AllDivisorsAreOnes)
7511 return SDValue();
7512
7513 // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
7514 // since it can be best implemented as a bit test.
7515 if (AllDivisorsArePowerOfTwo)
7516 return SDValue();
7517
7518 SDValue PVal, AVal, KVal, QVal;
7519 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7520 if (HadOneDivisor) {
7521 // Try to turn PAmts into a splat, since we don't care about the values
7522 // that are currently '0'. If we can't, just keep '0'`s.
7523 turnVectorIntoSplatVector(Values: PAmts, Predicate: isNullConstant);
7524 // Try to turn AAmts into a splat, since we don't care about the
7525 // values that are currently '-1'. If we can't, change them to '0'`s.
7526 turnVectorIntoSplatVector(Values: AAmts, Predicate: isAllOnesConstant,
7527 AlternativeReplacement: DAG.getConstant(Val: 0, DL, VT: SVT));
7528 // Try to turn KAmts into a splat, since we don't care about the values
7529 // that are currently '-1'. If we can't, change them to '0'`s.
7530 turnVectorIntoSplatVector(Values: KAmts, Predicate: isAllOnesConstant,
7531 AlternativeReplacement: DAG.getConstant(Val: 0, DL, VT: ShSVT));
7532 }
7533
7534 PVal = DAG.getBuildVector(VT, DL, Ops: PAmts);
7535 AVal = DAG.getBuildVector(VT, DL, Ops: AAmts);
7536 KVal = DAG.getBuildVector(VT: ShVT, DL, Ops: KAmts);
7537 QVal = DAG.getBuildVector(VT, DL, Ops: QAmts);
7538 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7539 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7540 QAmts.size() == 1 &&
7541 "Expected matchUnaryPredicate to return one element for scalable "
7542 "vectors");
7543 PVal = DAG.getSplatVector(VT, DL, Op: PAmts[0]);
7544 AVal = DAG.getSplatVector(VT, DL, Op: AAmts[0]);
7545 KVal = DAG.getSplatVector(VT: ShVT, DL, Op: KAmts[0]);
7546 QVal = DAG.getSplatVector(VT, DL, Op: QAmts[0]);
7547 } else {
7548 assert(isa<ConstantSDNode>(D) && "Expected a constant");
7549 PVal = PAmts[0];
7550 AVal = AAmts[0];
7551 KVal = KAmts[0];
7552 QVal = QAmts[0];
7553 }
7554
7555 // (mul N, P)
7556 SDValue Op0 = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N, N2: PVal);
7557 Created.push_back(Elt: Op0.getNode());
7558
7559 // We need ADD to do this.
7560 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ADD, VT))
7561 return SDValue();
7562
7563 // (add (mul N, P), A)
7564 Op0 = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0, N2: AVal);
7565 Created.push_back(Elt: Op0.getNode());
7566
7567 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7568 // divisors as a performance improvement, since rotating by 0 is a no-op.
7569 if (HadEvenDivisor) {
7570 // We need ROTR to do this.
7571 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ROTR, VT))
7572 return SDValue();
7573 // SREM: (rotr (add (mul N, P), A), K)
7574 Op0 = DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: Op0, N2: KVal);
7575 Created.push_back(Elt: Op0.getNode());
7576 }
7577
7578 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7579 return DAG.getSetCC(DL, VT: SETCCVT, LHS: Op0, RHS: QVal,
7580 Cond: (Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT);
7581}
7582
7583SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
7584 const DenormalMode &Mode,
7585 SDNodeFlags Flags) const {
7586 SDLoc DL(Op);
7587 EVT VT = Op.getValueType();
7588 EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
7589 SDValue FPZero = DAG.getConstantFP(Val: 0.0, DL, VT);
7590
7591 // This is specifically a check for the handling of denormal inputs, not the
7592 // result.
7593 if (Mode.Input == DenormalMode::PreserveSign ||
7594 Mode.Input == DenormalMode::PositiveZero) {
7595 // Test = X == 0.0
7596 return DAG.getSetCC(DL, VT: CCVT, LHS: Op, RHS: FPZero, Cond: ISD::SETEQ, /*Chain=*/{},
7597 /*Signaling=*/IsSignaling: false, Flags);
7598 }
7599
7600 // Testing it with denormal inputs to avoid wrong estimate.
7601 //
7602 // Test = fabs(X) < SmallestNormal
7603 const fltSemantics &FltSem = VT.getFltSemantics();
7604 APFloat SmallestNorm = APFloat::getSmallestNormalized(Sem: FltSem);
7605 SDValue NormC = DAG.getConstantFP(Val: SmallestNorm, DL, VT);
7606 SDValue Fabs = DAG.getNode(Opcode: ISD::FABS, DL, VT, Operand: Op, Flags);
7607 return DAG.getSetCC(DL, VT: CCVT, LHS: Fabs, RHS: NormC, Cond: ISD::SETLT, /*Chain=*/{},
7608 /*Signaling=*/IsSignaling: false, Flags);
7609}
7610
7611SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
7612 bool LegalOps, bool OptForSize,
7613 NegatibleCost &Cost,
7614 unsigned Depth) const {
7615 // fneg is removable even if it has multiple uses.
7616 if (Op.getOpcode() == ISD::FNEG) {
7617 Cost = NegatibleCost::Cheaper;
7618 return Op.getOperand(i: 0);
7619 }
7620
7621 // Don't recurse exponentially.
7622 if (Depth > SelectionDAG::MaxRecursionDepth)
7623 return SDValue();
7624
7625 // Pre-increment recursion depth for use in recursive calls.
7626 ++Depth;
7627 const SDNodeFlags Flags = Op->getFlags();
7628 EVT VT = Op.getValueType();
7629 unsigned Opcode = Op.getOpcode();
7630
7631 // Don't allow anything with multiple uses unless we know it is free.
7632 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7633 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7634 isFPExtFree(DestVT: VT, SrcVT: Op.getOperand(i: 0).getValueType());
7635 if (!IsFreeExtend)
7636 return SDValue();
7637 }
7638
7639 auto RemoveDeadNode = [&](SDValue N) {
7640 if (N && N.getNode()->use_empty())
7641 DAG.RemoveDeadNode(N: N.getNode());
7642 };
7643
7644 SDLoc DL(Op);
7645
7646 // Because getNegatedExpression can delete nodes we need a handle to keep
7647 // temporary nodes alive in case the recursion manages to create an identical
7648 // node.
7649 std::list<HandleSDNode> Handles;
7650
7651 switch (Opcode) {
7652 case ISD::ConstantFP: {
7653 // Don't invert constant FP values after legalization unless the target says
7654 // the negated constant is legal.
7655 bool IsOpLegal =
7656 isOperationLegal(Op: ISD::ConstantFP, VT) ||
7657 isFPImmLegal(neg(X: cast<ConstantFPSDNode>(Val&: Op)->getValueAPF()), VT,
7658 ForCodeSize: OptForSize);
7659
7660 if (LegalOps && !IsOpLegal)
7661 break;
7662
7663 APFloat V = cast<ConstantFPSDNode>(Val&: Op)->getValueAPF();
7664 V.changeSign();
7665 SDValue CFP = DAG.getConstantFP(Val: V, DL, VT);
7666
7667 // If we already have the use of the negated floating constant, it is free
7668 // to negate it even it has multiple uses.
7669 if (!Op.hasOneUse() && CFP.use_empty())
7670 break;
7671 Cost = NegatibleCost::Neutral;
7672 return CFP;
7673 }
7674 case ISD::SPLAT_VECTOR: {
7675 // fold splat_vector(fneg(X)) -> splat_vector(-X)
7676 SDValue X = Op.getOperand(i: 0);
7677 if (!isOperationLegal(Op: ISD::SPLAT_VECTOR, VT))
7678 break;
7679
7680 SDValue NegX = getCheaperNegatedExpression(Op: X, DAG, LegalOps, OptForSize);
7681 if (!NegX)
7682 break;
7683 Cost = NegatibleCost::Cheaper;
7684 return DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL, VT, Operand: NegX);
7685 }
7686 case ISD::BUILD_VECTOR: {
7687 // Only permit BUILD_VECTOR of constants.
7688 if (llvm::any_of(Range: Op->op_values(), P: [&](SDValue N) {
7689 return !N.isUndef() && !isa<ConstantFPSDNode>(Val: N);
7690 }))
7691 break;
7692
7693 bool IsOpLegal =
7694 (isOperationLegal(Op: ISD::ConstantFP, VT) &&
7695 isOperationLegal(Op: ISD::BUILD_VECTOR, VT)) ||
7696 llvm::all_of(Range: Op->op_values(), P: [&](SDValue N) {
7697 return N.isUndef() ||
7698 isFPImmLegal(neg(X: cast<ConstantFPSDNode>(Val&: N)->getValueAPF()), VT,
7699 ForCodeSize: OptForSize);
7700 });
7701
7702 if (LegalOps && !IsOpLegal)
7703 break;
7704
7705 SmallVector<SDValue, 4> Ops;
7706 for (SDValue C : Op->op_values()) {
7707 if (C.isUndef()) {
7708 Ops.push_back(Elt: C);
7709 continue;
7710 }
7711 APFloat V = cast<ConstantFPSDNode>(Val&: C)->getValueAPF();
7712 V.changeSign();
7713 Ops.push_back(Elt: DAG.getConstantFP(Val: V, DL, VT: C.getValueType()));
7714 }
7715 Cost = NegatibleCost::Neutral;
7716 return DAG.getBuildVector(VT, DL, Ops);
7717 }
7718 case ISD::FADD: {
7719 if (!Flags.hasNoSignedZeros())
7720 break;
7721
7722 // After operation legalization, it might not be legal to create new FSUBs.
7723 if (LegalOps && !isOperationLegalOrCustom(Op: ISD::FSUB, VT))
7724 break;
7725 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1);
7726
7727 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7728 NegatibleCost CostX = NegatibleCost::Expensive;
7729 SDValue NegX =
7730 getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7731 // Prevent this node from being deleted by the next call.
7732 if (NegX)
7733 Handles.emplace_back(args&: NegX);
7734
7735 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7736 NegatibleCost CostY = NegatibleCost::Expensive;
7737 SDValue NegY =
7738 getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7739
7740 // We're done with the handles.
7741 Handles.clear();
7742
7743 // Negate the X if its cost is less or equal than Y.
7744 if (NegX && (CostX <= CostY)) {
7745 Cost = CostX;
7746 SDValue N = DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: NegX, N2: Y, Flags);
7747 if (NegY != N)
7748 RemoveDeadNode(NegY);
7749 return N;
7750 }
7751
7752 // Negate the Y if it is not expensive.
7753 if (NegY) {
7754 Cost = CostY;
7755 SDValue N = DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: NegY, N2: X, Flags);
7756 if (NegX != N)
7757 RemoveDeadNode(NegX);
7758 return N;
7759 }
7760 break;
7761 }
7762 case ISD::FSUB: {
7763 // We can't turn -(A-B) into B-A when we honor signed zeros.
7764 if (!Flags.hasNoSignedZeros())
7765 break;
7766
7767 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1);
7768 // fold (fneg (fsub 0, Y)) -> Y
7769 if (ConstantFPSDNode *C = isConstOrConstSplatFP(N: X, /*AllowUndefs*/ true))
7770 if (C->isZero()) {
7771 Cost = NegatibleCost::Cheaper;
7772 return Y;
7773 }
7774
7775 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7776 Cost = NegatibleCost::Neutral;
7777 return DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: Y, N2: X, Flags);
7778 }
7779 case ISD::FMUL:
7780 case ISD::FDIV: {
7781 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1);
7782
7783 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7784 NegatibleCost CostX = NegatibleCost::Expensive;
7785 SDValue NegX =
7786 getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7787 // Prevent this node from being deleted by the next call.
7788 if (NegX)
7789 Handles.emplace_back(args&: NegX);
7790
7791 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7792 NegatibleCost CostY = NegatibleCost::Expensive;
7793 SDValue NegY =
7794 getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7795
7796 // We're done with the handles.
7797 Handles.clear();
7798
7799 // Negate the X if its cost is less or equal than Y.
7800 if (NegX && (CostX <= CostY)) {
7801 Cost = CostX;
7802 SDValue N = DAG.getNode(Opcode, DL, VT, N1: NegX, N2: Y, Flags);
7803 if (NegY != N)
7804 RemoveDeadNode(NegY);
7805 return N;
7806 }
7807
7808 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7809 if (auto *C = isConstOrConstSplatFP(N: Op.getOperand(i: 1)))
7810 if (C->isExactlyValue(V: 2.0) && Op.getOpcode() == ISD::FMUL)
7811 break;
7812
7813 // Negate the Y if it is not expensive.
7814 if (NegY) {
7815 Cost = CostY;
7816 SDValue N = DAG.getNode(Opcode, DL, VT, N1: X, N2: NegY, Flags);
7817 if (NegX != N)
7818 RemoveDeadNode(NegX);
7819 return N;
7820 }
7821 break;
7822 }
7823 case ISD::FMA:
7824 case ISD::FMULADD:
7825 case ISD::FMAD: {
7826 if (!Flags.hasNoSignedZeros())
7827 break;
7828
7829 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1), Z = Op.getOperand(i: 2);
7830 NegatibleCost CostZ = NegatibleCost::Expensive;
7831 SDValue NegZ =
7832 getNegatedExpression(Op: Z, DAG, LegalOps, OptForSize, Cost&: CostZ, Depth);
7833 // Give up if fail to negate the Z.
7834 if (!NegZ)
7835 break;
7836
7837 // Prevent this node from being deleted by the next two calls.
7838 Handles.emplace_back(args&: NegZ);
7839
7840 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7841 NegatibleCost CostX = NegatibleCost::Expensive;
7842 SDValue NegX =
7843 getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7844 // Prevent this node from being deleted by the next call.
7845 if (NegX)
7846 Handles.emplace_back(args&: NegX);
7847
7848 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7849 NegatibleCost CostY = NegatibleCost::Expensive;
7850 SDValue NegY =
7851 getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7852
7853 // We're done with the handles.
7854 Handles.clear();
7855
7856 // Negate the X if its cost is less or equal than Y.
7857 if (NegX && (CostX <= CostY)) {
7858 Cost = std::min(a: CostX, b: CostZ);
7859 SDValue N = DAG.getNode(Opcode, DL, VT, N1: NegX, N2: Y, N3: NegZ, Flags);
7860 if (NegY != N)
7861 RemoveDeadNode(NegY);
7862 return N;
7863 }
7864
7865 // Negate the Y if it is not expensive.
7866 if (NegY) {
7867 Cost = std::min(a: CostY, b: CostZ);
7868 SDValue N = DAG.getNode(Opcode, DL, VT, N1: X, N2: NegY, N3: NegZ, Flags);
7869 if (NegX != N)
7870 RemoveDeadNode(NegX);
7871 return N;
7872 }
7873 break;
7874 }
7875
7876 case ISD::FP_EXTEND:
7877 case ISD::FSIN:
7878 if (SDValue NegV = getNegatedExpression(Op: Op.getOperand(i: 0), DAG, LegalOps,
7879 OptForSize, Cost, Depth))
7880 return DAG.getNode(Opcode, DL, VT, Operand: NegV);
7881 break;
7882 case ISD::FP_ROUND:
7883 if (SDValue NegV = getNegatedExpression(Op: Op.getOperand(i: 0), DAG, LegalOps,
7884 OptForSize, Cost, Depth))
7885 return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT, N1: NegV, N2: Op.getOperand(i: 1));
7886 break;
7887 case ISD::SELECT:
7888 case ISD::VSELECT: {
7889 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7890 // iff at least one cost is cheaper and the other is neutral/cheaper
7891 SDValue LHS = Op.getOperand(i: 1);
7892 NegatibleCost CostLHS = NegatibleCost::Expensive;
7893 SDValue NegLHS =
7894 getNegatedExpression(Op: LHS, DAG, LegalOps, OptForSize, Cost&: CostLHS, Depth);
7895 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7896 RemoveDeadNode(NegLHS);
7897 break;
7898 }
7899
7900 // Prevent this node from being deleted by the next call.
7901 Handles.emplace_back(args&: NegLHS);
7902
7903 SDValue RHS = Op.getOperand(i: 2);
7904 NegatibleCost CostRHS = NegatibleCost::Expensive;
7905 SDValue NegRHS =
7906 getNegatedExpression(Op: RHS, DAG, LegalOps, OptForSize, Cost&: CostRHS, Depth);
7907
7908 // We're done with the handles.
7909 Handles.clear();
7910
7911 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7912 (CostLHS != NegatibleCost::Cheaper &&
7913 CostRHS != NegatibleCost::Cheaper)) {
7914 RemoveDeadNode(NegLHS);
7915 RemoveDeadNode(NegRHS);
7916 break;
7917 }
7918
7919 Cost = std::min(a: CostLHS, b: CostRHS);
7920 return DAG.getSelect(DL, VT, Cond: Op.getOperand(i: 0), LHS: NegLHS, RHS: NegRHS);
7921 }
7922 }
7923
7924 return SDValue();
7925}
7926
7927//===----------------------------------------------------------------------===//
7928// Legalization Utilities
7929//===----------------------------------------------------------------------===//
7930
/// Expand a multiply of type VT into operations on the half-width type HiLoVT.
///
/// Opcode must be ISD::MUL, ISD::UMUL_LOHI or ISD::SMUL_LOHI (asserted). The
/// HiLoVT-sized pieces of the product are appended to Result from least to
/// most significant: two pieces for ISD::MUL, four pieces for the two LOHI
/// opcodes.
///
/// LHS and RHS are the VT-typed multiplicands. With MulExpansionKind::Always
/// every half-width multiply opcode is assumed usable; otherwise each one must
/// be legal or custom for HiLoVT. LL/LH and RL/RH optionally supply the
/// low/high halves of LHS and RHS; the four must be either all provided or all
/// null (asserted). Null halves are derived here with TRUNCATE/SRL when those
/// operations are legal or custom.
///
/// Returns true if the expansion was emitted. On a false return nodes may
/// already have been created and Result may already hold entries, because the
/// lowest piece is pushed before the later partial products are attempted.
7931bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7932 SDValue LHS, SDValue RHS,
7933 SmallVectorImpl<SDValue> &Result,
7934 EVT HiLoVT, SelectionDAG &DAG,
7935 MulExpansionKind Kind, SDValue LL,
7936 SDValue LH, SDValue RL, SDValue RH) const {
7937 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7938 Opcode == ISD::SMUL_LOHI);
7939
  // Work out which half-width multiply flavours may be used.
7940 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7941 isOperationLegalOrCustom(Op: ISD::MULHS, VT: HiLoVT);
7942 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7943 isOperationLegalOrCustom(Op: ISD::MULHU, VT: HiLoVT);
7944 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7945 isOperationLegalOrCustom(Op: ISD::SMUL_LOHI, VT: HiLoVT);
7946 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7947 isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT: HiLoVT);
7948
  // Every path below needs the high half of a HiLoVT product.
7949 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7950 return false;
7951
7952 unsigned OuterBitSize = VT.getScalarSizeInBits();
7953 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7954
7955 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7956 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7957 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7958
  // Emit L * R as a (Lo, Hi) pair of HiLoVT values. A single [US]MUL_LOHI
  // node is preferred; otherwise MUL plus MULH[US] is used. Returns false if
  // neither form is available for the requested signedness.
7959 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7960 bool Signed) -> bool {
7961 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7962 SDVTList VTs = DAG.getVTList(VT1: HiLoVT, VT2: HiLoVT);
7963 Lo = DAG.getNode(Opcode: Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, DL: dl, VTList: VTs, N1: L, N2: R);
7964 Hi = Lo.getValue(R: 1);
7965 return true;
7966 }
7967 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7968 Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: L, N2: R);
7969 Hi = DAG.getNode(Opcode: Signed ? ISD::MULHS : ISD::MULHU, DL: dl, VT: HiLoVT, N1: L, N2: R);
7970 return true;
7971 }
7972 return false;
7973 };
7974
7975 SDValue Lo, Hi;
7976
  // Derive the low halves if the caller did not supply them.
7977 if (!LL.getNode() && !RL.getNode() &&
7978 isOperationLegalOrCustom(Op: ISD::TRUNCATE, VT: HiLoVT)) {
7979 LL = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: LHS);
7980 RL = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: RHS);
7981 }
7982
7983 if (!LL.getNode())
7984 return false;
7985
  // Fast path: if the top InnerBitSize bits of both operands are known zero,
  // a single unsigned half-width multiply yields the whole product.
7986 APInt HighMask = APInt::getHighBitsSet(numBits: OuterBitSize, hiBitsSet: InnerBitSize);
7987 if (DAG.MaskedValueIsZero(Op: LHS, Mask: HighMask) &&
7988 DAG.MaskedValueIsZero(Op: RHS, Mask: HighMask)) {
7989 // The inputs are both zero-extended.
7990 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7991 Result.push_back(Elt: Lo);
7992 Result.push_back(Elt: Hi);
      // The LOHI opcodes produce four pieces; the upper two are zero here.
7993 if (Opcode != ISD::MUL) {
7994 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT);
7995 Result.push_back(Elt: Zero);
7996 Result.push_back(Elt: Zero);
7997 }
7998 return true;
7999 }
8000 }
8001
  // Fast path for scalar ISD::MUL: if both operands fit in InnerBitSize
  // significant bits, a single signed half-width multiply suffices.
8002 if (!VT.isVector() && Opcode == ISD::MUL &&
8003 DAG.ComputeMaxSignificantBits(Op: LHS) <= InnerBitSize &&
8004 DAG.ComputeMaxSignificantBits(Op: RHS) <= InnerBitSize) {
8005 // The input values are both sign-extended.
8006 // TODO non-MUL case?
8007 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
8008 Result.push_back(Elt: Lo);
8009 Result.push_back(Elt: Hi);
8010 return true;
8011 }
8012 }
8013
  // Shift distance between the low and high half of a VT value.
8014 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
8015 SDValue Shift = DAG.getShiftAmountConstant(Val: ShiftAmount, VT, DL: dl);
8016
  // Derive the high halves if the caller did not supply them.
8017 if (!LH.getNode() && !RH.getNode() &&
8018 isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
8019 isOperationLegalOrCustom(Op: ISD::TRUNCATE, VT: HiLoVT)) {
8020 LH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: LHS, N2: Shift);
8021 LH = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: LH);
8022 RH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: RHS, N2: Shift);
8023 RH = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: RH);
8024 }
8025
8026 if (!LH.getNode())
8027 return false;
8028
  // Low x low partial product; its low half is the lowest result piece.
8029 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
8030 return false;
8031
8032 Result.push_back(Elt: Lo);
8033
  // A plain MUL only needs the low VT bits of the product, so the high piece
  // is Hi(LL*RL) + LL*RH + LH*RL computed in HiLoVT.
8034 if (Opcode == ISD::MUL) {
8035 RH = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: LL, N2: RH);
8036 LH = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: LH, N2: RL);
8037 Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Hi, N2: RH);
8038 Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Hi, N2: LH);
8039 Result.push_back(Elt: Hi);
8040 return true;
8041 }
8042
8043 // Compute the full width result.
  // Merge recombines a (Lo, Hi) pair of HiLoVT values into one VT value.
8044 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
8045 Lo = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Lo);
8046 Hi = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Hi);
8047 Hi = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Hi, N2: Shift);
8048 return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Lo, N2: Hi);
8049 };
8050
  // Next accumulates the running sum in VT, starting from the high half of
  // LL*RL, to which the first cross product LL*RH is added.
8051 SDValue Next = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Hi);
8052 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
8053 return false;
8054
8055 // This is effectively the add part of a multiply-add of half-sized operands,
8056 // so it cannot overflow.
8057 Next = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Next, N2: Merge(Lo, Hi));
8058
  // Second cross product LH*RL; adding it needs a carry-out.
8059 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
8060 return false;
8061
8062 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT);
8063 EVT BoolType = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
8064
  // Use glued ADDC/ADDE when both are legal or custom for VT; otherwise use
  // UADDO_CARRY with a zero carry-in.
8065 bool UseGlue = (isOperationLegalOrCustom(Op: ISD::ADDC, VT) &&
8066 isOperationLegalOrCustom(Op: ISD::ADDE, VT));
8067 if (UseGlue)
8068 Next = DAG.getNode(Opcode: ISD::ADDC, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::Glue), N1: Next,
8069 N2: Merge(Lo, Hi));
8070 else
8071 Next = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolType), N1: Next,
8072 N2: Merge(Lo, Hi), N3: DAG.getConstant(Val: 0, DL: dl, VT: BoolType));
8073
  // The low half of the running sum is the second result piece; the rest is
  // shifted down for the next step.
8074 SDValue Carry = Next.getValue(R: 1);
8075 Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
8076 Next = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Next, N2: Shift);
8077
  // High x high partial product; this is the only one whose signedness
  // follows the opcode.
8078 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
8079 return false;
8080
  // Fold the carry from the previous addition into the high half of LH*RH.
8081 if (UseGlue)
8082 Hi = DAG.getNode(Opcode: ISD::ADDE, DL: dl, VTList: DAG.getVTList(VT1: HiLoVT, VT2: MVT::Glue), N1: Hi, N2: Zero,
8083 N3: Carry);
8084 else
8085 Hi = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList: DAG.getVTList(VT1: HiLoVT, VT2: BoolType), N1: Hi,
8086 N2: Zero, N3: Carry);
8087
8088 Next = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Next, N2: Merge(Lo, Hi));
8089
  // Signed fix-up: the cross products above were formed as unsigned. When a
  // high half is negative (SETLT against zero), subtract the other operand's
  // zero-extended low half from the upper part of the product.
8090 if (Opcode == ISD::SMUL_LOHI) {
8091 SDValue NextSub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Next,
8092 N2: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: RL));
8093 Next = DAG.getSelectCC(DL: dl, LHS: LH, RHS: Zero, True: NextSub, False: Next, Cond: ISD::SETLT);
8094
8095 NextSub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Next,
8096 N2: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: LL));
8097 Next = DAG.getSelectCC(DL: dl, LHS: RH, RHS: Zero, True: NextSub, False: Next, Cond: ISD::SETLT);
8098 }
8099
  // Emit the two upper result pieces from the low and high half of Next.
8100 Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
8101 Next = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Next, N2: Shift);
8102 Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
8103 return true;
8104}
8105
/// Convenience wrapper around expandMUL_LOHI for a node N.
///
/// Forwards N's opcode, its first result type, its location and its operands
/// 0 and 1, together with HiLoVT, Kind and the optional pre-split halves
/// LL/LH/RL/RH. On success exactly two results are expected (asserted) and
/// are stored in Lo and Hi; on failure Lo and Hi are left untouched.
///
/// Returns whatever expandMUL_LOHI returned.
8106bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
8107 SelectionDAG &DAG, MulExpansionKind Kind,
8108 SDValue LL, SDValue LH, SDValue RL,
8109 SDValue RH) const {
8110 SmallVector<SDValue, 2> Result;
8111 bool Ok = expandMUL_LOHI(Opcode: N->getOpcode(), VT: N->getValueType(ResNo: 0), dl: SDLoc(N),
8112 LHS: N->getOperand(Num: 0), RHS: N->getOperand(Num: 1), Result, HiLoVT,
8113 DAG, Kind, LL, LH, RL, RH);
8114 if (Ok) {
    // expandMUL_LOHI pushes exactly two pieces only for ISD::MUL, so this
    // assert effectively requires N to be a MUL node.
8115 assert(Result.size() == 2);
8116 Lo = Result[0];
8117 Hi = Result[1];
8118 }
8119 return Ok;
8120}
8121
8122// Optimize unsigned division or remainder by constants for types twice as large
8123// as a legal VT.
8124//
8125// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
8126// can be computed as:
8127//   Overflow = __builtin_uadd_overflow(Lo, High, &Sum);
8128//   Sum += Overflow;
8129//   Remainder = Sum % Constant;
8130//
8131// If (1 << (BitWidth / 2)) % Constant != 1, we can search for a smaller value
8132// W such that W != (BitWidth / 2) and (1 << W) % Constant == 1. We can break
8133// High:Low into 3 chunks of W bits and compute remainder as
8134// Sum = Chunk0 + Chunk1 + Chunk2;
8135// Remainder = Sum % Constant;
8136//
8137// This is based on "Remainder by Summing Digits" from Hacker's Delight.
8138//
8139// For division, we can compute the remainder using the algorithm described
8140// above, subtract it from the dividend to get an exact multiple of Constant.
8141// Then multiply that exact multiple by the multiplicative inverse modulo
8142// (1 << BitWidth) to get the quotient.
8143
8144// If Constant is even, we can shift right the dividend and the divisor by the
8145// number of trailing zeros in Constant before applying the remainder algorithm.
8146// If we're after the quotient, we can subtract this value from the shifted
8147// dividend and multiply by the multiplicative inverse of the shifted divisor.
8148// If we want the remainder, we shift the value left by the number of trailing
8149// zeros and add the bits that were shifted out of the dividend.
//
// The dividend is operand 0 of N. Divisor is the constant divisor; its bit
// width must match VT and be twice that of HiLoVT (asserted). LL/LH optionally
// supply the low/high HiLoVT halves of the dividend (both or neither,
// asserted); when absent the dividend is split here.
//
// On success, appends to Result the quotient halves (lo, hi) unless N is an
// ISD::UREM, followed by the remainder halves (lo, hi) unless N is an
// ISD::UDIV, and returns true. Returns false, before creating any node, if the
// odd part of the divisor does not fit in HBitWidth bits or no usable chunk
// width exists.
8150bool TargetLowering::expandUDIVREMByConstantViaUREMDecomposition(
8151 SDNode *N, APInt Divisor, SmallVectorImpl<SDValue> &Result, EVT HiLoVT,
8152 SelectionDAG &DAG, SDValue LL, SDValue LH) const {
8153 unsigned Opcode = N->getOpcode();
8154 EVT VT = N->getValueType(ResNo: 0);
8155
8156 unsigned BitWidth = Divisor.getBitWidth();
8157 unsigned HBitWidth = BitWidth / 2;
8158 assert(VT.getScalarSizeInBits() == BitWidth &&
8159 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
8160
8161 // If the divisor is even, shift it until it becomes odd.
8162 unsigned TrailingZeros = 0;
8163 if (!Divisor[0]) {
8164 TrailingZeros = Divisor.countr_zero();
8165 Divisor.lshrInPlace(ShiftAmt: TrailingZeros);
8166 }
8167
8168 // After removing trailing zeros, the divisor needs to be less than
8169 // (1 << HBitWidth).
8170 APInt HalfMaxPlus1 = APInt::getOneBitSet(numBits: BitWidth, BitNo: HBitWidth);
8171 if (Divisor.uge(RHS: HalfMaxPlus1))
8172 return false;
8173
8174 // Look for the largest chunk width W such that (1 << W) % Divisor == 1 or
8175 // (1 << W) % Divisor == -1.
  // The search runs from HBitWidth down to HBitWidth / 2 + 1 and stops at the
  // first width with remainder 1.
  // NOTE(review): AltChunkWidth is overwritten on every match while I
  // descends, so it ends up as the smallest matching width visited rather
  // than the largest; confirm this is intended.
8176 unsigned BestChunkWidth = 0, AltChunkWidth = 0;
8177 for (unsigned I = HBitWidth, E = HBitWidth / 2; I > E; --I) {
8178 // Skip HBitWidth-1, it doesn't have enough bits for carries.
8179 if (I == HBitWidth - 1)
8180 continue;
8181
8182 APInt Mod = APInt::getOneBitSet(numBits: Divisor.getBitWidth(), BitNo: I).urem(RHS: Divisor);
8183
8184 if (Mod.isOne()) {
8185 BestChunkWidth = I;
8186 break;
8187 }
8188
8189 // We have an alternate strategy for Remainder == Divisor - 1.
8190 // FIXME: Support HBitWidth.
    // NOTE(review): if Divisor was a power of two it is now 1, so Mod is 0
    // and equals Divisor - 1; such divisors therefore take the alternate
    // path. Presumably they are folded before reaching here; verify.
8191 if (I != HBitWidth && Mod == Divisor - 1)
8192 AltChunkWidth = I;
8193 }
8194
  // Fall back to the alternating add/subtract strategy only when no width
  // with remainder 1 was found.
8195 bool Alternate = false;
8196 if (!BestChunkWidth) {
8197 if (!AltChunkWidth)
8198 return false;
8199 Alternate = true;
8200 BestChunkWidth = AltChunkWidth;
8201 }
8202
8203 SDLoc dl(N);
8204
8205 assert(!LL == !LH && "Expected both input halves or no input halves!");
8206 if (!LL)
8207 std::tie(args&: LL, args&: LH) = DAG.SplitScalar(N: N->getOperand(Num: 0), DL: dl, LoVT: HiLoVT, HiVT: HiLoVT);
8208
8209 bool HasFSHR = isOperationLegal(Op: ISD::FSHR, VT: HiLoVT);
8210
  // Returns the low HBitWidth bits of (Hi:Lo >> ShiftAmt) for
  // 0 < ShiftAmt < HBitWidth, using FSHR when it is legal and an explicit
  // SRL/SHL/OR sequence otherwise.
8211 auto GetFSHR = [&](SDValue Lo, SDValue Hi, unsigned ShiftAmt) {
8212 assert(ShiftAmt > 0 && ShiftAmt < HBitWidth);
8213 if (HasFSHR)
8214 return DAG.getNode(Opcode: ISD::FSHR, DL: dl, VT: HiLoVT, N1: Hi, N2: Lo,
8215 N3: DAG.getShiftAmountConstant(Val: ShiftAmt, VT: HiLoVT, DL: dl));
8216 return DAG.getNode(
8217 Opcode: ISD::OR, DL: dl, VT: HiLoVT,
8218 N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HiLoVT, N1: Lo,
8219 N2: DAG.getShiftAmountConstant(Val: ShiftAmt, VT: HiLoVT, DL: dl)),
8220 N2: DAG.getNode(
8221 Opcode: ISD::SHL, DL: dl, VT: HiLoVT, N1: Hi,
8222 N2: DAG.getShiftAmountConstant(Val: HBitWidth - ShiftAmt, VT: HiLoVT, DL: dl)));
8223 };
8224
8225 // Helper to perform a logical right shift, in place, on a BitWidth-bit value
8226 // split into two HBitWidth-bit halves. Shifts >= HBitWidth move (a shifted) Hi into Lo and zero Hi.
8227 auto ShiftRight = [&](SDValue &Lo, SDValue &Hi, unsigned ShiftAmt) {
8228 if (ShiftAmt == 0)
8229 return;
8230 if (ShiftAmt < HBitWidth) {
8231 Lo = GetFSHR(Lo, Hi, ShiftAmt);
8232 Hi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HiLoVT, N1: Hi,
8233 N2: DAG.getShiftAmountConstant(Val: ShiftAmt, VT: HiLoVT, DL: dl));
8234 } else if (ShiftAmt == HBitWidth) {
8235 Lo = Hi;
8236 Hi = DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT);
8237 } else {
8238 Lo = DAG.getNode(
8239 Opcode: ISD::SRL, DL: dl, VT: HiLoVT, N1: Hi,
8240 N2: DAG.getShiftAmountConstant(Val: ShiftAmt - HBitWidth, VT: HiLoVT, DL: dl));
8241 Hi = DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT);
8242 }
8243 };
8244
8245 // Shift the input by the number of TrailingZeros in the divisor. The
8246 // shifted out bits will be added to the remainder later.
  // This must happen before LL/LH are shifted, as it reads their original
  // values. PartialRemH is only set when TrailingZeros > HBitWidth.
8247 SDValue PartialRemL, PartialRemH;
8248 if (TrailingZeros && Opcode != ISD::UDIV) {
8249 // Save the shifted off bits if we need the remainder.
8250 if (TrailingZeros < HBitWidth) {
8251 APInt Mask = APInt::getLowBitsSet(numBits: HBitWidth, loBitsSet: TrailingZeros);
8252 PartialRemL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: HiLoVT, N1: LL,
8253 N2: DAG.getConstant(Val: Mask, DL: dl, VT: HiLoVT));
8254 } else if (TrailingZeros == HBitWidth) {
8255 // All of LL is part of the remainder.
8256 PartialRemL = LL;
8257 } else {
8258 // TrailingZeros > HBitWidth: LL and part of LH are the remainder.
8259 PartialRemL = LL;
8260 APInt Mask = APInt::getLowBitsSet(numBits: HBitWidth, loBitsSet: TrailingZeros - HBitWidth);
8261 PartialRemH = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: HiLoVT, N1: LH,
8262 N2: DAG.getConstant(Val: Mask, DL: dl, VT: HiLoVT));
8263 }
8264 }
8265
8266 SDValue Sum;
8267 // If BestChunkWidth is HBitWidth add low and high half. If there is a carry
8268 // out, add that to the final sum.
8269 if (BestChunkWidth == HBitWidth) {
8270 assert(!Alternate);
8271 // Shift LH:LL right if there were trailing zeros in the divisor.
8272 ShiftRight(LL, LH, TrailingZeros);
8273
8274 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
8275 EVT SetCCType =
8276 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: HiLoVT);
8277 if (isOperationLegalOrCustom(Op: ISD::UADDO_CARRY, VT: HiLoVT)) {
8278 SDVTList VTList = DAG.getVTList(VT1: HiLoVT, VT2: SetCCType);
8279 Sum = DAG.getNode(Opcode: ISD::UADDO, DL: dl, VTList, N1: LL, N2: LH);
      // Add the carry-out of LL + LH back into the sum.
8280 Sum = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList, N1: Sum,
8281 N2: DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT), N3: Sum.getValue(R: 1));
8282 } else {
8283 Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: LL, N2: LH);
      // An unsigned add wrapped iff the result is smaller than an operand.
8284 SDValue Carry = DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Sum, RHS: LL, Cond: ISD::SETULT);
8285 // If the boolean for the target is 0 or 1, we can add the setcc result
8286 // directly.
8287 if (getBooleanContents(Type: HiLoVT) ==
8288 TargetLoweringBase::ZeroOrOneBooleanContent)
8289 Carry = DAG.getZExtOrTrunc(Op: Carry, DL: dl, VT: HiLoVT);
8290 else
8291 Carry = DAG.getSelect(DL: dl, VT: HiLoVT, Cond: Carry, LHS: DAG.getConstant(Val: 1, DL: dl, VT: HiLoVT),
8292 RHS: DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT));
8293 Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Sum, N2: Carry);
8294 }
8295 } else {
8296 // Otherwise split into multiple chunks and add them together. We chose
8297 // BestChunkWidth so that the sum will not overflow.
8298 SDValue Mask = DAG.getConstant(
8299 Val: APInt::getLowBitsSet(numBits: HBitWidth, loBitsSet: BestChunkWidth), DL: dl, VT: HiLoVT);
8300
    // I is the bit offset of the current chunk within the shifted dividend.
    // LL/LH are still unshifted here, so TrailingZeros is folded into each
    // extraction shift instead.
8301 for (unsigned I = 0; I < BitWidth - TrailingZeros; I += BestChunkWidth) {
8302 // If there were trailing zeros in the divisor, increase the shift amount.
8303 unsigned Shift = I + TrailingZeros;
8304 SDValue Chunk;
8305 if (Shift == 0)
8306 Chunk = LL;
8307 else if (Shift >= HBitWidth)
8308 Chunk = DAG.getNode(
8309 Opcode: ISD::SRL, DL: dl, VT: HiLoVT, N1: LH,
8310 N2: DAG.getShiftAmountConstant(Val: Shift - HBitWidth, VT: HiLoVT, DL: dl));
8311 else
8312 Chunk = GetFSHR(LL, LH, Shift);
8313 // If we're on the last chunk, we don't need an AND.
8314 if (I + BestChunkWidth < BitWidth - TrailingZeros)
8315 Chunk = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: HiLoVT, N1: Chunk, N2: Mask);
8316 if (!Sum) {
8317 Sum = Chunk;
8318 } else {
8319 // For Alternate, we need to subtract odd chunks.
8320 unsigned ChunkNum = I / BestChunkWidth;
8321 unsigned Opc = (Alternate && (ChunkNum % 2) != 0) ? ISD::SUB : ISD::ADD;
8322 Sum = DAG.getNode(Opcode: Opc, DL: dl, VT: HiLoVT, N1: Sum, N2: Chunk);
8323 }
8324 }
8325
8326 // For Alternate, the sum may be negative, but we need a positive sum. We
8327 // can increase it by a multiple of the divisor to make it positive. For 3
8328 // chunks the largest negative value is -(2^BestChunkWidth - 1). For 4
8329 // chunks, it's 2*-(2^BestChunkWidth - 1). We know that 2^BestChunkWidth + 1
8330 // is a multiple of the divisor. Add that 1 or 2 times to make the sum
8331 // positive.
8332 if (Alternate) {
8333 unsigned NumChunks = divideCeil(Numerator: BitWidth - TrailingZeros, Denominator: BestChunkWidth);
8334 assert(NumChunks <= 4);
8335
      // Adjust = 2^BestChunkWidth + 1.
8336 APInt Adjust = APInt::getOneBitSet(numBits: HBitWidth, BitNo: BestChunkWidth);
8337 Adjust.setBit(0);
8338 // If there are 4 chunks, we need to adjust twice.
8339 if (NumChunks == 4)
8340 Adjust <<= 1;
8341 Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Sum,
8342 N2: DAG.getConstant(Val: Adjust, DL: dl, VT: HiLoVT));
8343 }
8344 }
8345
8346 // Perform a HiLoVT urem on the Sum using truncated divisor.
  // The truncation is lossless because Divisor < (1 << HBitWidth) was checked
  // above. RemH starts as zero.
8347 SDValue RemL =
8348 DAG.getNode(Opcode: ISD::UREM, DL: dl, VT: HiLoVT, N1: Sum,
8349 N2: DAG.getConstant(Val: Divisor.trunc(width: HBitWidth), DL: dl, VT: HiLoVT));
8350 SDValue RemH = DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT);
8351
8352 if (Opcode != ISD::UREM) {
8353 // If we didn't shift LL/LH earlier, do it now.
8354 if (BestChunkWidth != HBitWidth)
8355 ShiftRight(LL, LH, TrailingZeros);
8356
8357 // Subtract the remainder from the shifted dividend.
8358 SDValue Dividend = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT, N1: LL, N2: LH);
8359 SDValue Rem = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT, N1: RemL, N2: RemH);
8360
8361 Dividend = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Dividend, N2: Rem);
8362
8363 // Multiply by the multiplicative inverse of the divisor modulo
8364 // (1 << BitWidth).
8365 APInt MulFactor = Divisor.multiplicativeInverse();
8366
8367 SDValue Quotient = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Dividend,
8368 N2: DAG.getConstant(Val: MulFactor, DL: dl, VT));
8369
8370 // Split the quotient into low and high parts.
8371 SDValue QuotL, QuotH;
8372 std::tie(args&: QuotL, args&: QuotH) = DAG.SplitScalar(N: Quotient, DL: dl, LoVT: HiLoVT, HiVT: HiLoVT);
8373 Result.push_back(Elt: QuotL);
8374 Result.push_back(Elt: QuotH);
8375 }
8376
8377 if (Opcode != ISD::UDIV) {
8378 // If we shifted the input, shift the remainder left and add the bits we
8379 // shifted off the input.
8380 if (TrailingZeros) {
8381 if (TrailingZeros < HBitWidth) {
8382 // Shift RemH:RemL left by TrailingZeros.
8383 // RemH gets the high bits shifted out of RemL.
8384 RemH = DAG.getNode(
8385 Opcode: ISD::SRL, DL: dl, VT: HiLoVT, N1: RemL,
8386 N2: DAG.getShiftAmountConstant(Val: HBitWidth - TrailingZeros, VT: HiLoVT, DL: dl));
8387 RemL =
8388 DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HiLoVT, N1: RemL,
8389 N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl));
8390 // OR in the partial remainder.
8391 RemL = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: HiLoVT, N1: RemL, N2: PartialRemL,
8392 Flags: SDNodeFlags::Disjoint);
8393 } else if (TrailingZeros == HBitWidth) {
8394 // Shift left by exactly HBitWidth: RemH becomes RemL, RemL becomes
8395 // PartialRemL.
8396 RemH = RemL;
8397 RemL = PartialRemL;
8398 } else {
8399 // Shift left by more than HBitWidth.
8400 RemH = DAG.getNode(
8401 Opcode: ISD::SHL, DL: dl, VT: HiLoVT, N1: RemL,
8402 N2: DAG.getShiftAmountConstant(Val: TrailingZeros - HBitWidth, VT: HiLoVT, DL: dl));
8403 RemH = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: HiLoVT, N1: RemH, N2: PartialRemH,
8404 Flags: SDNodeFlags::Disjoint);
8405 RemL = PartialRemL;
8406 }
8407 }
8408 Result.push_back(Elt: RemL);
8409 Result.push_back(Elt: RemH);
8410 }
8411
8412 return true;
8413}
8414
/// Expand an unsigned division/remainder of N by the constant Divisor using
/// the multiply-by-magic-constant sequence described by
/// UnsignedDivisionByConstantInfo, with the dividend handled as a pair of
/// HiLoVT halves.
///
/// The dividend is operand 0 of N. LL/LH optionally supply its low/high
/// HiLoVT halves (both or neither, asserted); when absent the dividend is
/// split here. Divisor must not be 1 (asserted).
///
/// On success, appends to Result the quotient halves (lo, hi) unless N is an
/// ISD::UREM, followed by the remainder halves (lo, hi) unless N is an
/// ISD::UDIV, and returns true. Returns false if a wide multiply cannot be
/// expanded; nodes may already have been created by then, and when both
/// quotient and remainder are requested the quotient halves may already be in
/// Result.
8415bool TargetLowering::expandUDIVREMByConstantViaUMulHiMagic(
8416 SDNode *N, const APInt &Divisor, SmallVectorImpl<SDValue> &Result,
8417 EVT HiLoVT, SelectionDAG &DAG, SDValue LL, SDValue LH) const {
8418
8419 SDValue N0 = N->getOperand(Num: 0);
8420 EVT VT = N0->getValueType(ResNo: 0);
8421 SDLoc DL{N};
8422
8423 assert(!Divisor.isOne() && "Magic algorithm does not work for division by 1");
8424
8425 // This helper creates a MUL_LOHI of the pair (LL, LH) by a constant.
  // The expansion is delegated to expandMUL_LOHI restricted to legal or
  // custom operations; its boolean result is returned.
8426 auto MakeMUL_LOHIByConst = [&](unsigned Opc, SDValue LL, SDValue LH,
8427 const APInt &Const,
8428 SmallVectorImpl<SDValue> &Result) {
8429 SDValue LHS = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT, N1: LL, N2: LH);
8430 SDValue RHS = DAG.getConstant(Val: Const, DL, VT);
8431 auto [RL, RH] = DAG.SplitScalar(N: RHS, DL, LoVT: HiLoVT, HiVT: HiLoVT);
8432 return expandMUL_LOHI(Opcode: Opc, VT, dl: DL, LHS, RHS, Result, HiLoVT, DAG,
8433 Kind: TargetLowering::MulExpansionKind::OnlyLegalOrCustom,
8434 LL, LH, RL, RH);
8435 };
8436
8437 // This helper creates an ADD/SUB of the pairs (LL, LH) and (RL, RH).
  // The low halves go through UADDO/USUBO and the resulting overflow bit
  // feeds UADDO_CARRY/USUBO_CARRY on the high halves. Any Opc other than
  // ISD::ADD is treated as a subtraction. The overflow of the high halves is
  // discarded.
8438 auto MakeAddSubLong = [&](unsigned Opc, SDValue LL, SDValue LH, SDValue RL,
8439 SDValue RH) {
8440 SDValue AddSubNode =
8441 DAG.getNode(Opcode: Opc == ISD::ADD ? ISD::UADDO : ISD::USUBO, DL,
8442 VTList: DAG.getVTList(VT1: HiLoVT, VT2: MVT::i1), N1: LL, N2: RL);
8443 SDValue OutL = AddSubNode.getValue(R: 0);
8444 SDValue Overflow = AddSubNode.getValue(R: 1);
8445 SDValue AddSubWithOverflow =
8446 DAG.getNode(Opcode: Opc == ISD::ADD ? ISD::UADDO_CARRY : ISD::USUBO_CARRY, DL,
8447 VTList: DAG.getVTList(VT1: HiLoVT, VT2: MVT::i1), N1: LH, N2: RH, N3: Overflow);
8448 SDValue OutH = AddSubWithOverflow.getValue(R: 0);
8449 return std::make_pair(x&: OutL, y&: OutH);
8450 };
8451
8452 // This helper creates a SRL of the pair (LL, LH) by Shift.
  // NOTE(review): FSHR is emitted here without a legality check; presumably
  // later legalization handles targets without it. Confirm.
8453 auto MakeSRLLong = [&](SDValue LL, SDValue LH, unsigned Shift) {
8454 unsigned HBitWidth = HiLoVT.getScalarSizeInBits();
8455 if (Shift < HBitWidth) {
8456 SDValue ShAmt = DAG.getShiftAmountConstant(Val: Shift, VT: HiLoVT, DL);
8457 SDValue ResL = DAG.getNode(Opcode: ISD::FSHR, DL, VT: HiLoVT, N1: LH, N2: LL, N3: ShAmt);
8458 SDValue ResH = DAG.getNode(Opcode: ISD::SRL, DL, VT: HiLoVT, N1: LH, N2: ShAmt);
8459 return std::make_pair(x&: ResL, y&: ResH);
8460 }
    // Shifts of HBitWidth or more leave a zero high half.
8461 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: HiLoVT);
8462 if (Shift == HBitWidth)
8463 return std::make_pair(x&: LH, y&: Zero);
8464 assert(Shift - HBitWidth < HBitWidth &&
8465 "We shouldn't generate an undefined shift");
8466 SDValue ShAmt = DAG.getShiftAmountConstant(Val: Shift - HBitWidth, VT: HiLoVT, DL);
8467 return std::make_pair(x: DAG.getNode(Opcode: ISD::SRL, DL, VT: HiLoVT, N1: LH, N2: ShAmt), y&: Zero);
8468 };
8469
8470 // Knowledge of leading zeros may help to reduce the multiplier.
8471 unsigned KnownLeadingZeros = DAG.computeKnownBits(Op: N0).countMinLeadingZeros();
8472
8473 UnsignedDivisionByConstantInfo Magics = UnsignedDivisionByConstantInfo::get(
8474 D: Divisor, LeadingZeros: std::min(a: KnownLeadingZeros, b: Divisor.countl_zero()));
8475
8476 assert(!LL == !LH && "Expected both input halves or no input halves!");
8477 if (!LL)
8478 std::tie(args&: LL, args&: LH) = DAG.SplitScalar(N: N0, DL, LoVT: HiLoVT, HiVT: HiLoVT);
  // (QL, QH) tracks the quotient as it is built; (LL, LH) stays the original
  // dividend for the add fix-up and the remainder computation.
8479 SDValue QL = LL;
8480 SDValue QH = LH;
8481 if (Magics.PreShift != 0)
8482 std::tie(args&: QL, args&: QH) = MakeSRLLong(QL, QH, Magics.PreShift);
8483
8484 SmallVector<SDValue, 4> UMulResult;
8485 if (!MakeMUL_LOHIByConst(ISD::UMUL_LOHI, QL, QH, Magics.Magic, UMulResult))
8486 return false;
8487
  // Keep only the upper half of the wide product, i.e. the two most
  // significant of the four pieces.
8488 QL = UMulResult[2];
8489 QH = UMulResult[3];
8490
  // Fix-up for magic constants flagged IsAdd:
  // Q = (((N - Q) >> 1) + Q), with N the original dividend.
8491 if (Magics.IsAdd) {
8492 auto [NPQL, NPQH] = MakeAddSubLong(ISD::SUB, LL, LH, QL, QH);
8493 std::tie(args&: NPQL, args&: NPQH) = MakeSRLLong(NPQL, NPQH, 1);
8494 std::tie(args&: QL, args&: QH) = MakeAddSubLong(ISD::ADD, NPQL, NPQH, QL, QH);
8495 }
8496
8497 if (Magics.PostShift != 0)
8498 std::tie(args&: QL, args&: QH) = MakeSRLLong(QL, QH, Magics.PostShift);
8499
8500 unsigned Opcode = N->getOpcode();
8501 if (Opcode != ISD::UREM) {
8502 Result.push_back(Elt: QL);
8503 Result.push_back(Elt: QH);
8504 }
8505
  // Remainder = dividend - quotient * Divisor, using only the low VT bits of
  // the product.
8506 if (Opcode != ISD::UDIV) {
8507 SmallVector<SDValue, 2> MulResult;
8508 if (!MakeMUL_LOHIByConst(ISD::MUL, QL, QH, Divisor, MulResult))
8509 return false;
8510
8511 assert(MulResult.size() == 2);
8512
8513 auto [RemL, RemH] =
8514 MakeAddSubLong(ISD::SUB, LL, LH, MulResult[0], MulResult[1]);
8515
8516 Result.push_back(Elt: RemL);
8517 Result.push_back(Elt: RemH);
8518 }
8519
8520 return true;
8521}
8522
/// Try to expand a division/remainder node N whose divisor (operand 1) is a
/// constant into operations on the half-width type HiLoVT.
///
/// Only ISD::UREM, ISD::UDIV and ISD::UDIVREM are handled; the signed opcodes
/// SREM, SDIV and SDIVREM return false and any other opcode trips the assert.
/// The expansion is also declined when operand 1 is not a ConstantSDNode,
/// when neither MULHU nor UMUL_LOHI is legal or custom for HiLoVT, when
/// optimizing for size, or when the divisor is 0 or 1.
///
/// LL/LH optionally supply the low/high halves of the dividend and are
/// forwarded unchanged. The remainder-decomposition strategy is tried first
/// and the multiply-high magic strategy second.
///
/// Returns true if one of the strategies emitted the expansion into Result.
8523bool TargetLowering::expandDIVREMByConstant(SDNode *N,
8524 SmallVectorImpl<SDValue> &Result,
8525 EVT HiLoVT, SelectionDAG &DAG,
8526 SDValue LL, SDValue LH) const {
8527 unsigned Opcode = N->getOpcode();
8528
8529 // TODO: Support signed division/remainder.
8530 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
8531 return false;
8532 assert(
8533 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
8534 "Unexpected opcode");
8535
8536 auto *CN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
8537 if (!CN)
8538 return false;
8539
  // Taken by value: the decomposition strategy receives its own copy and
  // shifts that copy in place.
8540 APInt Divisor = CN->getAPIntValue();
8541
8542 // We depend on the UREM by constant optimization in DAGCombiner that requires
8543 // high multiply.
8544 if (!isOperationLegalOrCustom(Op: ISD::MULHU, VT: HiLoVT) &&
8545 !isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT: HiLoVT))
8546 return false;
8547
8548 // Don't expand if optimizing for size.
8549 if (DAG.shouldOptForSize())
8550 return false;
8551
8552 // Early out for 0 or 1 divisors.
8553 if (Divisor.ule(RHS: 1))
8554 return false;
8555
  // First choice: sum-of-chunks remainder decomposition.
8556 if (expandUDIVREMByConstantViaUREMDecomposition(N, Divisor, Result, HiLoVT,
8557 DAG, LL, LH))
8558 return true;
8559
  // Second choice: multiply-high by a magic constant.
8560 if (expandUDIVREMByConstantViaUMulHiMagic(N, Divisor, Result, HiLoVT, DAG, LL,
8561 LH))
8562 return true;
8563
8564 return false;
8565}
8566
8567// Check that (every element of) Z is undef or not an exact multiple of BW.
8568static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
8569 return ISD::matchUnaryPredicate(
8570 Op: Z,
8571 Match: [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(RHS: BW) != 0; },
8572 /*AllowUndefs=*/true, /*AllowTruncation=*/true);
8573}
8574
8575static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
8576 EVT VT = Node->getValueType(ResNo: 0);
8577 SDValue ShX, ShY;
8578 SDValue ShAmt, InvShAmt;
8579 SDValue X = Node->getOperand(Num: 0);
8580 SDValue Y = Node->getOperand(Num: 1);
8581 SDValue Z = Node->getOperand(Num: 2);
8582 SDValue Mask = Node->getOperand(Num: 3);
8583 SDValue VL = Node->getOperand(Num: 4);
8584
8585 unsigned BW = VT.getScalarSizeInBits();
8586 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
8587 SDLoc DL(SDValue(Node, 0));
8588
8589 EVT ShVT = Z.getValueType();
8590 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8591 // fshl: X << C | Y >> (BW - C)
8592 // fshr: X << (BW - C) | Y >> C
8593 // where C = Z % BW is not zero
8594 SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
8595 ShAmt = DAG.getNode(Opcode: ISD::VP_UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC, N3: Mask, N4: VL);
8596 InvShAmt = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT: ShVT, N1: BitWidthC, N2: ShAmt, N3: Mask, N4: VL);
8597 ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: IsFSHL ? ShAmt : InvShAmt, N3: Mask,
8598 N4: VL);
8599 ShY = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT, N1: Y, N2: IsFSHL ? InvShAmt : ShAmt, N3: Mask,
8600 N4: VL);
8601 } else {
8602 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8603 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8604 SDValue BitMask = DAG.getConstant(Val: BW - 1, DL, VT: ShVT);
8605 if (isPowerOf2_32(Value: BW)) {
8606 // Z % BW -> Z & (BW - 1)
8607 ShAmt = DAG.getNode(Opcode: ISD::VP_AND, DL, VT: ShVT, N1: Z, N2: BitMask, N3: Mask, N4: VL);
8608 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8609 SDValue NotZ = DAG.getNode(Opcode: ISD::VP_XOR, DL, VT: ShVT, N1: Z,
8610 N2: DAG.getAllOnesConstant(DL, VT: ShVT), N3: Mask, N4: VL);
8611 InvShAmt = DAG.getNode(Opcode: ISD::VP_AND, DL, VT: ShVT, N1: NotZ, N2: BitMask, N3: Mask, N4: VL);
8612 } else {
8613 SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
8614 ShAmt = DAG.getNode(Opcode: ISD::VP_UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC, N3: Mask, N4: VL);
8615 InvShAmt = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT: ShVT, N1: BitMask, N2: ShAmt, N3: Mask, N4: VL);
8616 }
8617
8618 SDValue One = DAG.getConstant(Val: 1, DL, VT: ShVT);
8619 if (IsFSHL) {
8620 ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: ShAmt, N3: Mask, N4: VL);
8621 SDValue ShY1 = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT, N1: Y, N2: One, N3: Mask, N4: VL);
8622 ShY = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT, N1: ShY1, N2: InvShAmt, N3: Mask, N4: VL);
8623 } else {
8624 SDValue ShX1 = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: One, N3: Mask, N4: VL);
8625 ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: ShX1, N2: InvShAmt, N3: Mask, N4: VL);
8626 ShY = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT, N1: Y, N2: ShAmt, N3: Mask, N4: VL);
8627 }
8628 }
8629 return DAG.getNode(Opcode: ISD::VP_OR, DL, VT, N1: ShX, N2: ShY, N3: Mask, N4: VL);
8630}
8631
8632SDValue TargetLowering::expandFunnelShift(SDNode *Node,
8633 SelectionDAG &DAG) const {
8634 if (Node->isVPOpcode())
8635 return expandVPFunnelShift(Node, DAG);
8636
8637 EVT VT = Node->getValueType(ResNo: 0);
8638
8639 if (VT.isVector() && (!isOperationLegalOrCustom(Op: ISD::SHL, VT) ||
8640 !isOperationLegalOrCustom(Op: ISD::SRL, VT) ||
8641 !isOperationLegalOrCustom(Op: ISD::SUB, VT) ||
8642 !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)))
8643 return SDValue();
8644
8645 SDValue X = Node->getOperand(Num: 0);
8646 SDValue Y = Node->getOperand(Num: 1);
8647 SDValue Z = Node->getOperand(Num: 2);
8648
8649 unsigned BW = VT.getScalarSizeInBits();
8650 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
8651 SDLoc DL(SDValue(Node, 0));
8652
8653 EVT ShVT = Z.getValueType();
8654
8655 // If a funnel shift in the other direction is more supported, use it.
8656 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
8657 if (!isOperationLegalOrCustom(Op: Node->getOpcode(), VT) &&
8658 isOperationLegalOrCustom(Op: RevOpcode, VT) && isPowerOf2_32(Value: BW)) {
8659 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8660 // fshl X, Y, Z -> fshr X, Y, -Z
8661 // fshr X, Y, Z -> fshl X, Y, -Z
8662 Z = DAG.getNegative(Val: Z, DL, VT: ShVT);
8663 } else {
8664 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8665 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
8666 SDValue One = DAG.getConstant(Val: 1, DL, VT: ShVT);
8667 if (IsFSHL) {
8668 Y = DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: One);
8669 X = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: X, N2: One);
8670 } else {
8671 X = DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: One);
8672 Y = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Y, N2: One);
8673 }
8674 Z = DAG.getNOT(DL, Val: Z, VT: ShVT);
8675 }
8676 return DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: Z);
8677 }
8678
8679 SDValue ShX, ShY;
8680 SDValue ShAmt, InvShAmt;
8681 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8682 // fshl: X << C | Y >> (BW - C)
8683 // fshr: X << (BW - C) | Y >> C
8684 // where C = Z % BW is not zero
8685 SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
8686 ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC);
8687 InvShAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: BitWidthC, N2: ShAmt);
8688 ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: IsFSHL ? ShAmt : InvShAmt);
8689 ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: IsFSHL ? InvShAmt : ShAmt);
8690 } else {
8691 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8692 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8693 SDValue Mask = DAG.getConstant(Val: BW - 1, DL, VT: ShVT);
8694 if (isPowerOf2_32(Value: BW)) {
8695 // Z % BW -> Z & (BW - 1)
8696 ShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: Z, N2: Mask);
8697 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8698 InvShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: DAG.getNOT(DL, Val: Z, VT: ShVT), N2: Mask);
8699 } else {
8700 SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
8701 ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC);
8702 InvShAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Mask, N2: ShAmt);
8703 }
8704
8705 SDValue One = DAG.getConstant(Val: 1, DL, VT: ShVT);
8706 if (IsFSHL) {
8707 ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: ShAmt);
8708 SDValue ShY1 = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: One);
8709 ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: ShY1, N2: InvShAmt);
8710 } else {
8711 SDValue ShX1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: One);
8712 ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ShX1, N2: InvShAmt);
8713 ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: ShAmt);
8714 }
8715 }
8716 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShX, N2: ShY);
8717}
8718
8719// TODO: Merge with expandFunnelShift.
8720SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
8721 SelectionDAG &DAG) const {
8722 EVT VT = Node->getValueType(ResNo: 0);
8723 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8724 bool IsLeft = Node->getOpcode() == ISD::ROTL;
8725 SDValue Op0 = Node->getOperand(Num: 0);
8726 SDValue Op1 = Node->getOperand(Num: 1);
8727 SDLoc DL(SDValue(Node, 0));
8728
8729 EVT ShVT = Op1.getValueType();
8730 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: ShVT);
8731
8732 // If a rotate in the other direction is more supported, use it.
8733 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8734 if (!isOperationLegalOrCustom(Op: Node->getOpcode(), VT) &&
8735 isOperationLegalOrCustom(Op: RevRot, VT) && isPowerOf2_32(Value: EltSizeInBits)) {
8736 SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Zero, N2: Op1);
8737 return DAG.getNode(Opcode: RevRot, DL, VT, N1: Op0, N2: Sub);
8738 }
8739
8740 if (!AllowVectorOps && VT.isVector() &&
8741 (!isOperationLegalOrCustom(Op: ISD::SHL, VT) ||
8742 !isOperationLegalOrCustom(Op: ISD::SRL, VT) ||
8743 !isOperationLegalOrCustom(Op: ISD::SUB, VT) ||
8744 !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT) ||
8745 !isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT)))
8746 return SDValue();
8747
8748 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8749 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8750 SDValue BitWidthMinusOneC = DAG.getConstant(Val: EltSizeInBits - 1, DL, VT: ShVT);
8751 SDValue ShVal;
8752 SDValue HsVal;
8753 if (isPowerOf2_32(Value: EltSizeInBits)) {
8754 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8755 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8756 SDValue NegOp1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Zero, N2: Op1);
8757 SDValue ShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: Op1, N2: BitWidthMinusOneC);
8758 ShVal = DAG.getNode(Opcode: ShOpc, DL, VT, N1: Op0, N2: ShAmt);
8759 SDValue HsAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: NegOp1, N2: BitWidthMinusOneC);
8760 HsVal = DAG.getNode(Opcode: HsOpc, DL, VT, N1: Op0, N2: HsAmt);
8761 } else {
8762 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8763 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8764 SDValue BitWidthC = DAG.getConstant(Val: EltSizeInBits, DL, VT: ShVT);
8765 SDValue ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Op1, N2: BitWidthC);
8766 ShVal = DAG.getNode(Opcode: ShOpc, DL, VT, N1: Op0, N2: ShAmt);
8767 SDValue HsAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: BitWidthMinusOneC, N2: ShAmt);
8768 SDValue One = DAG.getConstant(Val: 1, DL, VT: ShVT);
8769 HsVal =
8770 DAG.getNode(Opcode: HsOpc, DL, VT, N1: DAG.getNode(Opcode: HsOpc, DL, VT, N1: Op0, N2: One), N2: HsAmt);
8771 }
8772 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShVal, N2: HsVal);
8773}
8774
8775/// Check if CLMUL on VT can eventually reach a type with legal CLMUL through
8776/// a chain of halving decompositions (halving element width) and/or vector
8777/// widening (doubling element count). This guides expansion strategy selection:
8778/// if true, the halving/widening path produces better code than bit-by-bit.
8779///
8780/// HalveDepth tracks halving steps only (each creates ~4x more operations).
8781/// Widening steps are cheap (O(1) pad/extract) and don't count.
8782/// Limiting halvings to 2 prevents exponential blowup:
8783/// 1 halving: ~4 sub-CLMULs (good, e.g. v8i16 -> v8i8)
8784/// 2 halvings: ~16 sub-CLMULs (acceptable, e.g. v4i32 -> v4i16 -> v8i8)
8785/// 3 halvings: ~64 sub-CLMULs (worse than bit-by-bit expansion)
8786static bool canNarrowCLMULToLegal(const TargetLowering &TLI, LLVMContext &Ctx,
8787 EVT VT, unsigned HalveDepth = 0,
8788 unsigned TotalDepth = 0) {
8789 if (HalveDepth > 2 || TotalDepth > 8 || !VT.isFixedLengthVector())
8790 return false;
8791 if (TLI.isOperationLegalOrCustom(Op: ISD::CLMUL, VT))
8792 return true;
8793 if (!TLI.isTypeLegal(VT))
8794 return false;
8795
8796 unsigned BW = VT.getScalarSizeInBits();
8797
8798 // Halve: halve element width, same element count.
8799 // This is the expensive step -- each halving creates ~4x more operations.
8800 if (BW % 2 == 0) {
8801 EVT HalfEltVT = EVT::getIntegerVT(Context&: Ctx, BitWidth: BW / 2);
8802 EVT HalfVT = VT.changeVectorElementType(Context&: Ctx, EltVT: HalfEltVT);
8803 if (TLI.isTypeLegal(VT: HalfVT) &&
8804 canNarrowCLMULToLegal(TLI, Ctx, VT: HalfVT, HalveDepth: HalveDepth + 1, TotalDepth: TotalDepth + 1))
8805 return true;
8806 }
8807
8808 // Widen: double element count (fixed-width vectors only).
8809 // This is cheap -- just INSERT_SUBVECTOR + EXTRACT_SUBVECTOR.
8810 EVT WideVT = VT.getDoubleNumVectorElementsVT(Context&: Ctx);
8811 if (TLI.isTypeLegal(VT: WideVT) &&
8812 canNarrowCLMULToLegal(TLI, Ctx, VT: WideVT, HalveDepth, TotalDepth: TotalDepth + 1))
8813 return true;
8814
8815 return false;
8816}
8817
8818SDValue TargetLowering::expandCLMUL(SDNode *Node, SelectionDAG &DAG) const {
8819 SDLoc DL(Node);
8820 EVT VT = Node->getValueType(ResNo: 0);
8821 SDValue X = Node->getOperand(Num: 0);
8822 SDValue Y = Node->getOperand(Num: 1);
8823 unsigned BW = VT.getScalarSizeInBits();
8824 unsigned Opcode = Node->getOpcode();
8825 LLVMContext &Ctx = *DAG.getContext();
8826
8827 switch (Opcode) {
8828 case ISD::CLMUL: {
8829 // For vector types, try decomposition strategies that leverage legal
8830 // CLMUL on narrower or wider element types, avoiding the expensive
8831 // bit-by-bit expansion.
8832 if (VT.isVector()) {
8833 // Strategy 1: Halving decomposition to half-element-width CLMUL.
8834 // Applies ExpandIntRes_CLMUL's identity element-wise:
8835 // CLMUL(X, Y) = (Hi << HalfBW) | Lo
8836 // where:
8837 // Lo = CLMUL(XLo, YLo)
8838 // Hi = CLMULH(XLo, YLo) ^ CLMUL(XLo, YHi) ^ CLMUL(XHi, YLo)
8839 unsigned HalfBW = BW / 2;
8840 if (BW % 2 == 0) {
8841 EVT HalfEltVT = EVT::getIntegerVT(Context&: Ctx, BitWidth: HalfBW);
8842 EVT HalfVT =
8843 EVT::getVectorVT(Context&: Ctx, VT: HalfEltVT, EC: VT.getVectorElementCount());
8844 if (isTypeLegal(VT: HalfVT) && canNarrowCLMULToLegal(TLI: *this, Ctx, VT: HalfVT,
8845 /*HalveDepth=*/1)) {
8846 SDValue ShAmt = DAG.getShiftAmountConstant(Val: HalfBW, VT, DL);
8847
8848 // Extract low and high halves of each element.
8849 SDValue XLo = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: HalfVT, Operand: X);
8850 SDValue XHi = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: HalfVT,
8851 Operand: DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: X, N2: ShAmt));
8852 SDValue YLo = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: HalfVT, Operand: Y);
8853 SDValue YHi = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: HalfVT,
8854 Operand: DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: ShAmt));
8855
8856 // Lo = CLMUL(XLo, YLo)
8857 SDValue Lo = DAG.getNode(Opcode: ISD::CLMUL, DL, VT: HalfVT, N1: XLo, N2: YLo);
8858
8859 // Hi = CLMULH(XLo, YLo) ^ CLMUL(XLo, YHi) ^ CLMUL(XHi, YLo)
8860 SDValue LoH = DAG.getNode(Opcode: ISD::CLMULH, DL, VT: HalfVT, N1: XLo, N2: YLo);
8861 SDValue Cross1 = DAG.getNode(Opcode: ISD::CLMUL, DL, VT: HalfVT, N1: XLo, N2: YHi);
8862 SDValue Cross2 = DAG.getNode(Opcode: ISD::CLMUL, DL, VT: HalfVT, N1: XHi, N2: YLo);
8863 SDValue Cross = DAG.getNode(Opcode: ISD::XOR, DL, VT: HalfVT, N1: Cross1, N2: Cross2);
8864 SDValue Hi = DAG.getNode(Opcode: ISD::XOR, DL, VT: HalfVT, N1: LoH, N2: Cross);
8865
8866 // Reassemble: Result = ZExt(Lo) | (AnyExt(Hi) << HalfBW)
8867 SDValue LoExt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT, Operand: Lo);
8868 SDValue HiExt = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT, Operand: Hi);
8869 SDValue HiShifted = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: HiExt, N2: ShAmt);
8870 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: LoExt, N2: HiShifted);
8871 }
8872 }
8873
8874 // Strategy 2: Promote to double-element-width CLMUL.
8875 // CLMUL(X, Y) = Trunc(CLMUL(AnyExt(X), AnyExt(Y)))
8876 {
8877 EVT ExtVT = VT.widenIntegerElementType(Context&: Ctx);
8878 if (isTypeLegal(VT: ExtVT) && isOperationLegalOrCustom(Op: ISD::CLMUL, VT: ExtVT)) {
8879 // If CLMUL on ExtVT is Custom (not Legal), the target may
8880 // scalarize it, costing O(NumElements) scalar ops. The bit-by-bit
8881 // fallback costs O(BW) vectorized iterations. Only widen when
8882 // element count is small enough that scalarization is cheaper.
8883 unsigned NumElts = VT.getVectorMinNumElements();
8884 if (isOperationLegal(Op: ISD::CLMUL, VT: ExtVT) || NumElts < BW) {
8885 SDValue XExt = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: ExtVT, Operand: X);
8886 SDValue YExt = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: ExtVT, Operand: Y);
8887 SDValue Mul = DAG.getNode(Opcode: ISD::CLMUL, DL, VT: ExtVT, N1: XExt, N2: YExt);
8888 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Mul);
8889 }
8890 }
8891 }
8892
8893 // Strategy 3: Widen element count (pad with undef, do CLMUL on wider
8894 // vector, extract lower result). CLMUL is element-wise, so upper
8895 // (undef) lanes don't affect the lower results.
8896 // e.g. v4i16 => pad to v8i16 => halve to v8i8 PMUL => extract v4i16.
8897 if (auto EC = VT.getVectorElementCount(); EC.isFixed()) {
8898 EVT WideVT = EVT::getVectorVT(Context&: Ctx, VT: VT.getVectorElementType(), EC: EC * 2);
8899 if (isTypeLegal(VT: WideVT) && canNarrowCLMULToLegal(TLI: *this, Ctx, VT: WideVT)) {
8900 SDValue Undef = DAG.getUNDEF(VT: WideVT);
8901 SDValue XWide = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: WideVT, N1: Undef,
8902 N2: X, N3: DAG.getVectorIdxConstant(Val: 0, DL));
8903 SDValue YWide = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: WideVT, N1: Undef,
8904 N2: Y, N3: DAG.getVectorIdxConstant(Val: 0, DL));
8905 SDValue WideRes = DAG.getNode(Opcode: ISD::CLMUL, DL, VT: WideVT, N1: XWide, N2: YWide);
8906 return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, N1: WideRes,
8907 N2: DAG.getVectorIdxConstant(Val: 0, DL));
8908 }
8909 }
8910 }
8911
8912 // Special case: clmul(X, ~0) is equivalent to a "parallel prefix XOR" or
8913 // "bitwise parity" operation.
8914 if (isAllOnesOrAllOnesSplat(V: Y)) {
8915 SDValue R = X;
8916 for (unsigned I = 1; I < BW; I <<= 1) {
8917 SDValue ShAmt = DAG.getShiftAmountConstant(Val: I, VT, DL);
8918 SDValue Shifted = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: R, N2: ShAmt);
8919 R = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: R, N2: Shifted);
8920 }
8921 return R;
8922 }
8923
8924 // NOTE: If you change this expansion, please update the cost model
8925 // calculation in BasicTTIImpl::getTypeBasedIntrinsicInstrCost for
8926 // Intrinsic::clmul.
8927
8928 // Strategy 4: multiplication with holes.
8929 //
8930 // Uses "holes" (sequences of zeroes) to avoid carry spilling. When carries
8931 // do occur, they wind up in a "hole" and are subsequently masked out of the
8932 // result.
8933 //
8934 // A hole of 3 bits is optimal for 32-bit and 64-bit inputs. 128-bit
8935 // integers need a larger hole, and for smaller integers the fallback below
8936 // is more efficient.
8937 //
8938 // Based on bmul64 in bearssl and bmul in the rust polyval crate.
8939 if (BW >= 32 && BW <= 64 &&
8940 isOperationLegalOrCustom(Op: ISD::MUL, VT: getTypeToTransformTo(Context&: Ctx, VT))) {
8941
8942 // Set every fourth bit of each nibble, equivalent to 0b00010001...0001.
8943 APInt MaskVal = APInt::getSplat(NewLen: BW, V: APInt(4, 0b0001));
8944
8945 // Create versions of X and Y that keep only the I-th bit of
8946 // each nibble.
8947 SDValue M[4], Xp[4], Yp[4];
8948 for (unsigned I = 0; I < 4; ++I) {
8949 M[I] = DAG.getConstant(Val: MaskVal.shl(shiftAmt: I), DL, VT);
8950 Xp[I] = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: X, N2: M[I]);
8951 Yp[I] = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Y, N2: M[I]);
8952 }
8953
8954 // Codegens these expressions (16 multiplications):
8955 //
8956 // z0 = (x0 * y0) ^ (x1 * y3) ^ (x2 * y2) ^ (x3 * y1);
8957 // z1 = (x0 * y1) ^ (x1 * y0) ^ (x2 * y3) ^ (x3 * y2);
8958 // z2 = (x0 * y2) ^ (x1 * y1) ^ (x2 * y0) ^ (x3 * y3);
8959 // z3 = (x0 * y3) ^ (x1 * y2) ^ (x2 * y1) ^ (x3 * y0);
8960 SDValue Res = DAG.getConstant(Val: 0, DL, VT);
8961 for (unsigned I = 0; I < 4; ++I) {
8962 SDValue Zi = DAG.getConstant(Val: 0, DL, VT);
8963 for (unsigned J = 0; J < 4; ++J) {
8964 unsigned K = (I + 4 - J) % 4;
8965 SDValue P = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: Xp[J], N2: Yp[K]);
8966 Zi = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Zi, N2: P);
8967 }
8968
8969 // Keep only the bits belonging to this iteration, and bitwise or it all
8970 // together.
8971 Zi = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Zi, N2: M[I]);
8972 Res = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Res, N2: Zi, Flags: SDNodeFlags::Disjoint);
8973 }
8974 return Res;
8975 }
8976
8977 // Strategy 5: the naive fallback.
8978 EVT SetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: Ctx, VT);
8979
8980 SDValue Res = DAG.getConstant(Val: 0, DL, VT);
8981 for (unsigned I = 0; I < BW; ++I) {
8982 SDValue ShiftAmt = DAG.getShiftAmountConstant(Val: I, VT, DL);
8983 SDValue Mask = DAG.getConstant(Val: APInt::getOneBitSet(numBits: BW, BitNo: I), DL, VT);
8984 SDValue YMasked = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Y, N2: Mask);
8985
8986 // For targets with a fast bit test instruction (e.g., x86 BT) or without
8987 // multiply, use a shift-based expansion to avoid expensive MUL
8988 // instructions.
8989 SDValue Part;
8990 if (!hasBitTest(X: Y, Y: ShiftAmt) &&
8991 isOperationLegalOrCustom(
8992 Op: ISD::MUL, VT: getTypeToTransformTo(Context&: *DAG.getContext(), VT))) {
8993 Part = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: X, N2: YMasked);
8994 } else {
8995 // Canonical bit test: (Y & (1 << I)) != 0
8996 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
8997 SDValue Cond = DAG.getSetCC(DL, VT: SetCCVT, LHS: YMasked, RHS: Zero, Cond: ISD::SETEQ);
8998 SDValue XShifted = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: ShiftAmt);
8999 Part = DAG.getSelect(DL, VT, Cond, LHS: Zero, RHS: XShifted);
9000 }
9001 Res = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Res, N2: Part);
9002 }
9003 return Res;
9004 }
9005 case ISD::CLMULR:
9006 // If we have CLMUL/CLMULH, merge the shifted results to form CLMULR.
9007 if (isOperationLegalOrCustom(Op: ISD::CLMUL, VT) &&
9008 isOperationLegalOrCustom(Op: ISD::CLMULH, VT)) {
9009 SDValue Lo = DAG.getNode(Opcode: ISD::CLMUL, DL, VT, N1: X, N2: Y);
9010 SDValue Hi = DAG.getNode(Opcode: ISD::CLMULH, DL, VT, N1: X, N2: Y);
9011 Lo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo,
9012 N2: DAG.getShiftAmountConstant(Val: BW - 1, VT, DL));
9013 Hi = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi,
9014 N2: DAG.getShiftAmountConstant(Val: 1, VT, DL));
9015 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Lo, N2: Hi);
9016 }
9017 [[fallthrough]];
9018 case ISD::CLMULH: {
9019 EVT ExtVT = VT.widenIntegerElementType(Context&: Ctx);
9020 // Use bitreverse-based lowering (CLMULR/H = rev(CLMUL(rev,rev)) >> S)
9021 // when any of these hold:
9022 // (a) ZERO_EXTEND to ExtVT or SRL on ExtVT isn't legal.
9023 // (b) CLMUL is legal on VT but not on ExtVT (e.g. v8i8 on AArch64).
9024 // (c) CLMUL on ExtVT isn't legal, but CLMUL on VT can be efficiently
9025 // expanded via halving/widening to reach legal CLMUL. The bitreverse
9026 // path creates CLMUL(VT) which will be expanded efficiently. The
9027 // promote path would create CLMUL(ExtVT) => halving => CLMULH(VT),
9028 // causing a cycle.
9029 // Note: when CLMUL is legal on ExtVT, the zext => CLMUL(ExtVT) => shift
9030 // => trunc path is preferred over the bitreverse path, as it avoids the
9031 // cost of 3 bitreverse operations.
9032 if (!isOperationLegalOrCustom(Op: ISD::ZERO_EXTEND, VT: ExtVT) ||
9033 !isOperationLegalOrCustom(Op: ISD::SRL, VT: ExtVT) ||
9034 (!isOperationLegalOrCustom(Op: ISD::CLMUL, VT: ExtVT) &&
9035 (isOperationLegalOrCustom(Op: ISD::CLMUL, VT) ||
9036 canNarrowCLMULToLegal(TLI: *this, Ctx, VT)))) {
9037 SDValue XRev = DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT, Operand: X);
9038 SDValue YRev = DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT, Operand: Y);
9039 SDValue ClMul = DAG.getNode(Opcode: ISD::CLMUL, DL, VT, N1: XRev, N2: YRev);
9040 SDValue Res = DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT, Operand: ClMul);
9041 if (Opcode == ISD::CLMULH)
9042 Res = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Res,
9043 N2: DAG.getShiftAmountConstant(Val: 1, VT, DL));
9044 return Res;
9045 }
9046 SDValue XExt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: ExtVT, Operand: X);
9047 SDValue YExt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: ExtVT, Operand: Y);
9048 SDValue ClMul = DAG.getNode(Opcode: ISD::CLMUL, DL, VT: ExtVT, N1: XExt, N2: YExt);
9049 unsigned ShAmt = Opcode == ISD::CLMULR ? BW - 1 : BW;
9050 SDValue HiBits = DAG.getNode(Opcode: ISD::SRL, DL, VT: ExtVT, N1: ClMul,
9051 N2: DAG.getShiftAmountConstant(Val: ShAmt, VT: ExtVT, DL));
9052 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: HiBits);
9053 }
9054 }
9055 llvm_unreachable("Expected CLMUL, CLMULR, or CLMULH");
9056}
9057
9058SDValue TargetLowering::expandPEXT(SDNode *Node, SelectionDAG &DAG) const {
9059 SDLoc DL(Node);
9060 EVT VT = Node->getValueType(ResNo: 0);
9061 SDValue Val = Node->getOperand(Num: 0);
9062 SDValue Msk = Node->getOperand(Num: 1);
9063 unsigned BW = VT.getScalarSizeInBits();
9064
9065 // Hacker's Delight §7-4: Compress, or Generalized Extract
9066 SDValue X = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Val, N2: Msk);
9067 SDValue M = Msk;
9068 SDValue One = DAG.getShiftAmountConstant(Val: 1, VT, DL);
9069 SDValue Mk = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: DAG.getNOT(DL, Val: M, VT), N2: One);
9070
9071 // Repeatedly compute which bits would shift to the right by an odd amount,
9072 // shift all such bits in parallel using a mask, and double the shift amount.
9073 for (unsigned I = 1; I < BW; I *= 2) {
9074 // This expands the "parallel prefix" operation to clmul(Mk, ~0).
9075 SDValue Mp =
9076 DAG.getNode(Opcode: ISD::CLMUL, DL, VT, N1: Mk, N2: DAG.getAllOnesConstant(DL, VT));
9077 SDValue Mv = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Mp, N2: M);
9078 SDValue ShiftI = DAG.getShiftAmountConstant(Val: I, VT, DL);
9079 SDValue MvS = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Mv, N2: ShiftI);
9080 M = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: M, N2: Mv), N2: MvS,
9081 Flags: SDNodeFlags::Disjoint);
9082 SDValue T = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: X, N2: Mv);
9083 SDValue TS = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: T, N2: ShiftI);
9084 X = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: X, N2: T), N2: TS,
9085 Flags: SDNodeFlags::Disjoint);
9086 if (I * 2 < BW)
9087 Mk = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Mk, N2: DAG.getNOT(DL, Val: Mp, VT));
9088 }
9089
9090 return X;
9091}
9092
9093SDValue TargetLowering::expandPDEP(SDNode *Node, SelectionDAG &DAG) const {
9094 SDLoc DL(Node);
9095 EVT VT = Node->getValueType(ResNo: 0);
9096 SDValue Val = Node->getOperand(Num: 0);
9097 SDValue Msk = Node->getOperand(Num: 1);
9098 unsigned BW = VT.getScalarSizeInBits();
9099
9100 // Hacker's Delight §7-5: Expand, or Generalized Insert.
9101 unsigned LogBW = Log2_32_Ceil(Value: BW);
9102 SmallVector<SDValue, 8> MvArray(LogBW);
9103 SDValue One = DAG.getShiftAmountConstant(Val: 1, VT, DL);
9104 SDValue Mc = Msk;
9105 SDValue Mk = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: DAG.getNOT(DL, Val: Msk, VT), N2: One);
9106
9107 // First pass: compute move masks for each power of two that a bit moves by.
9108 for (unsigned S = 0; S < LogBW; ++S) {
9109 unsigned ShiftS = 1u << S;
9110 // This expands the "parallel prefix" operation to clmul(Mk, ~0).
9111 SDValue Mp =
9112 DAG.getNode(Opcode: ISD::CLMUL, DL, VT, N1: Mk, N2: DAG.getAllOnesConstant(DL, VT));
9113 SDValue Mv = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Mp, N2: Mc);
9114 MvArray[S] = Mv;
9115 if (S + 1 < LogBW) {
9116 SDValue McXorMv = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Mc, N2: Mv);
9117 SDValue MvShifted = DAG.getNode(
9118 Opcode: ISD::SRL, DL, VT, N1: Mv, N2: DAG.getShiftAmountConstant(Val: ShiftS, VT, DL));
9119 Mc = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: McXorMv, N2: MvShifted,
9120 Flags: SDNodeFlags::Disjoint);
9121 Mk = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Mk, N2: DAG.getNOT(DL, Val: Mp, VT));
9122 }
9123 }
9124
9125 // Second pass: move bits by 32, 16, 8, 4, 2, 1, using masks, in parallel.
9126 // Each pass handles half the shift amount of the previous pass.
9127 SDValue X = Val;
9128 for (int S = (int)LogBW - 1; S >= 0; --S) {
9129 SDValue ShiftSv = DAG.getShiftAmountConstant(Val: 1ull << S, VT, DL);
9130 SDValue T = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: ShiftSv);
9131 SDValue UnshiftedBits =
9132 DAG.getNode(Opcode: ISD::AND, DL, VT, N1: X, N2: DAG.getNOT(DL, Val: MvArray[S], VT));
9133 SDValue ShiftedBits = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: T, N2: MvArray[S]);
9134 X = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: UnshiftedBits, N2: ShiftedBits,
9135 Flags: SDNodeFlags::Disjoint);
9136 }
9137
9138 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: X, N2: Msk);
9139}
9140
9141void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
9142 SelectionDAG &DAG) const {
9143 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
9144 EVT VT = Node->getValueType(ResNo: 0);
9145 unsigned VTBits = VT.getScalarSizeInBits();
9146 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
9147
9148 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
9149 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
9150 SDValue ShOpLo = Node->getOperand(Num: 0);
9151 SDValue ShOpHi = Node->getOperand(Num: 1);
9152 SDValue ShAmt = Node->getOperand(Num: 2);
9153 EVT ShAmtVT = ShAmt.getValueType();
9154 EVT ShAmtCCVT =
9155 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: ShAmtVT);
9156 SDLoc dl(Node);
9157
9158 // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
9159 // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
9160 // away during isel.
9161 SDValue SafeShAmt = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ShAmtVT, N1: ShAmt,
9162 N2: DAG.getConstant(Val: VTBits - 1, DL: dl, VT: ShAmtVT));
9163 SDValue Tmp1 = IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: ShOpHi,
9164 N2: DAG.getConstant(Val: VTBits - 1, DL: dl, VT: ShAmtVT))
9165 : DAG.getConstant(Val: 0, DL: dl, VT);
9166
9167 SDValue Tmp2, Tmp3;
9168 if (IsSHL) {
9169 Tmp2 = DAG.getNode(Opcode: ISD::FSHL, DL: dl, VT, N1: ShOpHi, N2: ShOpLo, N3: ShAmt);
9170 Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: ShOpLo, N2: SafeShAmt);
9171 } else {
9172 Tmp2 = DAG.getNode(Opcode: ISD::FSHR, DL: dl, VT, N1: ShOpHi, N2: ShOpLo, N3: ShAmt);
9173 Tmp3 = DAG.getNode(Opcode: IsSRA ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: ShOpHi, N2: SafeShAmt);
9174 }
9175
9176 // If the shift amount is larger or equal than the width of a part we don't
9177 // use the result from the FSHL/FSHR. Insert a test and select the appropriate
9178 // values for large shift amounts.
9179 SDValue AndNode = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ShAmtVT, N1: ShAmt,
9180 N2: DAG.getConstant(Val: VTBits, DL: dl, VT: ShAmtVT));
9181 SDValue Cond = DAG.getSetCC(DL: dl, VT: ShAmtCCVT, LHS: AndNode,
9182 RHS: DAG.getConstant(Val: 0, DL: dl, VT: ShAmtVT), Cond: ISD::SETNE);
9183
9184 if (IsSHL) {
9185 Hi = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp3, N3: Tmp2);
9186 Lo = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp1, N3: Tmp3);
9187 } else {
9188 Lo = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp3, N3: Tmp2);
9189 Hi = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp1, N3: Tmp3);
9190 }
9191}
9192
9193SDValue TargetLowering::expandFCANONICALIZE(SDNode *Node,
9194 SelectionDAG &DAG) const {
9195 // This implements llvm.canonicalize.f* by multiplication with 1.0, as
9196 // suggested in
9197 // https://llvm.org/docs/LangRef.html#llvm-canonicalize-intrinsic.
9198 // It uses strict_fp operations even outside a strict_fp context in order
9199 // to guarantee that the canonicalization is not optimized away by later
9200 // passes. The result chain introduced by that is intentionally ignored
9201 // since no ordering requirement is intended here.
9202 EVT VT = Node->getValueType(ResNo: 0);
9203 SDLoc DL(Node);
9204 SDNodeFlags Flags = Node->getFlags();
9205 Flags.setNoFPExcept(true);
9206 SDValue One = DAG.getConstantFP(Val: 1.0, DL, VT);
9207 SDValue Mul =
9208 DAG.getNode(Opcode: ISD::STRICT_FMUL, DL, ResultTys: {VT, MVT::Other},
9209 Ops: {DAG.getEntryNode(), Node->getOperand(Num: 0), One}, Flags);
9210 return Mul;
9211}
9212
9213SDValue TargetLowering::expandCONVERT_TO_ARBITRARY_FP(SDNode *Node,
9214 SelectionDAG &DAG) const {
9215 // Expand conversion from a native IEEE float type to an arbitrary FP format
9216 // returning the result as an integer using bit manipulation.
9217 EVT ResVT = Node->getValueType(ResNo: 0);
9218 SDLoc dl(Node);
9219
9220 SDValue FloatVal = Node->getOperand(Num: 0);
9221 const uint64_t SemEnum = Node->getConstantOperandVal(Num: 1);
9222 const auto Sem = static_cast<APFloatBase::Semantics>(SemEnum);
9223 const auto RoundMode =
9224 static_cast<RoundingMode>(Node->getConstantOperandVal(Num: 2));
9225 const bool Saturate = Node->getConstantOperandVal(Num: 3) != 0;
9226
9227 // Supported destination formats.
9228 switch (Sem) {
9229 case APFloatBase::S_Float8E5M2:
9230 case APFloatBase::S_Float8E4M3FN:
9231 case APFloatBase::S_Float6E3M2FN:
9232 case APFloatBase::S_Float6E2M3FN:
9233 case APFloatBase::S_Float4E2M1FN:
9234 break;
9235 default:
9236 DAG.getContext()->emitError(ErrorStr: "CONVERT_TO_ARBITRARY_FP: not implemented "
9237 "destination format (semantics enum " +
9238 Twine(SemEnum) + ")");
9239 return SDValue();
9240 }
9241
9242 // Supported rounding modes.
9243 switch (RoundMode) {
9244 case RoundingMode::NearestTiesToEven:
9245 case RoundingMode::TowardZero:
9246 case RoundingMode::TowardPositive:
9247 case RoundingMode::TowardNegative:
9248 case RoundingMode::NearestTiesToAway:
9249 break;
9250 default:
9251 DAG.getContext()->emitError(
9252 ErrorStr: "CONVERT_TO_ARBITRARY_FP: unsupported rounding mode (enum " +
9253 Twine(static_cast<int>(RoundMode)) + ")");
9254 return SDValue();
9255 }
9256
9257 // Destination format parameters.
9258 const fltSemantics &DstSem = APFloatBase::EnumToSemantics(S: Sem);
9259 const unsigned DstBits = APFloat::getSizeInBits(Sem: DstSem);
9260 const unsigned DstPrecision = APFloat::semanticsPrecision(DstSem);
9261 const unsigned DstMant = DstPrecision - 1;
9262 const unsigned DstExpBits = DstBits - DstMant - 1;
9263 const int DstBias = 1 - APFloat::semanticsMinExponent(DstSem);
9264 const unsigned DstExpMax = (1U << DstExpBits) - 1;
9265 const uint64_t DstMantMask = (DstMant > 0) ? ((1ULL << DstMant) - 1) : 0;
9266 const fltNonfiniteBehavior DstNFBehavior = DstSem.nonFiniteBehavior;
9267 const fltNanEncoding DstNanEnc = DstSem.nanEncoding;
9268
9269 // Compute the maximum normal exponent for the destination format.
9270 const unsigned DstExpMaxNormal =
9271 DstNFBehavior == fltNonfiniteBehavior::IEEE754 ? DstExpMax - 1
9272 : DstExpMax;
9273
9274 // For NanOnly formats the max exponent field for finite values
9275 // is DstExpMax, but the encoding with exp = DstExpMax and
9276 // mant = all-ones is NaN. So DstExpMaxNormal = DstExpMax, but max
9277 // mantissa at that exponent is DstMantMask - 1 (if NanEnc == AllOnes) to
9278 // avoid the NaN encoding.
9279 uint64_t DstMaxMantAtMaxExp = DstMantMask;
9280 if (DstNFBehavior == fltNonfiniteBehavior::NanOnly &&
9281 DstNanEnc == fltNanEncoding::AllOnes)
9282 DstMaxMantAtMaxExp = DstMantMask - 1;
9283
9284 // Source format parameters.
9285 EVT SrcVT = FloatVal.getValueType();
9286 const fltSemantics &SrcSem = SrcVT.getScalarType().getFltSemantics();
9287 const unsigned SrcBits = APFloat::getSizeInBits(Sem: SrcSem);
9288 const unsigned SrcPrecision = APFloat::semanticsPrecision(SrcSem);
9289 const unsigned SrcMant = SrcPrecision - 1;
9290 const uint64_t SrcMantMask = (1ULL << SrcMant) - 1;
9291
9292 // Work in the source integer type. Match the destination shape so the
9293 // expansion stays vector when ResVT is a vector.
9294 EVT IntScalarVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: SrcBits);
9295 EVT IntVT = ResVT.changeElementType(Context&: *DAG.getContext(), EltVT: IntScalarVT);
9296 EVT SetCCVT =
9297 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: IntVT);
9298 EVT FPSetCCVT =
9299 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: SrcVT);
9300
9301 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: IntVT);
9302 SDValue One = DAG.getConstant(Val: 1, DL: dl, VT: IntVT);
9303
9304 // Bitcast source float to integer to extract the sign bit.
9305 SDValue Src = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: IntVT, Operand: FloatVal);
9306 SDValue SignBit =
9307 DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: IntVT, N1: Src,
9308 N2: DAG.getShiftAmountConstant(Val: SrcBits - 1, VT: IntVT, DL: dl));
9309
9310 // Classify the input.
9311 SDValue FPZero = DAG.getConstantFP(Val: 0.0, DL: dl, VT: SrcVT);
9312 SDValue FPInf = DAG.getConstantFP(Val: APFloat::getInf(Sem: SrcSem), DL: dl, VT: SrcVT);
9313 SDValue AbsVal = DAG.getNode(Opcode: ISD::FABS, DL: dl, VT: SrcVT, Operand: FloatVal);
9314 SDValue IsNaN = DAG.getSetCC(DL: dl, VT: FPSetCCVT, LHS: FloatVal, RHS: FPZero, Cond: ISD::SETUO);
9315 SDValue IsInf = DAG.getSetCC(DL: dl, VT: FPSetCCVT, LHS: AbsVal, RHS: FPInf, Cond: ISD::SETOEQ);
9316 SDValue IsZero = DAG.getSetCC(DL: dl, VT: FPSetCCVT, LHS: FloatVal, RHS: FPZero, Cond: ISD::SETOEQ);
9317
9318 // Split into a normalized fraction and unbiased exponent. FFREXP normalizes
9319 // source denormals automatically. The result is unspecified for Inf/NaN, but
9320 // those inputs are detected above and override the final result.
9321 EVT FrexpExpScalarVT =
9322 getValueType(DL: DAG.getDataLayout(), Ty: Type::getInt32Ty(C&: *DAG.getContext()));
9323 EVT FrexpExpVT = SrcVT.changeElementType(Context&: *DAG.getContext(), EltVT: FrexpExpScalarVT);
9324 SDValue Frexp =
9325 DAG.getNode(Opcode: ISD::FFREXP, DL: dl, VTList: DAG.getVTList(VT1: SrcVT, VT2: FrexpExpVT), N: FloatVal);
9326 SDValue FrexpFrac = Frexp.getValue(R: 0);
9327 SDValue FrexpExp = Frexp.getValue(R: 1);
9328
9329 SDValue FrexpFracInt = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: IntVT, Operand: FrexpFrac);
9330 SDValue EffSrcMant = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: FrexpFracInt,
9331 N2: DAG.getConstant(Val: SrcMantMask, DL: dl, VT: IntVT));
9332
9333 SDValue FrexpExpExt = DAG.getSExtOrTrunc(Op: FrexpExp, DL: dl, VT: IntVT);
9334 SDValue NewExp = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: IntVT, N1: FrexpExpExt,
9335 N2: DAG.getConstant(Val: DstBias - 1, DL: dl, VT: IntVT));
9336
9337 // Compute rounding increment given the round bit, sticky bits, and LSB
9338 // of the truncated mantissa.
9339 auto ComputeRoundUp = [&](SDValue RoundBit, SDValue StickyBits,
9340 SDValue LSB) -> SDValue {
9341 switch (RoundMode) {
9342 case RoundingMode::NearestTiesToEven: {
9343 // Round up if round_bit && (sticky || lsb)
9344 SDValue StickyOrLSB = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: IntVT, N1: StickyBits, N2: LSB);
9345 return DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: RoundBit, N2: StickyOrLSB);
9346 }
9347 case RoundingMode::TowardZero:
9348 return Zero;
9349 case RoundingMode::TowardPositive: {
9350 // Round up if positive and any truncated bits are set.
9351 SDValue AnyTruncBits =
9352 DAG.getNode(Opcode: ISD::OR, DL: dl, VT: IntVT, N1: RoundBit, N2: StickyBits);
9353 SDValue HasTruncBits =
9354 DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: AnyTruncBits, RHS: Zero, Cond: ISD::SETNE);
9355 SDValue IsPositive = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: SignBit, RHS: Zero, Cond: ISD::SETEQ);
9356 SDValue DoRound =
9357 DAG.getNode(Opcode: ISD::AND, DL: dl, VT: SetCCVT, N1: HasTruncBits, N2: IsPositive);
9358 return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: IntVT, Operand: DoRound);
9359 }
9360 case RoundingMode::TowardNegative: {
9361 // Round up if negative and any truncated bits are set (to -Inf).
9362 SDValue AnyTruncBits =
9363 DAG.getNode(Opcode: ISD::OR, DL: dl, VT: IntVT, N1: RoundBit, N2: StickyBits);
9364 SDValue HasTruncBits =
9365 DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: AnyTruncBits, RHS: Zero, Cond: ISD::SETNE);
9366 SDValue IsNegative = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: SignBit, RHS: Zero, Cond: ISD::SETNE);
9367 SDValue DoRound =
9368 DAG.getNode(Opcode: ISD::AND, DL: dl, VT: SetCCVT, N1: HasTruncBits, N2: IsNegative);
9369 return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: IntVT, Operand: DoRound);
9370 }
9371 case RoundingMode::NearestTiesToAway:
9372 return RoundBit;
9373 default:
9374 llvm_unreachable("unsupported rounding mode");
9375 }
9376 };
9377
9378 // Round mantissa from SrcMant bits to DstMant bits.
9379 SDValue TruncMant;
9380 SDValue RoundUp;
9381 if (SrcMant > DstMant) {
9382 const unsigned Shift = SrcMant - DstMant;
9383 SDValue ShiftConst = DAG.getShiftAmountConstant(Val: Shift, VT: IntVT, DL: dl);
9384 TruncMant = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: IntVT, N1: EffSrcMant, N2: ShiftConst);
9385
9386 // Check bit at position Shift - 1 aka the round bit.
9387 SDValue RoundBit;
9388 if (Shift >= 1) {
9389 SDValue RoundBitShift = DAG.getShiftAmountConstant(Val: Shift - 1, VT: IntVT, DL: dl);
9390 SDValue ShiftedMant =
9391 DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: IntVT, N1: EffSrcMant, N2: RoundBitShift);
9392 RoundBit = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: ShiftedMant, N2: One);
9393 } else {
9394 RoundBit = Zero;
9395 }
9396
9397 // OR of all bits below the round bit to get sticky bits.
9398 SDValue StickyBits;
9399 if (Shift >= 2) {
9400 uint64_t StickyMask = maskTrailingOnes<uint64_t>(N: Shift - 1);
9401 StickyBits = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: EffSrcMant,
9402 N2: DAG.getConstant(Val: StickyMask, DL: dl, VT: IntVT));
9403 StickyBits = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: StickyBits, RHS: Zero, Cond: ISD::SETNE);
9404 StickyBits = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: IntVT, Operand: StickyBits);
9405 } else {
9406 StickyBits = Zero;
9407 }
9408
9409 // LSB of truncated mantissa.
9410 SDValue LSB = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: TruncMant, N2: One);
9411
9412 RoundUp = ComputeRoundUp(RoundBit, StickyBits, LSB);
9413 } else {
9414 // If DstMant >= SrcMant, then no rounding needed, just shift left.
9415 SDValue MantShift =
9416 DAG.getShiftAmountConstant(Val: DstMant - SrcMant, VT: IntVT, DL: dl);
9417 TruncMant = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: IntVT, N1: EffSrcMant, N2: MantShift);
9418 RoundUp = Zero;
9419 }
9420
9421 // Apply rounding.
9422 SDValue RoundedMant = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: IntVT, N1: TruncMant, N2: RoundUp);
9423
9424 // Handle mantissa overflow from rounding.
9425 // If rounded_mant > DstMantMask, carry into exponent.
9426 SDValue MantOverflow =
9427 DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: RoundedMant,
9428 RHS: DAG.getConstant(Val: DstMantMask, DL: dl, VT: IntVT), Cond: ISD::SETGT);
9429 // On overflow: mant = 0, exp += 1.
9430 SDValue AdjMant = DAG.getSelect(DL: dl, VT: IntVT, Cond: MantOverflow, LHS: Zero, RHS: RoundedMant);
9431 SDValue AdjExp =
9432 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: IntVT, N1: NewExp,
9433 N2: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: IntVT, Operand: MantOverflow));
9434
9435 // Precompute sign shifted to MSB of destination.
9436 SDValue SignShifted =
9437 DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: IntVT, N1: SignBit,
9438 N2: DAG.getShiftAmountConstant(Val: DstBits - 1, VT: IntVT, DL: dl));
9439
9440 // Destination denormal conversion (when new_exp <= 0).
9441 // Shift the mantissa right by 1 - new_exp additional bits and set the
9442 // exponent field to 0.
9443 SDValue ExpIsNeg = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: AdjExp,
9444 RHS: DAG.getConstant(Val: 1, DL: dl, VT: IntVT), Cond: ISD::SETLT);
9445
9446 SDValue DenormResult;
9447 {
9448 // denorm_shift = 1 - NewExp.
9449 SDValue DenormShift = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: One, N2: NewExp);
9450
9451 // full_src_mant = (1 << SrcMant) | EffSrcMant.
9452 SDValue ImplicitOne =
9453 DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: IntVT, N1: One,
9454 N2: DAG.getShiftAmountConstant(Val: SrcMant, VT: IntVT, DL: dl));
9455 SDValue FullSrcMant =
9456 DAG.getNode(Opcode: ISD::OR, DL: dl, VT: IntVT, N1: EffSrcMant, N2: ImplicitOne);
9457
9458 // Total right shift = DenormShift + (SrcMant - DstMant).
9459 int64_t MantDelta = static_cast<int64_t>(SrcMant) - DstMant;
9460 SDValue TotalShift =
9461 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: IntVT, N1: DenormShift,
9462 N2: DAG.getSignedConstant(Val: MantDelta, DL: dl, VT: IntVT));
9463
9464 // Clamp total shift to avoid UB, then truncate denorm mantissa.
9465 EVT ShiftVT = getShiftAmountTy(LHSTy: IntVT, DL: DAG.getDataLayout());
9466 SDValue MaxShift = DAG.getConstant(Val: SrcBits - 1, DL: dl, VT: IntVT);
9467 SDValue ClampedShift =
9468 DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT: IntVT, N1: TotalShift, N2: MaxShift);
9469 SDValue DenormTruncMant =
9470 DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: IntVT, N1: FullSrcMant,
9471 N2: DAG.getZExtOrTrunc(Op: ClampedShift, DL: dl, VT: ShiftVT));
9472
9473 // Rounding for denorm path.
9474 SDValue DenormRoundUp;
9475 {
9476 // Round bit is at position TotalShift - 1 of FullSrcMant.
9477 // Clamp to at least 1 so the subtraction doesn't underflow and create
9478 // shift nodes with invalid shift amounts.
9479 SDValue SafeShift = DAG.getNode(Opcode: ISD::UMAX, DL: dl, VT: IntVT, N1: ClampedShift, N2: One);
9480 SDValue RoundBitPos = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: SafeShift, N2: One);
9481 SDValue RoundBitPosAmt = DAG.getZExtOrTrunc(Op: RoundBitPos, DL: dl, VT: ShiftVT);
9482 SDValue DenormRoundBit = DAG.getNode(
9483 Opcode: ISD::AND, DL: dl, VT: IntVT,
9484 N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: IntVT, N1: FullSrcMant, N2: RoundBitPosAmt), N2: One);
9485
9486 // Sticky: all bits below round bit.
9487 // sticky_mask = (1 << RoundBitPos) - 1
9488 SDValue StickyMask = DAG.getNode(
9489 Opcode: ISD::SUB, DL: dl, VT: IntVT,
9490 N1: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: IntVT, N1: One, N2: RoundBitPosAmt), N2: One);
9491 SDValue DenormStickyBits =
9492 DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: FullSrcMant, N2: StickyMask);
9493 SDValue HasSticky = DAG.getNode(
9494 Opcode: ISD::ZERO_EXTEND, DL: dl, VT: IntVT,
9495 Operand: DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: DenormStickyBits, RHS: Zero, Cond: ISD::SETNE));
9496
9497 SDValue DenormLSB =
9498 DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: DenormTruncMant, N2: One);
9499
9500 DenormRoundUp = ComputeRoundUp(DenormRoundBit, HasSticky, DenormLSB);
9501
9502 // Only apply rounding if TotalShift >= 1 (i.e., there are bits to round).
9503 SDValue ShiftGEOne =
9504 DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: ClampedShift, RHS: One, Cond: ISD::SETUGE);
9505 DenormRoundUp = DAG.getSelect(DL: dl, VT: IntVT, Cond: ShiftGEOne, LHS: DenormRoundUp, RHS: Zero);
9506 }
9507
9508 SDValue DenormRoundedMant =
9509 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: IntVT, N1: DenormTruncMant, N2: DenormRoundUp);
9510
9511 // If rounding caused overflow into the normal range, then we get the
9512 // smallest normal number.
9513 SDValue DenormMantOF =
9514 DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: DenormRoundedMant,
9515 RHS: DAG.getConstant(Val: DstMantMask, DL: dl, VT: IntVT), Cond: ISD::SETGT);
9516 SDValue DenormFinalMant =
9517 DAG.getSelect(DL: dl, VT: IntVT, Cond: DenormMantOF, LHS: Zero, RHS: DenormRoundedMant);
9518 SDValue DenormFinalExp = DAG.getSelect(DL: dl, VT: IntVT, Cond: DenormMantOF, LHS: One, RHS: Zero);
9519
9520 // Assemble: sign | (exp << DstMant) | mant
9521 SDValue DenormExpShifted =
9522 DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: IntVT, N1: DenormFinalExp,
9523 N2: DAG.getShiftAmountConstant(Val: DstMant, VT: IntVT, DL: dl));
9524 DenormResult = DAG.getNode(
9525 Opcode: ISD::OR, DL: dl, VT: IntVT,
9526 N1: DAG.getNode(Opcode: ISD::OR, DL: dl, VT: IntVT, N1: SignShifted, N2: DenormExpShifted),
9527 N2: DenormFinalMant);
9528 }
9529
9530 // Exponent overflow detection.
9531 SDValue ExpOF =
9532 DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: AdjExp,
9533 RHS: DAG.getConstant(Val: DstExpMaxNormal, DL: dl, VT: IntVT), Cond: ISD::SETGT);
9534
9535 // Also check if AdjExp == DstExpMaxNormal and mantissa overflow into
9536 // a value that exceeds the max allowed mantissa at that exponent.
9537 SDValue ExpAtMax =
9538 DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: AdjExp,
9539 RHS: DAG.getConstant(Val: DstExpMaxNormal, DL: dl, VT: IntVT), Cond: ISD::SETEQ);
9540 SDValue MantExceedsMax =
9541 DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: AdjMant,
9542 RHS: DAG.getConstant(Val: DstMaxMantAtMaxExp, DL: dl, VT: IntVT), Cond: ISD::SETGT);
9543 SDValue ExpMantOF =
9544 DAG.getNode(Opcode: ISD::AND, DL: dl, VT: SetCCVT, N1: ExpAtMax, N2: MantExceedsMax);
9545 SDValue IsOverflow = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: SetCCVT, N1: ExpOF, N2: ExpMantOF);
9546
9547 // Build overflow result.
9548 SDValue OverflowResult;
9549
9550 if (Saturate) {
9551 // Clamp to max finite value:
9552 // sign | (DstExpMaxNormal << DstMant) | DstMaxMantAtMaxExp
9553 uint64_t MaxFinite =
9554 ((uint64_t)DstExpMaxNormal << DstMant) | DstMaxMantAtMaxExp;
9555 OverflowResult = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: IntVT, N1: SignShifted,
9556 N2: DAG.getConstant(Val: MaxFinite, DL: dl, VT: IntVT));
9557 } else if (DstNFBehavior == fltNonfiniteBehavior::IEEE754) {
9558 // Produce infinity.
9559 uint64_t InfBits = (uint64_t)DstExpMax << DstMant;
9560 OverflowResult = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: IntVT, N1: SignShifted,
9561 N2: DAG.getConstant(Val: InfBits, DL: dl, VT: IntVT));
9562 } else {
9563 // Emit poison if no Inf in format and not saturating.
9564 OverflowResult = DAG.getPOISON(VT: IntVT);
9565 }
9566
9567 // Assemble normal result: sign | (AdjExp << DstMant) | AdjMant
9568 SDValue NormExpShifted =
9569 DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: IntVT, N1: AdjExp,
9570 N2: DAG.getShiftAmountConstant(Val: DstMant, VT: IntVT, DL: dl));
9571 SDValue NormResult = DAG.getNode(
9572 Opcode: ISD::OR, DL: dl, VT: IntVT,
9573 N1: DAG.getNode(Opcode: ISD::OR, DL: dl, VT: IntVT, N1: SignShifted, N2: NormExpShifted), N2: AdjMant);
9574
9575 // Build special-value results.
9576 SDValue NaNResult;
9577 if (DstNFBehavior == fltNonfiniteBehavior::IEEE754) {
9578 // Produce canonical NaN.
9579 const uint64_t QNaNBit = (DstMant > 0) ? (1ULL << (DstMant - 1)) : 0;
9580 NaNResult =
9581 DAG.getConstant(Val: ((uint64_t)DstExpMax << DstMant) | QNaNBit, DL: dl, VT: IntVT);
9582 } else if (DstNFBehavior == fltNonfiniteBehavior::NanOnly &&
9583 DstNanEnc == fltNanEncoding::AllOnes) {
9584 // E4M3FN-style: NaN is exp=all-ones, mant=all-ones.
9585 NaNResult = DAG.getConstant(Val: ((uint64_t)DstExpMax << DstMant) | DstMantMask,
9586 DL: dl, VT: IntVT);
9587 } else {
9588 // NaN -> poison for finite only values.
9589 NaNResult = DAG.getPOISON(VT: IntVT);
9590 }
9591
9592 // Inf handling.
9593 SDValue InfResult;
9594 if (DstNFBehavior == fltNonfiniteBehavior::IEEE754) {
9595 // Produce signed infinity.
9596 uint64_t InfBits = (uint64_t)DstExpMax << DstMant;
9597 InfResult = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: IntVT, N1: SignShifted,
9598 N2: DAG.getConstant(Val: InfBits, DL: dl, VT: IntVT));
9599 } else if (Saturate) {
9600 // Inf saturates to max finite.
9601 uint64_t MaxFinite =
9602 ((uint64_t)DstExpMaxNormal << DstMant) | DstMaxMantAtMaxExp;
9603 InfResult = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: IntVT, N1: SignShifted,
9604 N2: DAG.getConstant(Val: MaxFinite, DL: dl, VT: IntVT));
9605 } else {
9606 // No Inf and not saturating -> poison.
9607 InfResult = DAG.getPOISON(VT: IntVT);
9608 }
9609
9610 SDValue ZeroResult = SignShifted;
9611
9612 // Final selection in an order: NaN takes priority, then Inf, then Zero.
9613 SDValue FiniteResult =
9614 DAG.getSelect(DL: dl, VT: IntVT, Cond: ExpIsNeg, LHS: DenormResult, RHS: NormResult);
9615 FiniteResult =
9616 DAG.getSelect(DL: dl, VT: IntVT, Cond: IsOverflow, LHS: OverflowResult, RHS: FiniteResult);
9617
9618 SDValue Result = FiniteResult;
9619 Result = DAG.getSelect(DL: dl, VT: IntVT, Cond: IsZero, LHS: ZeroResult, RHS: Result);
9620 Result = DAG.getSelect(DL: dl, VT: IntVT, Cond: IsInf, LHS: InfResult, RHS: Result);
9621 Result = DAG.getSelect(DL: dl, VT: IntVT, Cond: IsNaN, LHS: NaNResult, RHS: Result);
9622
9623 // Truncate to destination integer type.
9624 return DAG.getZExtOrTrunc(Op: Result, DL: dl, VT: ResVT);
9625}
9626
9627SDValue
9628TargetLowering::expandCONVERT_FROM_ARBITRARY_FP(SDNode *Node,
9629 SelectionDAG &DAG) const {
9630 SDLoc dl(Node);
9631 EVT DstVT = Node->getValueType(ResNo: 0);
9632 EVT DstScalarVT = DstVT.getScalarType();
9633
9634 SDValue IntVal = Node->getOperand(Num: 0);
9635 const uint64_t SemEnum = Node->getConstantOperandVal(Num: 1);
9636 const auto Sem = static_cast<APFloatBase::Semantics>(SemEnum);
9637
9638 // Supported source formats.
9639 switch (Sem) {
9640 case APFloatBase::S_Float8E5M2:
9641 case APFloatBase::S_Float8E4M3FN:
9642 case APFloatBase::S_Float6E3M2FN:
9643 case APFloatBase::S_Float6E2M3FN:
9644 case APFloatBase::S_Float4E2M1FN:
9645 break;
9646 default:
9647 DAG.getContext()->emitError(ErrorStr: "CONVERT_FROM_ARBITRARY_FP: not implemented "
9648 "source format (semantics enum " +
9649 Twine(SemEnum) + ")");
9650 return SDValue();
9651 }
9652
9653 const fltSemantics &SrcSem = APFloatBase::EnumToSemantics(S: Sem);
9654 const unsigned SrcBits = APFloat::getSizeInBits(Sem: SrcSem);
9655 const unsigned SrcPrecision = APFloat::semanticsPrecision(SrcSem);
9656 const unsigned SrcMant = SrcPrecision - 1;
9657 const unsigned SrcExp = SrcBits - SrcMant - 1;
9658 const int SrcBias = 1 - APFloat::semanticsMinExponent(SrcSem);
9659 const fltNonfiniteBehavior NFBehavior = SrcSem.nonFiniteBehavior;
9660
9661 // Destination format parameters.
9662 const fltSemantics &DstSem = DstScalarVT.getFltSemantics();
9663 const unsigned DstBits = APFloat::getSizeInBits(Sem: DstSem);
9664 const unsigned DstMant = APFloat::semanticsPrecision(DstSem) - 1;
9665 const unsigned DstExpBits = DstBits - DstMant - 1;
9666 const int DstMinExp = APFloat::semanticsMinExponent(DstSem);
9667 const int DstBias = 1 - DstMinExp;
9668 const uint64_t DstExpAllOnes = (1ULL << DstExpBits) - 1;
9669
9670 // Work in an integer type matching the destination float width.
9671 EVT IntScalarVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: DstBits);
9672 EVT IntVT = DstVT.isVector()
9673 ? EVT::getVectorVT(Context&: *DAG.getContext(), VT: IntScalarVT,
9674 EC: DstVT.getVectorElementCount())
9675 : IntScalarVT;
9676
9677 SDValue Src = DAG.getZExtOrTrunc(Op: IntVal, DL: dl, VT: IntVT);
9678
9679 EVT SetCCVT =
9680 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: IntVT);
9681
9682 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: IntVT);
9683 SDValue One = DAG.getConstant(Val: 1, DL: dl, VT: IntVT);
9684
9685 // Extract bit fields.
9686 const uint64_t MantMask = (SrcMant > 0) ? ((1ULL << SrcMant) - 1) : 0;
9687 const uint64_t ExpMask = (1ULL << SrcExp) - 1;
9688
9689 SDValue MantField = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Src,
9690 N2: DAG.getConstant(Val: MantMask, DL: dl, VT: IntVT));
9691
9692 SDValue ExpField =
9693 DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT,
9694 N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: IntVT, N1: Src,
9695 N2: DAG.getShiftAmountConstant(Val: SrcMant, VT: IntVT, DL: dl)),
9696 N2: DAG.getConstant(Val: ExpMask, DL: dl, VT: IntVT));
9697
9698 SDValue SignBit =
9699 DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: IntVT, N1: Src,
9700 N2: DAG.getShiftAmountConstant(Val: SrcBits - 1, VT: IntVT, DL: dl));
9701
9702 SDValue SignShifted =
9703 DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: IntVT, N1: SignBit,
9704 N2: DAG.getShiftAmountConstant(Val: DstBits - 1, VT: IntVT, DL: dl));
9705
9706 // Classify the input.
9707 SDValue ExpAllOnes = DAG.getConstant(Val: ExpMask, DL: dl, VT: IntVT);
9708 SDValue IsExpAllOnes =
9709 DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: ExpField, RHS: ExpAllOnes, Cond: ISD::SETEQ);
9710 SDValue IsExpZero = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: ExpField, RHS: Zero, Cond: ISD::SETEQ);
9711 SDValue IsMantZero = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: MantField, RHS: Zero, Cond: ISD::SETEQ);
9712 SDValue IsMantNonZero =
9713 DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: MantField, RHS: Zero, Cond: ISD::SETNE);
9714
9715 SDValue IsNaN;
9716 if (NFBehavior == fltNonfiniteBehavior::FiniteOnly) {
9717 IsNaN = DAG.getBoolConstant(V: false, DL: dl, VT: SetCCVT, OpVT: IntVT);
9718 } else if (NFBehavior == fltNonfiniteBehavior::IEEE754) {
9719 IsNaN = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: SetCCVT, N1: IsExpAllOnes, N2: IsMantNonZero);
9720 } else {
9721 assert(SrcSem.nanEncoding == fltNanEncoding::AllOnes);
9722 SDValue MantAllOnes = DAG.getConstant(Val: MantMask, DL: dl, VT: IntVT);
9723 SDValue IsMantAllOnes =
9724 DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: MantField, RHS: MantAllOnes, Cond: ISD::SETEQ);
9725 IsNaN = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: SetCCVT, N1: IsExpAllOnes, N2: IsMantAllOnes);
9726 }
9727
9728 SDValue IsInf;
9729 if (NFBehavior == fltNonfiniteBehavior::IEEE754)
9730 IsInf = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: SetCCVT, N1: IsExpAllOnes, N2: IsMantZero);
9731 else
9732 IsInf = DAG.getBoolConstant(V: false, DL: dl, VT: SetCCVT, OpVT: IntVT);
9733
9734 SDValue IsZero = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: SetCCVT, N1: IsExpZero, N2: IsMantZero);
9735 SDValue IsDenorm =
9736 DAG.getNode(Opcode: ISD::AND, DL: dl, VT: SetCCVT, N1: IsExpZero, N2: IsMantNonZero);
9737
9738 // Normal value conversion.
9739 const int BiasAdjust = DstBias - SrcBias;
9740 SDValue NormDstExp =
9741 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: IntVT, N1: ExpField,
9742 N2: DAG.getConstant(Val: APInt(DstBits, BiasAdjust, true), DL: dl, VT: IntVT));
9743
9744 SDValue NormDstMant;
9745 if (DstMant > SrcMant) {
9746 SDValue NormDstMantShift =
9747 DAG.getShiftAmountConstant(Val: DstMant - SrcMant, VT: IntVT, DL: dl);
9748 NormDstMant = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: IntVT, N1: MantField, N2: NormDstMantShift);
9749 } else {
9750 NormDstMant = MantField;
9751 }
9752
9753 SDValue DstMantShift = DAG.getShiftAmountConstant(Val: DstMant, VT: IntVT, DL: dl);
9754 SDValue NormExpShifted =
9755 DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: IntVT, N1: NormDstExp, N2: DstMantShift);
9756 SDValue NormResult =
9757 DAG.getNode(Opcode: ISD::OR, DL: dl, VT: IntVT,
9758 N1: DAG.getNode(Opcode: ISD::OR, DL: dl, VT: IntVT, N1: SignShifted, N2: NormExpShifted),
9759 N2: NormDstMant);
9760
9761 // Denormal value conversion.
9762 SDValue DenormResult;
9763 {
9764 const unsigned IntVTBits = DstBits;
9765 SDValue LeadingZeros =
9766 DAG.getNode(Opcode: ISD::CTLZ_ZERO_POISON, DL: dl, VT: IntVT, Operand: MantField);
9767
9768 const int DenormExpConst =
9769 (int)IntVTBits + DstBias - SrcBias - (int)SrcMant;
9770 SDValue DenormDstExp = DAG.getNode(
9771 Opcode: ISD::SUB, DL: dl, VT: IntVT,
9772 N1: DAG.getConstant(Val: APInt(DstBits, DenormExpConst, true), DL: dl, VT: IntVT),
9773 N2: LeadingZeros);
9774
9775 SDValue MantMSB =
9776 DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT,
9777 N1: DAG.getConstant(Val: IntVTBits - 1, DL: dl, VT: IntVT), N2: LeadingZeros);
9778
9779 SDValue LeadingOne = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: IntVT, N1: One, N2: MantMSB);
9780 SDValue Frac = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: IntVT, N1: MantField, N2: LeadingOne);
9781
9782 const unsigned ShiftSub = IntVTBits - 1 - DstMant;
9783 SDValue ShiftAmount = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: LeadingZeros,
9784 N2: DAG.getConstant(Val: ShiftSub, DL: dl, VT: IntVT));
9785
9786 SDValue DenormDstMant = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: IntVT, N1: Frac, N2: ShiftAmount);
9787
9788 SDValue DenormExpShifted =
9789 DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: IntVT, N1: DenormDstExp, N2: DstMantShift);
9790 DenormResult = DAG.getNode(
9791 Opcode: ISD::OR, DL: dl, VT: IntVT,
9792 N1: DAG.getNode(Opcode: ISD::OR, DL: dl, VT: IntVT, N1: SignShifted, N2: DenormExpShifted),
9793 N2: DenormDstMant);
9794 }
9795
9796 SDValue FiniteResult =
9797 DAG.getSelect(DL: dl, VT: IntVT, Cond: IsDenorm, LHS: DenormResult, RHS: NormResult);
9798
9799 const uint64_t QNaNBit = (DstMant > 0) ? (1ULL << (DstMant - 1)) : 0;
9800 SDValue NaNResult =
9801 DAG.getConstant(Val: (DstExpAllOnes << DstMant) | QNaNBit, DL: dl, VT: IntVT);
9802
9803 SDValue InfResult =
9804 DAG.getNode(Opcode: ISD::OR, DL: dl, VT: IntVT, N1: SignShifted,
9805 N2: DAG.getConstant(Val: DstExpAllOnes << DstMant, DL: dl, VT: IntVT));
9806
9807 SDValue ZeroResult = SignShifted;
9808
9809 SDValue Result = FiniteResult;
9810 Result = DAG.getSelect(DL: dl, VT: IntVT, Cond: IsZero, LHS: ZeroResult, RHS: Result);
9811 Result = DAG.getSelect(DL: dl, VT: IntVT, Cond: IsInf, LHS: InfResult, RHS: Result);
9812 Result = DAG.getSelect(DL: dl, VT: IntVT, Cond: IsNaN, LHS: NaNResult, RHS: Result);
9813
9814 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: DstVT, Operand: Result);
9815}
9816
9817bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
9818 SelectionDAG &DAG) const {
9819 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
9820 SDValue Src = Node->getOperand(Num: OpNo);
9821 EVT SrcVT = Src.getValueType();
9822 EVT DstVT = Node->getValueType(ResNo: 0);
9823 SDLoc dl(SDValue(Node, 0));
9824
9825 // FIXME: Only f32 to i64 conversions are supported.
9826 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
9827 return false;
9828
9829 if (Node->isStrictFPOpcode())
9830 // When a NaN is converted to an integer a trap is allowed. We can't
9831 // use this expansion here because it would eliminate that trap. Other
9832 // traps are also allowed and cannot be eliminated. See
9833 // IEEE 754-2008 sec 5.8.
9834 return false;
9835
9836 // Expand f32 -> i64 conversion
9837 // This algorithm comes from compiler-rt's implementation of fixsfdi:
9838 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
9839 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
9840 EVT IntVT = SrcVT.changeTypeToInteger();
9841 EVT IntShVT = getShiftAmountTy(LHSTy: IntVT, DL: DAG.getDataLayout());
9842
9843 SDValue ExponentMask = DAG.getConstant(Val: 0x7F800000, DL: dl, VT: IntVT);
9844 SDValue ExponentLoBit = DAG.getConstant(Val: 23, DL: dl, VT: IntVT);
9845 SDValue Bias = DAG.getConstant(Val: 127, DL: dl, VT: IntVT);
9846 SDValue SignMask = DAG.getConstant(Val: APInt::getSignMask(BitWidth: SrcEltBits), DL: dl, VT: IntVT);
9847 SDValue SignLowBit = DAG.getConstant(Val: SrcEltBits - 1, DL: dl, VT: IntVT);
9848 SDValue MantissaMask = DAG.getConstant(Val: 0x007FFFFF, DL: dl, VT: IntVT);
9849
9850 SDValue Bits = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: IntVT, Operand: Src);
9851
9852 SDValue ExponentBits = DAG.getNode(
9853 Opcode: ISD::SRL, DL: dl, VT: IntVT, N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: ExponentMask),
9854 N2: DAG.getZExtOrTrunc(Op: ExponentLoBit, DL: dl, VT: IntShVT));
9855 SDValue Exponent = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: ExponentBits, N2: Bias);
9856
9857 SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: IntVT,
9858 N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: SignMask),
9859 N2: DAG.getZExtOrTrunc(Op: SignLowBit, DL: dl, VT: IntShVT));
9860 Sign = DAG.getSExtOrTrunc(Op: Sign, DL: dl, VT: DstVT);
9861
9862 SDValue R = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: IntVT,
9863 N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: MantissaMask),
9864 N2: DAG.getConstant(Val: 0x00800000, DL: dl, VT: IntVT));
9865
9866 R = DAG.getZExtOrTrunc(Op: R, DL: dl, VT: DstVT);
9867
9868 R = DAG.getSelectCC(
9869 DL: dl, LHS: Exponent, RHS: ExponentLoBit,
9870 True: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: DstVT, N1: R,
9871 N2: DAG.getZExtOrTrunc(
9872 Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: Exponent, N2: ExponentLoBit),
9873 DL: dl, VT: IntShVT)),
9874 False: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: DstVT, N1: R,
9875 N2: DAG.getZExtOrTrunc(
9876 Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: ExponentLoBit, N2: Exponent),
9877 DL: dl, VT: IntShVT)),
9878 Cond: ISD::SETGT);
9879
9880 SDValue Ret = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: DstVT,
9881 N1: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: R, N2: Sign), N2: Sign);
9882
9883 Result = DAG.getSelectCC(DL: dl, LHS: Exponent, RHS: DAG.getConstant(Val: 0, DL: dl, VT: IntVT),
9884 True: DAG.getConstant(Val: 0, DL: dl, VT: DstVT), False: Ret, Cond: ISD::SETLT);
9885 return true;
9886}
9887
9888bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
9889 SDValue &Chain,
9890 SelectionDAG &DAG) const {
9891 SDLoc dl(SDValue(Node, 0));
9892 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
9893 SDValue Src = Node->getOperand(Num: OpNo);
9894
9895 EVT SrcVT = Src.getValueType();
9896 EVT DstVT = Node->getValueType(ResNo: 0);
9897 EVT SetCCVT =
9898 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: SrcVT);
9899 EVT DstSetCCVT =
9900 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: DstVT);
9901
9902 // Only expand vector types if we have the appropriate vector bit operations.
9903 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
9904 ISD::FP_TO_SINT;
9905 if (DstVT.isVector() && (!isOperationLegalOrCustom(Op: SIntOpcode, VT: DstVT) ||
9906 !isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT: SrcVT)))
9907 return false;
9908
9909 // If the maximum float value is smaller then the signed integer range,
9910 // the destination signmask can't be represented by the float, so we can
9911 // just use FP_TO_SINT directly.
9912 const fltSemantics &APFSem = SrcVT.getFltSemantics();
9913 APFloat APF(APFSem, APInt::getZero(numBits: SrcVT.getScalarSizeInBits()));
9914 APInt SignMask = APInt::getSignMask(BitWidth: DstVT.getScalarSizeInBits());
9915 if (APFloat::opOverflow &
9916 APF.convertFromAPInt(Input: SignMask, IsSigned: false, RM: APFloat::rmNearestTiesToEven)) {
9917 if (Node->isStrictFPOpcode()) {
9918 Result = DAG.getNode(Opcode: ISD::STRICT_FP_TO_SINT, DL: dl, ResultTys: { DstVT, MVT::Other },
9919 Ops: { Node->getOperand(Num: 0), Src });
9920 Chain = Result.getValue(R: 1);
9921 } else
9922 Result = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Src);
9923 return true;
9924 }
9925
9926 // Don't expand it if there isn't cheap fsub instruction.
9927 if (!isOperationLegalOrCustom(
9928 Op: Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, VT: SrcVT))
9929 return false;
9930
9931 SDValue Cst = DAG.getConstantFP(Val: APF, DL: dl, VT: SrcVT);
9932 SDValue Sel;
9933
9934 if (Node->isStrictFPOpcode()) {
9935 Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT,
9936 Chain: Node->getOperand(Num: 0), /*IsSignaling*/ true);
9937 Chain = Sel.getValue(R: 1);
9938 } else {
9939 Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT);
9940 }
9941
9942 bool Strict = Node->isStrictFPOpcode() ||
9943 shouldUseStrictFP_TO_INT(FpVT: SrcVT, IntVT: DstVT, /*IsSigned*/ false);
9944
9945 if (Strict) {
9946 // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
9947 // signmask then offset (the result of which should be fully representable).
9948 // Sel = Src < 0x8000000000000000
9949 // FltOfs = select Sel, 0, 0x8000000000000000
9950 // IntOfs = select Sel, 0, 0x8000000000000000
9951 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
9952
9953 // TODO: Should any fast-math-flags be set for the FSUB?
9954 SDValue FltOfs = DAG.getSelect(DL: dl, VT: SrcVT, Cond: Sel,
9955 LHS: DAG.getConstantFP(Val: 0.0, DL: dl, VT: SrcVT), RHS: Cst);
9956 Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT);
9957 SDValue IntOfs = DAG.getSelect(DL: dl, VT: DstVT, Cond: Sel,
9958 LHS: DAG.getConstant(Val: 0, DL: dl, VT: DstVT),
9959 RHS: DAG.getConstant(Val: SignMask, DL: dl, VT: DstVT));
9960 SDValue SInt;
9961 if (Node->isStrictFPOpcode()) {
9962 SDValue Val = DAG.getNode(Opcode: ISD::STRICT_FSUB, DL: dl, ResultTys: { SrcVT, MVT::Other },
9963 Ops: { Chain, Src, FltOfs });
9964 SInt = DAG.getNode(Opcode: ISD::STRICT_FP_TO_SINT, DL: dl, ResultTys: { DstVT, MVT::Other },
9965 Ops: { Val.getValue(R: 1), Val });
9966 Chain = SInt.getValue(R: 1);
9967 } else {
9968 SDValue Val = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: SrcVT, N1: Src, N2: FltOfs);
9969 SInt = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Val);
9970 }
9971 Result = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: SInt, N2: IntOfs);
9972 } else {
9973 // Expand based on maximum range of FP_TO_SINT:
9974 // True = fp_to_sint(Src)
9975 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
9976 // Result = select (Src < 0x8000000000000000), True, False
9977
9978 SDValue True = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Src);
9979 // TODO: Should any fast-math-flags be set for the FSUB?
9980 SDValue False = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT,
9981 Operand: DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: SrcVT, N1: Src, N2: Cst));
9982 False = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: False,
9983 N2: DAG.getConstant(Val: SignMask, DL: dl, VT: DstVT));
9984 Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT);
9985 Result = DAG.getSelect(DL: dl, VT: DstVT, Cond: Sel, LHS: True, RHS: False);
9986 }
9987 return true;
9988}
9989
9990bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
9991 SDValue &Chain, SelectionDAG &DAG) const {
9992 // This transform is not correct for converting 0 when rounding mode is set
9993 // to round toward negative infinity which will produce -0.0. So disable
9994 // under strictfp.
9995 if (Node->isStrictFPOpcode())
9996 return false;
9997
9998 SDValue Src = Node->getOperand(Num: 0);
9999 EVT SrcVT = Src.getValueType();
10000 EVT DstVT = Node->getValueType(ResNo: 0);
10001
10002 // If the input is known to be non-negative and SINT_TO_FP is legal then use
10003 // it.
10004 if (Node->getFlags().hasNonNeg() &&
10005 isOperationLegalOrCustom(Op: ISD::SINT_TO_FP, VT: SrcVT)) {
10006 Result =
10007 DAG.getNode(Opcode: ISD::SINT_TO_FP, DL: SDLoc(Node), VT: DstVT, Operand: Node->getOperand(Num: 0));
10008 return true;
10009 }
10010
10011 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
10012 return false;
10013
10014 // Only expand vector types if we have the appropriate vector bit
10015 // operations.
10016 if (SrcVT.isVector() && (!isOperationLegalOrCustom(Op: ISD::SRL, VT: SrcVT) ||
10017 !isOperationLegalOrCustom(Op: ISD::FADD, VT: DstVT) ||
10018 !isOperationLegalOrCustom(Op: ISD::FSUB, VT: DstVT) ||
10019 !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT: SrcVT) ||
10020 !isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT: SrcVT)))
10021 return false;
10022
10023 SDLoc dl(SDValue(Node, 0));
10024
10025 // Implementation of unsigned i64 to f64 following the algorithm in
10026 // __floatundidf in compiler_rt. This implementation performs rounding
10027 // correctly in all rounding modes with the exception of converting 0
10028 // when rounding toward negative infinity. In that case the fsub will
10029 // produce -0.0. This will be added to +0.0 and produce -0.0 which is
10030 // incorrect.
10031 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), DL: dl, VT: SrcVT);
10032 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
10033 Val: llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), DL: dl, VT: DstVT);
10034 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), DL: dl, VT: SrcVT);
10035 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), DL: dl, VT: SrcVT);
10036 SDValue HiShift = DAG.getShiftAmountConstant(Val: 32, VT: SrcVT, DL: dl);
10037
10038 SDValue Lo = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: SrcVT, N1: Src, N2: LoMask);
10039 SDValue Hi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: SrcVT, N1: Src, N2: HiShift);
10040 SDValue LoOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: SrcVT, N1: Lo, N2: TwoP52);
10041 SDValue HiOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: SrcVT, N1: Hi, N2: TwoP84);
10042 SDValue LoFlt = DAG.getBitcast(VT: DstVT, V: LoOr);
10043 SDValue HiFlt = DAG.getBitcast(VT: DstVT, V: HiOr);
10044 SDValue HiSub = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: DstVT, N1: HiFlt, N2: TwoP84PlusTwoP52);
10045 Result = DAG.getNode(Opcode: ISD::FADD, DL: dl, VT: DstVT, N1: LoFlt, N2: HiSub);
10046 return true;
10047}
10048
10049SDValue
10050TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
10051 SelectionDAG &DAG) const {
10052 unsigned Opcode = Node->getOpcode();
10053 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
10054 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
10055 "Wrong opcode");
10056
10057 if (Node->getFlags().hasNoNaNs()) {
10058 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
10059 EVT VT = Node->getValueType(ResNo: 0);
10060 if ((!isCondCodeLegal(CC: Pred, VT: VT.getSimpleVT()) ||
10061 !isOperationLegalOrCustom(Op: ISD::VSELECT, VT)) &&
10062 VT.isVector())
10063 return SDValue();
10064 SDValue Op1 = Node->getOperand(Num: 0);
10065 SDValue Op2 = Node->getOperand(Num: 1);
10066 return DAG.getSelectCC(DL: SDLoc(Node), LHS: Op1, RHS: Op2, True: Op1, False: Op2, Cond: Pred,
10067 Flags: Node->getFlags());
10068 }
10069
10070 return SDValue();
10071}
10072
10073SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
10074 SelectionDAG &DAG) const {
10075 if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
10076 return Expanded;
10077
10078 EVT VT = Node->getValueType(ResNo: 0);
10079 if (VT.isScalableVector())
10080 report_fatal_error(
10081 reason: "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
10082
10083 SDLoc dl(Node);
10084 unsigned NewOp =
10085 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
10086
10087 if (isOperationLegalOrCustom(Op: NewOp, VT)) {
10088 SDValue Quiet0 = Node->getOperand(Num: 0);
10089 SDValue Quiet1 = Node->getOperand(Num: 1);
10090
10091 if (!Node->getFlags().hasNoNaNs()) {
10092 // Insert canonicalizes if it's possible we need to quiet to get correct
10093 // sNaN behavior.
10094 if (!DAG.isKnownNeverSNaN(Op: Quiet0)) {
10095 Quiet0 = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL: dl, VT, Operand: Quiet0,
10096 Flags: Node->getFlags());
10097 }
10098 if (!DAG.isKnownNeverSNaN(Op: Quiet1)) {
10099 Quiet1 = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL: dl, VT, Operand: Quiet1,
10100 Flags: Node->getFlags());
10101 }
10102 }
10103
10104 return DAG.getNode(Opcode: NewOp, DL: dl, VT, N1: Quiet0, N2: Quiet1, Flags: Node->getFlags());
10105 }
10106
10107 // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
10108 // instead if there are no NaNs.
10109 if (Node->getFlags().hasNoNaNs() ||
10110 (DAG.isKnownNeverNaN(Op: Node->getOperand(Num: 0)) &&
10111 DAG.isKnownNeverNaN(Op: Node->getOperand(Num: 1)))) {
10112 unsigned IEEE2018Op =
10113 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
10114 if (isOperationLegalOrCustom(Op: IEEE2018Op, VT))
10115 return DAG.getNode(Opcode: IEEE2018Op, DL: dl, VT, N1: Node->getOperand(Num: 0),
10116 N2: Node->getOperand(Num: 1), Flags: Node->getFlags());
10117 }
10118
10119 if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
10120 return SelCC;
10121
10122 return SDValue();
10123}
10124
10125static SDValue isSpecificZeroAfterMaybeRounding(SelectionDAG &DAG,
10126 const TargetLowering &TLI,
10127 const SDLoc &DL, SDValue Val,
10128 FPClassTest FPClass) {
10129 EVT VT = Val.getValueType();
10130 EVT CCVT = TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10131 EVT IntVT = VT.changeTypeToInteger();
10132 EVT FloatVT = VT.changeElementType(Context&: *DAG.getContext(), EltVT: MVT::f32);
10133 SDValue TestZero = DAG.getTargetConstant(Val: FPClass, DL, VT: MVT::i32);
10134 if (!TLI.isTypeLegal(VT: IntVT) &&
10135 !TLI.isOperationLegalOrCustom(Op: ISD::IS_FPCLASS, VT))
10136 Val = DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: FloatVT, N1: Val,
10137 N2: DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true));
10138 return DAG.getNode(Opcode: ISD::IS_FPCLASS, DL, VT: CCVT, N1: Val, N2: TestZero);
10139}
10140
10141SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
10142 SelectionDAG &DAG) const {
10143 if (SDValue Expanded = expandVectorNaryOpBySplitting(Node: N, DAG))
10144 return Expanded;
10145
10146 SDLoc DL(N);
10147 SDValue LHS = N->getOperand(Num: 0);
10148 SDValue RHS = N->getOperand(Num: 1);
10149 unsigned Opc = N->getOpcode();
10150 EVT VT = N->getValueType(ResNo: 0);
10151 EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10152 bool IsMax = Opc == ISD::FMAXIMUM;
10153 SDNodeFlags Flags = N->getFlags();
10154
10155 // First, implement comparison not propagating NaN. If no native fmin or fmax
10156 // available, use plain select with setcc instead.
10157 SDValue MinMax;
10158 unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
10159 unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
10160
10161 // FIXME: We should probably define fminnum/fmaxnum variants with correct
10162 // signed zero behavior.
10163 bool MinMaxMustRespectOrderedZero = false;
10164
10165 if (isOperationLegalOrCustom(Op: CompOpcIeee, VT)) {
10166 MinMax = DAG.getNode(Opcode: CompOpcIeee, DL, VT, N1: LHS, N2: RHS, Flags);
10167 MinMaxMustRespectOrderedZero = true;
10168 } else if (isOperationLegalOrCustom(Op: CompOpc, VT)) {
10169 MinMax = DAG.getNode(Opcode: CompOpc, DL, VT, N1: LHS, N2: RHS, Flags);
10170 } else {
10171 if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
10172 return DAG.UnrollVectorOp(N);
10173
10174 // NaN (if exists) will be propagated later, so orderness doesn't matter.
10175 SDValue Compare =
10176 DAG.getSetCC(DL, VT: CCVT, LHS, RHS, Cond: IsMax ? ISD::SETOGT : ISD::SETOLT);
10177 MinMax = DAG.getSelect(DL, VT, Cond: Compare, LHS, RHS, Flags);
10178 }
10179
10180 // Propagate any NaN of both operands
10181 if (!N->getFlags().hasNoNaNs() &&
10182 (!DAG.isKnownNeverNaN(Op: RHS) || !DAG.isKnownNeverNaN(Op: LHS))) {
10183 ConstantFP *FPNaN = ConstantFP::get(Context&: *DAG.getContext(),
10184 V: APFloat::getNaN(Sem: VT.getFltSemantics()));
10185 MinMax = DAG.getSelect(DL, VT, Cond: DAG.getSetCC(DL, VT: CCVT, LHS, RHS, Cond: ISD::SETUO),
10186 LHS: DAG.getConstantFP(V: *FPNaN, DL, VT), RHS: MinMax, Flags);
10187 }
10188
10189 // fminimum/fmaximum requires -0.0 less than +0.0
10190 if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
10191 !DAG.isKnownNeverLogicalZero(Op: RHS) && !DAG.isKnownNeverLogicalZero(Op: LHS)) {
10192 SDValue IsEqual = DAG.getSetCC(DL, VT: CCVT, LHS, RHS, Cond: ISD::SETOEQ);
10193 SDValue IsSpecificZero = isSpecificZeroAfterMaybeRounding(
10194 DAG, TLI: *this, DL, Val: LHS, FPClass: IsMax ? fcPosZero : fcNegZero);
10195 SDValue RetZero = DAG.getSelect(DL, VT, Cond: IsSpecificZero, LHS, RHS, Flags);
10196 MinMax = DAG.getSelect(DL, VT, Cond: IsEqual, LHS: RetZero, RHS: MinMax, Flags);
10197 }
10198
10199 return MinMax;
10200}
10201
10202SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
10203 SelectionDAG &DAG) const {
10204 SDLoc DL(Node);
10205 SDValue LHS = Node->getOperand(Num: 0);
10206 SDValue RHS = Node->getOperand(Num: 1);
10207 unsigned Opc = Node->getOpcode();
10208 EVT VT = Node->getValueType(ResNo: 0);
10209 EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10210 bool IsMax = Opc == ISD::FMAXIMUMNUM;
10211 SDNodeFlags Flags = Node->getFlags();
10212
10213 unsigned NewOp =
10214 Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
10215
10216 if (isOperationLegalOrCustom(Op: NewOp, VT)) {
10217 if (!Flags.hasNoNaNs()) {
10218 // Insert canonicalizes if it's possible we need to quiet to get correct
10219 // sNaN behavior.
10220 if (!DAG.isKnownNeverSNaN(Op: LHS)) {
10221 LHS = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL, VT, Operand: LHS, Flags);
10222 }
10223 if (!DAG.isKnownNeverSNaN(Op: RHS)) {
10224 RHS = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL, VT, Operand: RHS, Flags);
10225 }
10226 }
10227
10228 return DAG.getNode(Opcode: NewOp, DL, VT, N1: LHS, N2: RHS, Flags);
10229 }
10230
10231 // We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has
10232 // same behaviors for all of other cases: +0.0 vs -0.0 included.
10233 if (Flags.hasNoNaNs() ||
10234 (DAG.isKnownNeverNaN(Op: LHS) && DAG.isKnownNeverNaN(Op: RHS))) {
10235 unsigned IEEE2019Op =
10236 Opc == ISD::FMINIMUMNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
10237 if (isOperationLegalOrCustom(Op: IEEE2019Op, VT))
10238 return DAG.getNode(Opcode: IEEE2019Op, DL, VT, N1: LHS, N2: RHS, Flags);
10239 }
10240
10241 // FMINNUM/FMAXMUM returns qNaN if either operand is sNaN, and it may return
10242 // either one for +0.0 vs -0.0.
10243 if ((Flags.hasNoNaNs() ||
10244 (DAG.isKnownNeverSNaN(Op: LHS) && DAG.isKnownNeverSNaN(Op: RHS))) &&
10245 (Flags.hasNoSignedZeros() || DAG.isKnownNeverLogicalZero(Op: LHS) ||
10246 DAG.isKnownNeverLogicalZero(Op: RHS))) {
10247 unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
10248 if (isOperationLegalOrCustom(Op: IEEE2008Op, VT))
10249 return DAG.getNode(Opcode: IEEE2008Op, DL, VT, N1: LHS, N2: RHS, Flags);
10250 }
10251
10252 if (VT.isVector() &&
10253 (isOperationLegalOrCustomOrPromote(Op: Opc, VT: VT.getVectorElementType()) ||
10254 !isOperationLegalOrCustom(Op: ISD::VSELECT, VT)))
10255 return DAG.UnrollVectorOp(N: Node);
10256
10257 // If only one operand is NaN, override it with another operand.
10258 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(Op: LHS)) {
10259 LHS = DAG.getSelectCC(DL, LHS, RHS: LHS, True: RHS, False: LHS, Cond: ISD::SETUO);
10260 }
10261 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(Op: RHS)) {
10262 RHS = DAG.getSelectCC(DL, LHS: RHS, RHS, True: LHS, False: RHS, Cond: ISD::SETUO);
10263 }
10264
10265 // Always prefer RHS if equal.
10266 SDValue MinMax =
10267 DAG.getSelectCC(DL, LHS, RHS, True: LHS, False: RHS, Cond: IsMax ? ISD::SETGT : ISD::SETLT);
10268
10269 // TODO: We need quiet sNaN if strictfp.
10270
10271 // Fixup signed zero behavior.
10272 if (Flags.hasNoSignedZeros() || DAG.isKnownNeverLogicalZero(Op: LHS) ||
10273 DAG.isKnownNeverLogicalZero(Op: RHS)) {
10274 return MinMax;
10275 }
10276 SDValue IsZero = DAG.getSetCC(DL, VT: CCVT, LHS: MinMax,
10277 RHS: DAG.getConstantFP(Val: 0.0, DL, VT), Cond: ISD::SETEQ);
10278 SDValue IsSpecificZero = isSpecificZeroAfterMaybeRounding(
10279 DAG, TLI: *this, DL, Val: LHS, FPClass: IsMax ? fcPosZero : fcNegZero);
10280 // It's OK to select from LHS and MinMax, with only one ISD::IS_FPCLASS, as
10281 // we preferred RHS when generate MinMax, if the operands are equal.
10282 SDValue RetZero = DAG.getSelect(DL, VT, Cond: IsSpecificZero, LHS, RHS: MinMax, Flags);
10283 return DAG.getSelect(DL, VT, Cond: IsZero, LHS: RetZero, RHS: MinMax, Flags);
10284}
10285
10286/// Returns a true value if if this FPClassTest can be performed with an ordered
10287/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
10288/// std::nullopt if it cannot be performed as a compare with 0.
10289static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
10290 const fltSemantics &Semantics,
10291 const MachineFunction &MF) {
10292 FPClassTest OrderedMask = Test & ~fcNan;
10293 FPClassTest NanTest = Test & fcNan;
10294 bool IsOrdered = NanTest == fcNone;
10295 bool IsUnordered = NanTest == fcNan;
10296
10297 // Skip cases that are testing for only a qnan or snan.
10298 if (!IsOrdered && !IsUnordered)
10299 return std::nullopt;
10300
10301 if (OrderedMask == fcZero &&
10302 MF.getDenormalMode(FPType: Semantics).Input == DenormalMode::IEEE)
10303 return IsOrdered;
10304 if (OrderedMask == (fcZero | fcSubnormal) &&
10305 MF.getDenormalMode(FPType: Semantics).inputsAreZero())
10306 return IsOrdered;
10307 return std::nullopt;
10308}
10309
10310SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
10311 const FPClassTest OrigTestMask,
10312 SDNodeFlags Flags, const SDLoc &DL,
10313 SelectionDAG &DAG) const {
10314 EVT OperandVT = Op.getValueType();
10315 assert(OperandVT.isFloatingPoint());
10316 FPClassTest Test = OrigTestMask;
10317
10318 // Degenerated cases.
10319 if (Test == fcNone)
10320 return DAG.getBoolConstant(V: false, DL, VT: ResultVT, OpVT: OperandVT);
10321 if (Test == fcAllFlags)
10322 return DAG.getBoolConstant(V: true, DL, VT: ResultVT, OpVT: OperandVT);
10323
10324 // PPC double double is a pair of doubles, of which the higher part determines
10325 // the value class.
10326 if (OperandVT == MVT::ppcf128) {
10327 Op = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL, VT: MVT::f64, N1: Op,
10328 N2: DAG.getConstant(Val: 1, DL, VT: MVT::i32));
10329 OperandVT = MVT::f64;
10330 }
10331
10332 // Floating-point type properties.
10333 EVT ScalarFloatVT = OperandVT.getScalarType();
10334 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(Context&: *DAG.getContext());
10335 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
10336 bool IsF80 = (ScalarFloatVT == MVT::f80);
10337
10338 // Some checks can be implemented using float comparisons, if floating point
10339 // exceptions are ignored.
10340 if (Flags.hasNoFPExcept() &&
10341 isOperationLegalOrCustom(Op: ISD::SETCC, VT: OperandVT.getScalarType())) {
10342 FPClassTest FPTestMask = Test;
10343 bool IsInvertedFP = false;
10344
10345 if (FPClassTest InvertedFPCheck =
10346 invertFPClassTestIfSimpler(Test: FPTestMask, UseFCmp: true)) {
10347 FPTestMask = InvertedFPCheck;
10348 IsInvertedFP = true;
10349 }
10350
10351 ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
10352 ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
10353
10354 // See if we can fold an | fcNan into an unordered compare.
10355 FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
10356
10357 // Can't fold the ordered check if we're only testing for snan or qnan
10358 // individually.
10359 if ((FPTestMask & fcNan) != fcNan)
10360 OrderedFPTestMask = FPTestMask;
10361
10362 const bool IsOrdered = FPTestMask == OrderedFPTestMask;
10363
10364 if (std::optional<bool> IsCmp0 =
10365 isFCmpEqualZero(Test: FPTestMask, Semantics, MF: DAG.getMachineFunction());
10366 IsCmp0 && (isCondCodeLegalOrCustom(
10367 CC: *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
10368 VT: OperandVT.getScalarType().getSimpleVT()))) {
10369
10370 // If denormals could be implicitly treated as 0, this is not equivalent
10371 // to a compare with 0 since it will also be true for denormals.
10372 return DAG.getSetCC(DL, VT: ResultVT, LHS: Op,
10373 RHS: DAG.getConstantFP(Val: 0.0, DL, VT: OperandVT),
10374 Cond: *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
10375 }
10376
10377 if (FPTestMask == fcNan &&
10378 isCondCodeLegalOrCustom(CC: IsInvertedFP ? ISD::SETO : ISD::SETUO,
10379 VT: OperandVT.getScalarType().getSimpleVT()))
10380 return DAG.getSetCC(DL, VT: ResultVT, LHS: Op, RHS: Op,
10381 Cond: IsInvertedFP ? ISD::SETO : ISD::SETUO);
10382
10383 bool IsOrderedInf = FPTestMask == fcInf;
10384 if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
10385 isCondCodeLegalOrCustom(CC: IsOrderedInf ? OrderedCmpOpcode
10386 : UnorderedCmpOpcode,
10387 VT: OperandVT.getScalarType().getSimpleVT()) &&
10388 isOperationLegalOrCustom(Op: ISD::FABS, VT: OperandVT.getScalarType()) &&
10389 (isOperationLegal(Op: ISD::ConstantFP, VT: OperandVT.getScalarType()) ||
10390 (OperandVT.isVector() &&
10391 isOperationLegalOrCustom(Op: ISD::BUILD_VECTOR, VT: OperandVT)))) {
10392 // isinf(x) --> fabs(x) == inf
10393 SDValue Abs = DAG.getNode(Opcode: ISD::FABS, DL, VT: OperandVT, Operand: Op);
10394 SDValue Inf =
10395 DAG.getConstantFP(Val: APFloat::getInf(Sem: Semantics), DL, VT: OperandVT);
10396 return DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: Inf,
10397 Cond: IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
10398 }
10399
10400 if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
10401 isCondCodeLegalOrCustom(CC: IsOrdered ? OrderedCmpOpcode
10402 : UnorderedCmpOpcode,
10403 VT: OperandVT.getSimpleVT())) {
10404 // isposinf(x) --> x == inf
10405 // isneginf(x) --> x == -inf
10406 // isposinf(x) || nan --> x u== inf
10407 // isneginf(x) || nan --> x u== -inf
10408
10409 SDValue Inf = DAG.getConstantFP(
10410 Val: APFloat::getInf(Sem: Semantics, Negative: OrderedFPTestMask == fcNegInf), DL,
10411 VT: OperandVT);
10412 return DAG.getSetCC(DL, VT: ResultVT, LHS: Op, RHS: Inf,
10413 Cond: IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
10414 }
10415
10416 if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
10417 // TODO: Could handle ordered case, but it produces worse code for
10418 // x86. Maybe handle ordered if fabs is free?
10419
10420 ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
10421 ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
10422
10423 if (isCondCodeLegalOrCustom(CC: IsOrdered ? OrderedOp : UnorderedOp,
10424 VT: OperandVT.getScalarType().getSimpleVT())) {
10425 // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal
10426
10427 // TODO: Maybe only makes sense if fabs is free. Integer test of
10428 // exponent bits seems better for x86.
10429 SDValue Abs = DAG.getNode(Opcode: ISD::FABS, DL, VT: OperandVT, Operand: Op);
10430 SDValue SmallestNormal = DAG.getConstantFP(
10431 Val: APFloat::getSmallestNormalized(Sem: Semantics), DL, VT: OperandVT);
10432 return DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: SmallestNormal,
10433 Cond: IsOrdered ? OrderedOp : UnorderedOp);
10434 }
10435 }
10436
10437 if (FPTestMask == fcNormal) {
10438 // TODO: Handle unordered
10439 ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
10440 ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;
10441
10442 if (isCondCodeLegalOrCustom(CC: IsFiniteOp,
10443 VT: OperandVT.getScalarType().getSimpleVT()) &&
10444 isCondCodeLegalOrCustom(CC: IsNormalOp,
10445 VT: OperandVT.getScalarType().getSimpleVT()) &&
10446 isFAbsFree(VT: OperandVT)) {
10447 // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
10448 SDValue Inf =
10449 DAG.getConstantFP(Val: APFloat::getInf(Sem: Semantics), DL, VT: OperandVT);
10450 SDValue SmallestNormal = DAG.getConstantFP(
10451 Val: APFloat::getSmallestNormalized(Sem: Semantics), DL, VT: OperandVT);
10452
10453 SDValue Abs = DAG.getNode(Opcode: ISD::FABS, DL, VT: OperandVT, Operand: Op);
10454 SDValue IsFinite = DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: Inf, Cond: IsFiniteOp);
10455 SDValue IsNormal =
10456 DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: SmallestNormal, Cond: IsNormalOp);
10457 unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
10458 return DAG.getNode(Opcode: LogicOp, DL, VT: ResultVT, N1: IsFinite, N2: IsNormal);
10459 }
10460 }
10461 }
10462
10463 // Some checks may be represented as inversion of simpler check, for example
10464 // "inf|normal|subnormal|zero" => !"nan".
10465 bool IsInverted = false;
10466
10467 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, UseFCmp: false)) {
10468 Test = InvertedCheck;
10469 IsInverted = true;
10470 }
10471
10472 // In the general case use integer operations.
10473 unsigned BitSize = OperandVT.getScalarSizeInBits();
10474 EVT IntVT = OperandVT.changeElementType(
10475 Context&: *DAG.getContext(), EltVT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: BitSize));
10476 SDValue OpAsInt = DAG.getBitcast(VT: IntVT, V: Op);
10477
10478 // Various masks.
10479 APInt SignBit = APInt::getSignMask(BitWidth: BitSize);
10480 APInt ValueMask = APInt::getSignedMaxValue(numBits: BitSize); // All bits but sign.
10481 APInt Inf = APFloat::getInf(Sem: Semantics).bitcastToAPInt(); // Exp and int bit.
10482 const unsigned ExplicitIntBitInF80 = 63;
10483 APInt ExpMask = Inf;
10484 if (IsF80)
10485 ExpMask.clearBit(BitPosition: ExplicitIntBitInF80);
10486 APInt AllOneMantissa = APFloat::getLargest(Sem: Semantics).bitcastToAPInt() & ~Inf;
10487 APInt QNaNBitMask =
10488 APInt::getOneBitSet(numBits: BitSize, BitNo: AllOneMantissa.getActiveBits() - 1);
10489 APInt InversionMask = APInt::getAllOnes(numBits: ResultVT.getScalarSizeInBits());
10490
10491 SDValue ValueMaskV = DAG.getConstant(Val: ValueMask, DL, VT: IntVT);
10492 SDValue SignBitV = DAG.getConstant(Val: SignBit, DL, VT: IntVT);
10493 SDValue ExpMaskV = DAG.getConstant(Val: ExpMask, DL, VT: IntVT);
10494 SDValue ZeroV = DAG.getConstant(Val: 0, DL, VT: IntVT);
10495 SDValue InfV = DAG.getConstant(Val: Inf, DL, VT: IntVT);
10496 SDValue ResultInversionMask = DAG.getConstant(Val: InversionMask, DL, VT: ResultVT);
10497
10498 SDValue Res;
10499 const auto appendResult = [&](SDValue PartialRes) {
10500 if (PartialRes) {
10501 if (Res)
10502 Res = DAG.getNode(Opcode: ISD::OR, DL, VT: ResultVT, N1: Res, N2: PartialRes);
10503 else
10504 Res = PartialRes;
10505 }
10506 };
10507
10508 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
10509 const auto getIntBitIsSet = [&]() -> SDValue {
10510 if (!IntBitIsSetV) {
10511 APInt IntBitMask(BitSize, 0);
10512 IntBitMask.setBit(ExplicitIntBitInF80);
10513 SDValue IntBitMaskV = DAG.getConstant(Val: IntBitMask, DL, VT: IntVT);
10514 SDValue IntBitV = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: IntBitMaskV);
10515 IntBitIsSetV = DAG.getSetCC(DL, VT: ResultVT, LHS: IntBitV, RHS: ZeroV, Cond: ISD::SETNE);
10516 }
10517 return IntBitIsSetV;
10518 };
10519
10520 // Split the value into sign bit and absolute value.
10521 SDValue AbsV = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: ValueMaskV);
10522 SDValue SignV = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt,
10523 RHS: DAG.getConstant(Val: 0, DL, VT: IntVT), Cond: ISD::SETLT);
10524
10525 // Tests that involve more than one class should be processed first.
10526 SDValue PartialRes;
10527
10528 if (IsF80)
10529 ; // Detect finite numbers of f80 by checking individual classes because
10530 // they have different settings of the explicit integer bit.
10531 else if ((Test & fcFinite) == fcFinite) {
10532 // finite(V) ==> (a << 1) < (inf << 1)
10533 //
10534 // See https://github.com/llvm/llvm-project/issues/169270, this is slightly
10535 // shorter than the `finite(V) ==> abs(V) < exp_mask` formula used before.
10536
10537 assert(APFloat::isIEEELikeFP(OperandVT.getFltSemantics()) &&
10538 "finite check requires IEEE-like FP");
10539
10540 SDValue One = DAG.getShiftAmountConstant(Val: 1, VT: IntVT, DL);
10541 SDValue TwiceOp = DAG.getNode(Opcode: ISD::SHL, DL, VT: IntVT, N1: OpAsInt, N2: One);
10542 SDValue TwiceInf = DAG.getNode(Opcode: ISD::SHL, DL, VT: IntVT, N1: ExpMaskV, N2: One);
10543
10544 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: TwiceOp, RHS: TwiceInf, Cond: ISD::SETULT);
10545 Test &= ~fcFinite;
10546 } else if ((Test & fcFinite) == fcPosFinite) {
10547 // finite(V) && V > 0 ==> V < exp_mask
10548 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: ExpMaskV, Cond: ISD::SETULT);
10549 Test &= ~fcPosFinite;
10550 } else if ((Test & fcFinite) == fcNegFinite) {
10551 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
10552 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ExpMaskV, Cond: ISD::SETLT);
10553 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
10554 Test &= ~fcNegFinite;
10555 }
10556 appendResult(PartialRes);
10557
10558 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
10559 // fcZero | fcSubnormal => test all exponent bits are 0
10560 // TODO: Handle sign bit specific cases
10561 if (PartialCheck == (fcZero | fcSubnormal)) {
10562 SDValue ExpBits = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: ExpMaskV);
10563 SDValue ExpIsZero =
10564 DAG.getSetCC(DL, VT: ResultVT, LHS: ExpBits, RHS: ZeroV, Cond: ISD::SETEQ);
10565 appendResult(ExpIsZero);
10566 Test &= ~PartialCheck & fcAllFlags;
10567 }
10568 }
10569
10570 // Check for individual classes.
10571
10572 if (unsigned PartialCheck = Test & fcZero) {
10573 if (PartialCheck == fcPosZero)
10574 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: ZeroV, Cond: ISD::SETEQ);
10575 else if (PartialCheck == fcZero)
10576 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ZeroV, Cond: ISD::SETEQ);
10577 else // ISD::fcNegZero
10578 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: SignBitV, Cond: ISD::SETEQ);
10579 appendResult(PartialRes);
10580 }
10581
10582 if (unsigned PartialCheck = Test & fcSubnormal) {
10583 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
10584 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
10585 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
10586 SDValue MantissaV = DAG.getConstant(Val: AllOneMantissa, DL, VT: IntVT);
10587 SDValue VMinusOneV =
10588 DAG.getNode(Opcode: ISD::SUB, DL, VT: IntVT, N1: V, N2: DAG.getConstant(Val: 1, DL, VT: IntVT));
10589 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: VMinusOneV, RHS: MantissaV, Cond: ISD::SETULT);
10590 if (PartialCheck == fcNegSubnormal)
10591 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
10592 appendResult(PartialRes);
10593 }
10594
10595 if (unsigned PartialCheck = Test & fcInf) {
10596 if (PartialCheck == fcPosInf)
10597 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: InfV, Cond: ISD::SETEQ);
10598 else if (PartialCheck == fcInf)
10599 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETEQ);
10600 else { // ISD::fcNegInf
10601 APInt NegInf = APFloat::getInf(Sem: Semantics, Negative: true).bitcastToAPInt();
10602 SDValue NegInfV = DAG.getConstant(Val: NegInf, DL, VT: IntVT);
10603 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: NegInfV, Cond: ISD::SETEQ);
10604 }
10605 appendResult(PartialRes);
10606 }
10607
10608 if (unsigned PartialCheck = Test & fcNan) {
10609 APInt InfWithQnanBit = Inf | QNaNBitMask;
10610 SDValue InfWithQnanBitV = DAG.getConstant(Val: InfWithQnanBit, DL, VT: IntVT);
10611 if (PartialCheck == fcNan) {
10612 // isnan(V) ==> abs(V) > int(inf)
10613 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETGT);
10614 if (IsF80) {
10615 // Recognize unsupported values as NaNs for compatibility with glibc.
10616 // In them (exp(V)==0) == int_bit.
10617 SDValue ExpBits = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: AbsV, N2: ExpMaskV);
10618 SDValue ExpIsZero =
10619 DAG.getSetCC(DL, VT: ResultVT, LHS: ExpBits, RHS: ZeroV, Cond: ISD::SETEQ);
10620 SDValue IsPseudo =
10621 DAG.getSetCC(DL, VT: ResultVT, LHS: getIntBitIsSet(), RHS: ExpIsZero, Cond: ISD::SETEQ);
10622 PartialRes = DAG.getNode(Opcode: ISD::OR, DL, VT: ResultVT, N1: PartialRes, N2: IsPseudo);
10623 }
10624 } else if (PartialCheck == fcQNan) {
10625 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
10626 PartialRes =
10627 DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfWithQnanBitV, Cond: ISD::SETGE);
10628 } else { // ISD::fcSNan
10629 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
10630 // abs(V) < (unsigned(Inf) | quiet_bit)
10631 SDValue IsNan = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETGT);
10632 SDValue IsNotQnan =
10633 DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfWithQnanBitV, Cond: ISD::SETLT);
10634 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: IsNan, N2: IsNotQnan);
10635 }
10636 appendResult(PartialRes);
10637 }
10638
10639 if (unsigned PartialCheck = Test & fcNormal) {
10640 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
10641 APInt ExpLSB = ExpMask & ~(ExpMask.shl(shiftAmt: 1));
10642 SDValue ExpLSBV = DAG.getConstant(Val: ExpLSB, DL, VT: IntVT);
10643 SDValue ExpMinus1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: IntVT, N1: AbsV, N2: ExpLSBV);
10644 APInt ExpLimit = ExpMask - ExpLSB;
10645 SDValue ExpLimitV = DAG.getConstant(Val: ExpLimit, DL, VT: IntVT);
10646 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: ExpMinus1, RHS: ExpLimitV, Cond: ISD::SETULT);
10647 if (PartialCheck == fcNegNormal)
10648 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
10649 else if (PartialCheck == fcPosNormal) {
10650 SDValue PosSignV =
10651 DAG.getNode(Opcode: ISD::XOR, DL, VT: ResultVT, N1: SignV, N2: ResultInversionMask);
10652 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: PosSignV);
10653 }
10654 if (IsF80)
10655 PartialRes =
10656 DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: getIntBitIsSet());
10657 appendResult(PartialRes);
10658 }
10659
10660 if (!Res)
10661 return DAG.getConstant(Val: IsInverted, DL, VT: ResultVT);
10662 if (IsInverted)
10663 Res = DAG.getNode(Opcode: ISD::XOR, DL, VT: ResultVT, N1: Res, N2: ResultInversionMask);
10664 return Res;
10665}
10666
10667// Only expand vector types if we have the appropriate vector bit operations.
10668static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
10669 assert(VT.isVector() && "Expected vector type");
10670 unsigned Len = VT.getScalarSizeInBits();
10671 return TLI.isOperationLegalOrCustom(Op: ISD::ADD, VT) &&
10672 TLI.isOperationLegalOrCustom(Op: ISD::SUB, VT) &&
10673 TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
10674 (Len == 8 || TLI.isOperationLegalOrCustom(Op: ISD::MUL, VT)) &&
10675 TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT);
10676}
10677
10678SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
10679 SDLoc dl(Node);
10680 EVT VT = Node->getValueType(ResNo: 0);
10681 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
10682 SDValue Op = Node->getOperand(Num: 0);
10683 unsigned Len = VT.getScalarSizeInBits();
10684 assert(VT.isInteger() && "CTPOP not implemented for this type.");
10685
10686 // TODO: Add support for irregular type lengths.
10687 if (!(Len <= 128 && Len % 8 == 0))
10688 return SDValue();
10689
10690 // Only expand vector types if we have the appropriate vector bit operations.
10691 if (VT.isVector() && !canExpandVectorCTPOP(TLI: *this, VT))
10692 return SDValue();
10693
10694 // This is the "best" algorithm from
10695 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
10696 SDValue Mask55 =
10697 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x55)), DL: dl, VT);
10698 SDValue Mask33 =
10699 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x33)), DL: dl, VT);
10700 SDValue Mask0F =
10701 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x0F)), DL: dl, VT);
10702
10703 // v = v - ((v >> 1) & 0x55555555...)
10704 Op = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Op,
10705 N2: DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
10706 N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
10707 N2: DAG.getConstant(Val: 1, DL: dl, VT: ShVT)),
10708 N2: Mask55));
10709 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
10710 Op = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op, N2: Mask33),
10711 N2: DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
10712 N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
10713 N2: DAG.getConstant(Val: 2, DL: dl, VT: ShVT)),
10714 N2: Mask33));
10715 // v = (v + (v >> 4)) & 0x0F0F0F0F...
10716 Op = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
10717 N1: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Op,
10718 N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
10719 N2: DAG.getConstant(Val: 4, DL: dl, VT: ShVT))),
10720 N2: Mask0F);
10721
10722 if (Len <= 8)
10723 return Op;
10724
10725 // Avoid the multiply if we only have 2 bytes to add.
10726 // TODO: Only doing this for scalars because vectors weren't as obviously
10727 // improved.
10728 if (Len == 16 && !VT.isVector()) {
10729 // v = (v + (v >> 8)) & 0x00FF;
10730 return DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
10731 N1: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Op,
10732 N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
10733 N2: DAG.getConstant(Val: 8, DL: dl, VT: ShVT))),
10734 N2: DAG.getConstant(Val: 0xFF, DL: dl, VT));
10735 }
10736
10737 // v = (v * 0x01010101...) >> (Len - 8)
10738 SDValue V;
10739 if (isOperationLegalOrCustomOrPromote(
10740 Op: ISD::MUL, VT: getTypeToTransformTo(Context&: *DAG.getContext(), VT))) {
10741 SDValue Mask01 =
10742 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x01)), DL: dl, VT);
10743 V = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Op, N2: Mask01);
10744 } else {
10745 V = Op;
10746 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
10747 SDValue ShiftC = DAG.getShiftAmountConstant(Val: Shift, VT, DL: dl);
10748 V = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: V,
10749 N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: V, N2: ShiftC));
10750 }
10751 }
10752 return DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: V, N2: DAG.getConstant(Val: Len - 8, DL: dl, VT: ShVT));
10753}
10754
10755SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
10756 SDLoc dl(Node);
10757 EVT VT = Node->getValueType(ResNo: 0);
10758 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
10759 SDValue Op = Node->getOperand(Num: 0);
10760 SDValue Mask = Node->getOperand(Num: 1);
10761 SDValue VL = Node->getOperand(Num: 2);
10762 unsigned Len = VT.getScalarSizeInBits();
10763 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
10764
10765 // TODO: Add support for irregular type lengths.
10766 if (!(Len <= 128 && Len % 8 == 0))
10767 return SDValue();
10768
10769 // This is same algorithm of expandCTPOP from
10770 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
10771 SDValue Mask55 =
10772 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x55)), DL: dl, VT);
10773 SDValue Mask33 =
10774 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x33)), DL: dl, VT);
10775 SDValue Mask0F =
10776 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x0F)), DL: dl, VT);
10777
10778 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
10779
10780 // v = v - ((v >> 1) & 0x55555555...)
10781 Tmp1 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT,
10782 N1: DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op,
10783 N2: DAG.getConstant(Val: 1, DL: dl, VT: ShVT), N3: Mask, N4: VL),
10784 N2: Mask55, N3: Mask, N4: VL);
10785 Op = DAG.getNode(Opcode: ISD::VP_SUB, DL: dl, VT, N1: Op, N2: Tmp1, N3: Mask, N4: VL);
10786
10787 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
10788 Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op, N2: Mask33, N3: Mask, N4: VL);
10789 Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT,
10790 N1: DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op,
10791 N2: DAG.getConstant(Val: 2, DL: dl, VT: ShVT), N3: Mask, N4: VL),
10792 N2: Mask33, N3: Mask, N4: VL);
10793 Op = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: VL);
10794
10795 // v = (v + (v >> 4)) & 0x0F0F0F0F...
10796 Tmp4 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 4, DL: dl, VT: ShVT),
10797 N3: Mask, N4: VL),
10798 Tmp5 = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: Op, N2: Tmp4, N3: Mask, N4: VL);
10799 Op = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp5, N2: Mask0F, N3: Mask, N4: VL);
10800
10801 if (Len <= 8)
10802 return Op;
10803
10804 // v = (v * 0x01010101...) >> (Len - 8)
10805 SDValue V;
10806 if (isOperationLegalOrCustomOrPromote(
10807 Op: ISD::VP_MUL, VT: getTypeToTransformTo(Context&: *DAG.getContext(), VT))) {
10808 SDValue Mask01 =
10809 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x01)), DL: dl, VT);
10810 V = DAG.getNode(Opcode: ISD::VP_MUL, DL: dl, VT, N1: Op, N2: Mask01, N3: Mask, N4: VL);
10811 } else {
10812 V = Op;
10813 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
10814 SDValue ShiftC = DAG.getShiftAmountConstant(Val: Shift, VT, DL: dl);
10815 V = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: V,
10816 N2: DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: V, N2: ShiftC, N3: Mask, N4: VL),
10817 N3: Mask, N4: VL);
10818 }
10819 }
10820 return DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: V, N2: DAG.getConstant(Val: Len - 8, DL: dl, VT: ShVT),
10821 N3: Mask, N4: VL);
10822}
10823
10824SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
10825 SDLoc dl(Node);
10826 EVT VT = Node->getValueType(ResNo: 0);
10827 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
10828 SDValue Op = Node->getOperand(Num: 0);
10829 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
10830
10831 // If the non-ZERO_POISON version is supported we can use that instead.
10832 if (Node->getOpcode() == ISD::CTLZ_ZERO_POISON &&
10833 isOperationLegalOrCustom(Op: ISD::CTLZ, VT))
10834 return DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Op);
10835
10836 // If the ZERO_POISON version is supported use that and handle the zero case.
10837 if (isOperationLegalOrCustom(Op: ISD::CTLZ_ZERO_POISON, VT)) {
10838 EVT SetCCVT =
10839 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10840 SDValue CTLZ = DAG.getNode(Opcode: ISD::CTLZ_ZERO_POISON, DL: dl, VT, Operand: Op);
10841 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
10842 SDValue SrcIsZero = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
10843 return DAG.getSelect(DL: dl, VT, Cond: SrcIsZero,
10844 LHS: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT), RHS: CTLZ);
10845 }
10846
10847 // Only expand vector types if we have the appropriate vector bit operations.
10848 // This includes the operations needed to expand CTPOP if it isn't supported.
10849 if (VT.isVector() && (!isPowerOf2_32(Value: NumBitsPerElt) ||
10850 (!isOperationLegalOrCustom(Op: ISD::CTPOP, VT) &&
10851 !canExpandVectorCTPOP(TLI: *this, VT)) ||
10852 !isOperationLegalOrCustom(Op: ISD::SRL, VT) ||
10853 !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)))
10854 return SDValue();
10855
10856 // for now, we do this:
10857 // x = x | (x >> 1);
10858 // x = x | (x >> 2);
10859 // ...
10860 // x = x | (x >>16);
10861 // x = x | (x >>32); // for 64-bit input
10862 // return popcount(~x);
10863 //
10864 // Ref: "Hacker's Delight" by Henry Warren
10865 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
10866 SDValue Tmp = DAG.getConstant(Val: 1ULL << i, DL: dl, VT: ShVT);
10867 Op = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Op,
10868 N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: Tmp));
10869 }
10870 Op = DAG.getNOT(DL: dl, Val: Op, VT);
10871 return DAG.getNode(Opcode: ISD::CTPOP, DL: dl, VT, Operand: Op);
10872}
10873
10874SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
10875 SDLoc dl(Node);
10876 EVT VT = Node->getValueType(ResNo: 0);
10877 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
10878 SDValue Op = Node->getOperand(Num: 0);
10879 SDValue Mask = Node->getOperand(Num: 1);
10880 SDValue VL = Node->getOperand(Num: 2);
10881 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
10882
10883 // do this:
10884 // x = x | (x >> 1);
10885 // x = x | (x >> 2);
10886 // ...
10887 // x = x | (x >>16);
10888 // x = x | (x >>32); // for 64-bit input
10889 // return popcount(~x);
10890 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
10891 SDValue Tmp = DAG.getConstant(Val: 1ULL << i, DL: dl, VT: ShVT);
10892 Op = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Op,
10893 N2: DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: Tmp, N3: Mask, N4: VL), N3: Mask,
10894 N4: VL);
10895 }
10896 Op = DAG.getNode(Opcode: ISD::VP_XOR, DL: dl, VT, N1: Op, N2: DAG.getAllOnesConstant(DL: dl, VT),
10897 N3: Mask, N4: VL);
10898 return DAG.getNode(Opcode: ISD::VP_CTPOP, DL: dl, VT, N1: Op, N2: Mask, N3: VL);
10899}
10900
10901SDValue TargetLowering::expandCTLS(SDNode *Node, SelectionDAG &DAG) const {
10902 SDLoc dl(Node);
10903 EVT VT = Node->getValueType(ResNo: 0);
10904 SDValue Op = DAG.getFreeze(V: Node->getOperand(Num: 0));
10905 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
10906
10907 // CTLS(x) = CTLZ(OR(SHL(XOR(x, SRA(x, BW-1)), 1), 1))
10908 // This transforms the sign bits into leading zeros that can be counted.
10909 SDValue ShiftAmt = DAG.getShiftAmountConstant(Val: NumBitsPerElt - 1, VT, DL: dl);
10910 SDValue SignBit = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Op, N2: ShiftAmt);
10911 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Op, N2: SignBit);
10912 SDValue Shl =
10913 DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Xor, N2: DAG.getShiftAmountConstant(Val: 1, VT, DL: dl));
10914 SDValue Or = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Shl, N2: DAG.getConstant(Val: 1, DL: dl, VT));
10915 return DAG.getNode(Opcode: ISD::CTLZ_ZERO_POISON, DL: dl, VT, Operand: Or);
10916}
10917
10918SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
10919 const SDLoc &DL, EVT VT, SDValue Op,
10920 unsigned BitWidth) const {
10921 if (BitWidth != 32 && BitWidth != 64)
10922 return SDValue();
10923
10924 const DataLayout &TD = DAG.getDataLayout();
10925 if (!isOperationCustom(Op: ISD::ConstantPool, VT: getPointerTy(DL: TD)))
10926 return SDValue();
10927
10928 APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
10929 : APInt(64, 0x0218A392CD3D5DBFULL);
10930 MachinePointerInfo PtrInfo =
10931 MachinePointerInfo::getConstantPool(MF&: DAG.getMachineFunction());
10932 unsigned ShiftAmt = BitWidth - Log2_32(Value: BitWidth);
10933 SDValue Neg = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT), N2: Op);
10934 SDValue Lookup = DAG.getNode(
10935 Opcode: ISD::SRL, DL, VT,
10936 N1: DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op, N2: Neg),
10937 N2: DAG.getConstant(Val: DeBruijn, DL, VT)),
10938 N2: DAG.getShiftAmountConstant(Val: ShiftAmt, VT, DL));
10939 Lookup = DAG.getSExtOrTrunc(Op: Lookup, DL, VT: getPointerTy(DL: TD));
10940
10941 SmallVector<uint8_t> Table(BitWidth, 0);
10942 for (unsigned i = 0; i < BitWidth; i++) {
10943 APInt Shl = DeBruijn.shl(shiftAmt: i);
10944 APInt Lshr = Shl.lshr(shiftAmt: ShiftAmt);
10945 Table[Lshr.getZExtValue()] = i;
10946 }
10947
10948 // Create a ConstantArray in Constant Pool
10949 auto *CA = ConstantDataArray::get(Context&: *DAG.getContext(), Elts&: Table);
10950 SDValue CPIdx = DAG.getConstantPool(C: CA, VT: getPointerTy(DL: TD),
10951 Align: TD.getPrefTypeAlign(Ty: CA->getType()));
10952 SDValue ExtLoad = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl: DL, VT, Chain: DAG.getEntryNode(),
10953 Ptr: DAG.getMemBasePlusOffset(Base: CPIdx, Offset: Lookup, DL),
10954 PtrInfo, MemVT: MVT::i8);
10955 if (Node->getOpcode() == ISD::CTTZ_ZERO_POISON)
10956 return ExtLoad;
10957
10958 EVT SetCCVT =
10959 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10960 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
10961 SDValue SrcIsZero = DAG.getSetCC(DL, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
10962 return DAG.getSelect(DL, VT, Cond: SrcIsZero,
10963 LHS: DAG.getConstant(Val: BitWidth, DL, VT), RHS: ExtLoad);
10964}
10965
10966SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
10967 SDLoc dl(Node);
10968 EVT VT = Node->getValueType(ResNo: 0);
10969 SDValue Op = Node->getOperand(Num: 0);
10970 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
10971
10972 // If the non-ZERO_POISON version is supported we can use that instead.
10973 if (Node->getOpcode() == ISD::CTTZ_ZERO_POISON &&
10974 isOperationLegalOrCustom(Op: ISD::CTTZ, VT))
10975 return DAG.getNode(Opcode: ISD::CTTZ, DL: dl, VT, Operand: Op);
10976
10977 // If the ZERO_POISON version is supported use that and handle the zero case.
10978 if (isOperationLegalOrCustom(Op: ISD::CTTZ_ZERO_POISON, VT)) {
10979 EVT SetCCVT =
10980 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10981 SDValue CTTZ = DAG.getNode(Opcode: ISD::CTTZ_ZERO_POISON, DL: dl, VT, Operand: Op);
10982 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
10983 SDValue SrcIsZero = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
10984 return DAG.getSelect(DL: dl, VT, Cond: SrcIsZero,
10985 LHS: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT), RHS: CTTZ);
10986 }
10987
10988 // Only expand vector types if we have the appropriate vector bit operations.
10989 // This includes the operations needed to expand CTPOP if it isn't supported.
10990 if (VT.isVector() && (!isPowerOf2_32(Value: NumBitsPerElt) ||
10991 (!isOperationLegalOrCustom(Op: ISD::CTPOP, VT) &&
10992 !isOperationLegalOrCustom(Op: ISD::CTLZ, VT) &&
10993 !canExpandVectorCTPOP(TLI: *this, VT)) ||
10994 !isOperationLegalOrCustom(Op: ISD::SUB, VT) ||
10995 !isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) ||
10996 !isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT)))
10997 return SDValue();
10998
10999 // Emit Table Lookup if ISD::CTPOP used in the fallback path below is going
11000 // to be expanded or converted to a libcall.
11001 if (!VT.isVector() && !isOperationLegalOrCustomOrPromote(Op: ISD::CTPOP, VT) &&
11002 !isOperationLegal(Op: ISD::CTLZ, VT))
11003 if (SDValue V = CTTZTableLookup(Node, DAG, DL: dl, VT, Op, BitWidth: NumBitsPerElt))
11004 return V;
11005
11006 // for now, we use: { return popcount(~x & (x - 1)); }
11007 // unless the target has ctlz but not ctpop, in which case we use:
11008 // { return 32 - nlz(~x & (x-1)); }
11009 // Ref: "Hacker's Delight" by Henry Warren
11010 SDValue Tmp = DAG.getNode(
11011 Opcode: ISD::AND, DL: dl, VT, N1: DAG.getNOT(DL: dl, Val: Op, VT),
11012 N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 1, DL: dl, VT)));
11013
11014 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
11015 if (isOperationLegal(Op: ISD::CTLZ, VT) && !isOperationLegal(Op: ISD::CTPOP, VT)) {
11016 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT),
11017 N2: DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Tmp));
11018 }
11019
11020 return DAG.getNode(Opcode: ISD::CTPOP, DL: dl, VT, Operand: Tmp);
11021}
11022
11023SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
11024 SDValue Op = Node->getOperand(Num: 0);
11025 SDValue Mask = Node->getOperand(Num: 1);
11026 SDValue VL = Node->getOperand(Num: 2);
11027 SDLoc dl(Node);
11028 EVT VT = Node->getValueType(ResNo: 0);
11029
11030 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
11031 SDValue Not = DAG.getNode(Opcode: ISD::VP_XOR, DL: dl, VT, N1: Op,
11032 N2: DAG.getAllOnesConstant(DL: dl, VT), N3: Mask, N4: VL);
11033 SDValue MinusOne = DAG.getNode(Opcode: ISD::VP_SUB, DL: dl, VT, N1: Op,
11034 N2: DAG.getConstant(Val: 1, DL: dl, VT), N3: Mask, N4: VL);
11035 SDValue Tmp = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Not, N2: MinusOne, N3: Mask, N4: VL);
11036 return DAG.getNode(Opcode: ISD::VP_CTPOP, DL: dl, VT, N1: Tmp, N2: Mask, N3: VL);
11037}
11038
11039SDValue TargetLowering::expandVPCTTZElements(SDNode *N,
11040 SelectionDAG &DAG) const {
11041 // %cond = to_bool_vec %source
11042 // %splat = splat /*val=*/VL
11043 // %tz = step_vector
11044 // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
11045 // %r = vp.reduce.umin %v
11046 SDLoc DL(N);
11047 SDValue Source = N->getOperand(Num: 0);
11048 SDValue Mask = N->getOperand(Num: 1);
11049 SDValue EVL = N->getOperand(Num: 2);
11050 EVT SrcVT = Source.getValueType();
11051 EVT ResVT = N->getValueType(ResNo: 0);
11052 EVT ResVecVT =
11053 EVT::getVectorVT(Context&: *DAG.getContext(), VT: ResVT, EC: SrcVT.getVectorElementCount());
11054
11055 // Convert to boolean vector.
11056 if (SrcVT.getScalarType() != MVT::i1) {
11057 SDValue AllZero = DAG.getConstant(Val: 0, DL, VT: SrcVT);
11058 SrcVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i1,
11059 EC: SrcVT.getVectorElementCount());
11060 Source = DAG.getNode(Opcode: ISD::VP_SETCC, DL, VT: SrcVT, N1: Source, N2: AllZero,
11061 N3: DAG.getCondCode(Cond: ISD::SETNE), N4: Mask, N5: EVL);
11062 }
11063
11064 SDValue ExtEVL = DAG.getZExtOrTrunc(Op: EVL, DL, VT: ResVT);
11065 SDValue Splat = DAG.getSplat(VT: ResVecVT, DL, Op: ExtEVL);
11066 SDValue StepVec = DAG.getStepVector(DL, ResVT: ResVecVT);
11067 SDValue Select =
11068 DAG.getNode(Opcode: ISD::VP_SELECT, DL, VT: ResVecVT, N1: Source, N2: StepVec, N3: Splat, N4: EVL);
11069 return DAG.getNode(Opcode: ISD::VP_REDUCE_UMIN, DL, VT: ResVT, N1: ExtEVL, N2: Select, N3: Mask, N4: EVL);
11070}
11071
11072/// Returns a type-legalized version of \p Mask as the first item in the
11073/// pair. The second item contains a type-legalized step vector that's
11074/// guaranteed to fit the number of elements in \p Mask.
11075/// If the stepvector would require splitting, returns an empty SDValue
11076/// as the second item to signal that the operation should be split instead.
11077static std::pair<SDValue, SDValue>
11078getLegalMaskAndStepVector(SDValue Mask, bool ZeroIsPoison, SDLoc DL,
11079 SelectionDAG &DAG) {
11080 EVT MaskVT = Mask.getValueType();
11081 EVT BoolVT = MaskVT.getScalarType();
11082
11083 // Find a suitable type for a stepvector.
11084 // If zero is poison, we can assume the upper limit of the result is VF-1.
11085 ConstantRange VScaleRange(1, /*isFullSet=*/true); // Fixed length default.
11086 if (MaskVT.isScalableVector())
11087 VScaleRange = getVScaleRange(F: &DAG.getMachineFunction().getFunction(), BitWidth: 64);
11088 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11089 uint64_t EltWidth = TLI.getBitWidthForCttzElements(
11090 RetVT: EVT(TLI.getVectorIdxTy(DL: DAG.getDataLayout())),
11091 EC: MaskVT.getVectorElementCount(), ZeroIsPoison, VScaleRange: &VScaleRange);
11092 // If the step vector element type is smaller than the mask element type,
11093 // use the mask type directly to avoid widening issues.
11094 EltWidth = std::max(a: EltWidth, b: BoolVT.getFixedSizeInBits());
11095 EVT StepVT = MVT::getIntegerVT(BitWidth: EltWidth);
11096 EVT StepVecVT = MaskVT.changeVectorElementType(Context&: *DAG.getContext(), EltVT: StepVT);
11097
11098 // If promotion or widening is required to make the type legal, do it here.
11099 // Promotion of integers within LegalizeVectorOps is looking for types of
11100 // the same size but with a smaller number of larger elements, not the usual
11101 // larger size with the same number of larger elements.
11102 TargetLowering::LegalizeTypeAction TypeAction =
11103 TLI.getTypeAction(Context&: *DAG.getContext(), VT: StepVecVT);
11104 SDValue StepVec;
11105 if (TypeAction == TargetLowering::TypePromoteInteger) {
11106 StepVecVT = TLI.getTypeToTransformTo(Context&: *DAG.getContext(), VT: StepVecVT);
11107 StepVec = DAG.getStepVector(DL, ResVT: StepVecVT);
11108 } else if (TypeAction == TargetLowering::TypeWidenVector) {
11109 // For widening, the element count changes. Create a step vector with only
11110 // the original elements valid and zeros for padding. Also widen the mask.
11111 EVT WideVecVT = TLI.getTypeToTransformTo(Context&: *DAG.getContext(), VT: StepVecVT);
11112 unsigned WideNumElts = WideVecVT.getVectorNumElements();
11113
11114 // Build widened step vector: <0, 1, ..., OrigNumElts-1, poison, poison, ..>
11115 SDValue OrigStepVec = DAG.getStepVector(DL, ResVT: StepVecVT);
11116 SDValue UndefStep = DAG.getPOISON(VT: WideVecVT);
11117 StepVec = DAG.getInsertSubvector(DL, Vec: UndefStep, SubVec: OrigStepVec, Idx: 0);
11118
11119 // Widen mask: pad with zeros.
11120 EVT WideMaskVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: BoolVT, NumElements: WideNumElts);
11121 SDValue ZeroMask = DAG.getConstant(Val: 0, DL, VT: WideMaskVT);
11122 Mask = DAG.getInsertSubvector(DL, Vec: ZeroMask, SubVec: Mask, Idx: 0);
11123 } else if (TypeAction == TargetLowering::TypeSplitVector) {
11124 // The stepvector type would require splitting. Signal to the caller
11125 // that the operation should be split instead of expanded.
11126 return {Mask, SDValue()};
11127 } else {
11128 StepVec = DAG.getStepVector(DL, ResVT: StepVecVT);
11129 }
11130
11131 return {Mask, StepVec};
11132}
11133
11134SDValue TargetLowering::expandVectorFindLastActive(SDNode *N,
11135 SelectionDAG &DAG) const {
11136 SDLoc DL(N);
11137 auto [Mask, StepVec] = getLegalMaskAndStepVector(
11138 Mask: N->getOperand(Num: 0), /*ZeroIsPoison=*/true, DL, DAG);
11139
11140 // If StepVec is empty, the stepvector would require splitting.
11141 // Split the operation instead and let it be recursively legalized.
11142 if (!StepVec) {
11143 EVT MaskVT = N->getOperand(Num: 0).getValueType();
11144 EVT ResVT = N->getValueType(ResNo: 0);
11145
11146 // Split the mask
11147 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT: MaskVT);
11148 auto [MaskLo, MaskHi] = DAG.SplitVector(N: N->getOperand(Num: 0), DL);
11149
11150 // Create split VECTOR_FIND_LAST_ACTIVE operations
11151 SDValue LoResult =
11152 DAG.getNode(Opcode: ISD::VECTOR_FIND_LAST_ACTIVE, DL, VT: ResVT, Operand: MaskLo);
11153 SDValue HiResult =
11154 DAG.getNode(Opcode: ISD::VECTOR_FIND_LAST_ACTIVE, DL, VT: ResVT, Operand: MaskHi);
11155
11156 // Check if any lane is active in the high mask.
11157 SDValue AnyHiActive = DAG.getNode(Opcode: ISD::VECREDUCE_OR, DL, VT: MVT::i1, Operand: MaskHi);
11158 SDValue Cond = DAG.getBoolExtOrTrunc(
11159 Op: AnyHiActive, SL: DL,
11160 VT: getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: MVT::i1),
11161 OpVT: MVT::i1);
11162
11163 // Adjust HiResult by adding the number of elements in Lo
11164 SDValue LoNumElts =
11165 DAG.getElementCount(DL, VT: ResVT, EC: LoVT.getVectorElementCount());
11166 SDValue AdjustedHiResult =
11167 DAG.getNode(Opcode: ISD::ADD, DL, VT: ResVT, N1: HiResult, N2: LoNumElts);
11168
11169 // Return: AnyHiActive ? AdjustedHiResult : LoResult;
11170 return DAG.getNode(Opcode: ISD::SELECT, DL, VT: ResVT, N1: Cond, N2: AdjustedHiResult,
11171 N3: LoResult);
11172 }
11173
11174 EVT StepVecVT = StepVec.getValueType();
11175 EVT StepVT = StepVec.getValueType().getVectorElementType();
11176
11177 // Zero out lanes with inactive elements, then find the highest remaining
11178 // value from the stepvector.
11179 SDValue Zeroes = DAG.getConstant(Val: 0, DL, VT: StepVecVT);
11180 SDValue ActiveElts = DAG.getSelect(DL, VT: StepVecVT, Cond: Mask, LHS: StepVec, RHS: Zeroes);
11181 SDValue HighestIdx = DAG.getNode(Opcode: ISD::VECREDUCE_UMAX, DL, VT: StepVT, Operand: ActiveElts);
11182 return DAG.getZExtOrTrunc(Op: HighestIdx, DL, VT: N->getValueType(ResNo: 0));
11183}
11184
11185SDValue TargetLowering::expandLoopDependenceMask(SDNode *N,
11186 SelectionDAG &DAG) const {
11187 SDLoc DL(N);
11188 EVT VT = N->getValueType(ResNo: 0);
11189 SDValue SourceValue = N->getOperand(Num: 0);
11190 SDValue SinkValue = N->getOperand(Num: 1);
11191 SDValue EltSizeInBytes = N->getOperand(Num: 2);
11192
11193 // Note: The lane offset is scalable if the mask is scalable.
11194 ElementCount LaneOffsetEC =
11195 ElementCount::get(MinVal: N->getConstantOperandVal(Num: 3), Scalable: VT.isScalableVT());
11196
11197 EVT AddrVT = SourceValue->getValueType(ResNo: 0);
11198 bool IsReadAfterWrite = N->getOpcode() == ISD::LOOP_DEPENDENCE_RAW_MASK;
11199
11200 // Take the difference between the pointers and divided by the element size,
11201 // to see how many lanes separate them.
11202 SDValue Diff = DAG.getNode(Opcode: ISD::SUB, DL, VT: AddrVT, N1: SinkValue, N2: SourceValue);
11203 if (IsReadAfterWrite)
11204 Diff = DAG.getNode(Opcode: ISD::ABS, DL, VT: AddrVT, Operand: Diff);
11205 Diff = DAG.getNode(Opcode: ISD::SDIV, DL, VT: AddrVT, N1: Diff, N2: EltSizeInBytes);
11206
11207 // The pointers do not alias if:
11208 // * Diff <= 0 (WAR_MASK)
11209 // * Diff == 0 (RAW_MASK)
11210 EVT CmpVT =
11211 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: AddrVT);
11212 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: AddrVT);
11213 SDValue Cmp = DAG.getSetCC(DL, VT: CmpVT, LHS: Diff, RHS: Zero,
11214 Cond: IsReadAfterWrite ? ISD::SETEQ : ISD::SETLE);
11215
11216 // The pointers do not alias if:
11217 // Lane + LaneOffset < Diff (WAR/RAW_MASK)
11218 SDValue LaneOffset = DAG.getElementCount(DL, VT: AddrVT, EC: LaneOffsetEC);
11219 SDValue MaskN = DAG.getSelect(
11220 DL, VT: AddrVT, Cond: Cmp,
11221 LHS: DAG.getConstant(Val: APInt::getMaxValue(numBits: AddrVT.getScalarSizeInBits()), DL,
11222 VT: AddrVT),
11223 RHS: Diff);
11224
11225 return DAG.getNode(Opcode: ISD::GET_ACTIVE_LANE_MASK, DL, VT, N1: LaneOffset, N2: MaskN);
11226}
11227
11228SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
11229 bool IsNegative) const {
11230 SDLoc dl(N);
11231 EVT VT = N->getValueType(ResNo: 0);
11232 SDValue Op = N->getOperand(Num: 0);
11233
11234 // If expanding ABS_MIN_POISON, fall back to ABS if the target supports it.
11235 if (N->getOpcode() == ISD::ABS_MIN_POISON &&
11236 isOperationLegalOrCustom(Op: ISD::ABS, VT)) {
11237 SDValue AbsVal = DAG.getNode(Opcode: ISD::ABS, DL: dl, VT, Operand: Op);
11238 if (IsNegative)
11239 return DAG.getNegative(Val: AbsVal, DL: dl, VT);
11240 return AbsVal;
11241 }
11242
11243 // abs(x) -> smax(x,sub(0,x))
11244 if (!IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
11245 isOperationLegal(Op: ISD::SMAX, VT)) {
11246 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
11247 Op = DAG.getFreeze(V: Op);
11248 return DAG.getNode(Opcode: ISD::SMAX, DL: dl, VT, N1: Op,
11249 N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
11250 }
11251
11252 // abs(x) -> umin(x,sub(0,x))
11253 if (!IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
11254 isOperationLegal(Op: ISD::UMIN, VT)) {
11255 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
11256 Op = DAG.getFreeze(V: Op);
11257 return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT, N1: Op,
11258 N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
11259 }
11260
11261 // 0 - abs(x) -> smin(x, sub(0,x))
11262 if (IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
11263 isOperationLegal(Op: ISD::SMIN, VT)) {
11264 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
11265 Op = DAG.getFreeze(V: Op);
11266 return DAG.getNode(Opcode: ISD::SMIN, DL: dl, VT, N1: Op,
11267 N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
11268 }
11269
11270 // Only expand vector types if we have the appropriate vector operations.
11271 if (VT.isVector() &&
11272 (!isOperationLegalOrCustom(Op: ISD::SRA, VT) ||
11273 (!IsNegative && !isOperationLegalOrCustom(Op: ISD::ADD, VT)) ||
11274 (IsNegative && !isOperationLegalOrCustom(Op: ISD::SUB, VT)) ||
11275 !isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT)))
11276 return SDValue();
11277
11278 Op = DAG.getFreeze(V: Op);
11279 SDValue Shift = DAG.getNode(
11280 Opcode: ISD::SRA, DL: dl, VT, N1: Op,
11281 N2: DAG.getShiftAmountConstant(Val: VT.getScalarSizeInBits() - 1, VT, DL: dl));
11282 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Op, N2: Shift);
11283
11284 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
11285 if (!IsNegative)
11286 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Xor, N2: Shift);
11287
11288 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
11289 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Shift, N2: Xor);
11290}
11291
11292SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
11293 SDLoc dl(N);
11294 EVT VT = N->getValueType(ResNo: 0);
11295 SDValue LHS = N->getOperand(Num: 0);
11296 SDValue RHS = N->getOperand(Num: 1);
11297 bool IsSigned = N->getOpcode() == ISD::ABDS;
11298
11299 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
11300 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
11301 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
11302 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
11303 if (isOperationLegal(Op: MaxOpc, VT) && isOperationLegal(Op: MinOpc, VT)) {
11304 LHS = DAG.getFreeze(V: LHS);
11305 RHS = DAG.getFreeze(V: RHS);
11306 SDValue Max = DAG.getNode(Opcode: MaxOpc, DL: dl, VT, N1: LHS, N2: RHS);
11307 SDValue Min = DAG.getNode(Opcode: MinOpc, DL: dl, VT, N1: LHS, N2: RHS);
11308 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Max, N2: Min);
11309 }
11310
11311 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
11312 if (!IsSigned && isOperationLegal(Op: ISD::USUBSAT, VT)) {
11313 LHS = DAG.getFreeze(V: LHS);
11314 RHS = DAG.getFreeze(V: RHS);
11315 return DAG.getNode(Opcode: ISD::OR, DL: dl, VT,
11316 N1: DAG.getNode(Opcode: ISD::USUBSAT, DL: dl, VT, N1: LHS, N2: RHS),
11317 N2: DAG.getNode(Opcode: ISD::USUBSAT, DL: dl, VT, N1: RHS, N2: LHS));
11318 }
11319
11320 // If the subtract doesn't overflow then just use abs(sub())
11321 bool IsNonNegative = DAG.SignBitIsZero(Op: LHS) && DAG.SignBitIsZero(Op: RHS);
11322
11323 if (DAG.willNotOverflowSub(IsSigned: IsSigned || IsNonNegative, N0: LHS, N1: RHS))
11324 return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT,
11325 Operand: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS));
11326
11327 if (DAG.willNotOverflowSub(IsSigned: IsSigned || IsNonNegative, N0: RHS, N1: LHS))
11328 return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT,
11329 Operand: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: RHS, N2: LHS));
11330
11331 EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
11332 ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
11333 LHS = DAG.getFreeze(V: LHS);
11334 RHS = DAG.getFreeze(V: RHS);
11335 SDValue Cmp = DAG.getSetCC(DL: dl, VT: CCVT, LHS, RHS, Cond: CC);
11336
11337 // Branchless expansion iff cmp result is allbits:
11338 // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
11339 // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
11340 if (CCVT == VT && getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
11341 SDValue Diff = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS);
11342 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Diff, N2: Cmp);
11343 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Cmp, N2: Xor);
11344 }
11345
11346 // Similar to the branchless expansion, if we don't prefer selects, use the
11347 // (sign-extended) usubo overflow flag if the (scalar) type is illegal as this
11348 // is more likely to legalize cleanly: abdu(lhs, rhs) -> sub(xor(sub(lhs,
11349 // rhs), uof(lhs, rhs)), uof(lhs, rhs))
11350 if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT) &&
11351 !preferSelectsOverBooleanArithmetic(VT)) {
11352 SDValue USubO =
11353 DAG.getNode(Opcode: ISD::USUBO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::i1), Ops: {LHS, RHS});
11354 SDValue Cmp = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT, Operand: USubO.getValue(R: 1));
11355 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: USubO.getValue(R: 0), N2: Cmp);
11356 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Xor, N2: Cmp);
11357 }
11358
11359 // FIXME: Should really try to split the vector in case it's legal on a
11360 // subvector.
11361 if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
11362 return DAG.UnrollVectorOp(N);
11363
11364 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
11365 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
11366 return DAG.getSelect(DL: dl, VT, Cond: Cmp, LHS: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS),
11367 RHS: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: RHS, N2: LHS));
11368}
11369
11370SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
11371 SDLoc dl(N);
11372 EVT VT = N->getValueType(ResNo: 0);
11373 SDValue LHS = N->getOperand(Num: 0);
11374 SDValue RHS = N->getOperand(Num: 1);
11375
11376 unsigned Opc = N->getOpcode();
11377 bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
11378 bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
11379 unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
11380 unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
11381 unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
11382 unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
11383 assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS ||
11384 Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
11385 "Unknown AVG node");
11386
11387 // If the operands are already extended, we can add+shift.
11388 bool IsExt =
11389 (IsSigned && DAG.ComputeNumSignBits(Op: LHS) >= 2 &&
11390 DAG.ComputeNumSignBits(Op: RHS) >= 2) ||
11391 (!IsSigned && DAG.computeKnownBits(Op: LHS).countMinLeadingZeros() >= 1 &&
11392 DAG.computeKnownBits(Op: RHS).countMinLeadingZeros() >= 1);
11393 if (IsExt) {
11394 SDValue Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: LHS, N2: RHS);
11395 if (!IsFloor)
11396 Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Sum, N2: DAG.getConstant(Val: 1, DL: dl, VT));
11397 return DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: Sum,
11398 N2: DAG.getShiftAmountConstant(Val: 1, VT, DL: dl));
11399 }
11400
11401 // For scalars, see if we can efficiently extend/truncate to use add+shift.
11402 if (VT.isScalarInteger()) {
11403 EVT ExtVT = VT.widenIntegerElementType(Context&: *DAG.getContext());
11404 if (isTypeLegal(VT: ExtVT) && isTruncateFree(FromVT: ExtVT, ToVT: VT)) {
11405 LHS = DAG.getNode(Opcode: ExtOpc, DL: dl, VT: ExtVT, Operand: LHS);
11406 RHS = DAG.getNode(Opcode: ExtOpc, DL: dl, VT: ExtVT, Operand: RHS);
11407 SDValue Avg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ExtVT, N1: LHS, N2: RHS);
11408 if (!IsFloor)
11409 Avg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ExtVT, N1: Avg,
11410 N2: DAG.getConstant(Val: 1, DL: dl, VT: ExtVT));
11411 // Just use SRL as we will be truncating away the extended sign bits.
11412 Avg = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ExtVT, N1: Avg,
11413 N2: DAG.getShiftAmountConstant(Val: 1, VT: ExtVT, DL: dl));
11414 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Avg);
11415 }
11416 }
11417
11418 // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
11419 if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT) &&
11420 isOperationLegalOrCustom(
11421 Op: ISD::UADDO, VT: getLegalTypeToTransformTo(Context&: *DAG.getContext(), VT))) {
11422 SDValue UAddWithOverflow =
11423 DAG.getNode(Opcode: ISD::UADDO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::i1), Ops: {RHS, LHS});
11424
11425 SDValue Sum = UAddWithOverflow.getValue(R: 0);
11426 SDValue Overflow = UAddWithOverflow.getValue(R: 1);
11427
11428 // Right shift the sum by 1
11429 SDValue LShrVal = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Sum,
11430 N2: DAG.getShiftAmountConstant(Val: 1, VT, DL: dl));
11431
11432 SDValue ZeroExtOverflow = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: Overflow);
11433 SDValue OverflowShl = DAG.getNode(
11434 Opcode: ISD::SHL, DL: dl, VT, N1: ZeroExtOverflow,
11435 N2: DAG.getShiftAmountConstant(Val: VT.getScalarSizeInBits() - 1, VT, DL: dl));
11436
11437 return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: LShrVal, N2: OverflowShl);
11438 }
11439
11440 // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
11441 // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
11442 // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
11443 // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
11444 LHS = DAG.getFreeze(V: LHS);
11445 RHS = DAG.getFreeze(V: RHS);
11446 SDValue Sign = DAG.getNode(Opcode: SignOpc, DL: dl, VT, N1: LHS, N2: RHS);
11447 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: LHS, N2: RHS);
11448 SDValue Shift =
11449 DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: Xor, N2: DAG.getShiftAmountConstant(Val: 1, VT, DL: dl));
11450 return DAG.getNode(Opcode: SumOpc, DL: dl, VT, N1: Sign, N2: Shift);
11451}
11452
11453SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
11454 SDLoc dl(N);
11455 EVT VT = N->getValueType(ResNo: 0);
11456 SDValue Op = N->getOperand(Num: 0);
11457
11458 if (!VT.isSimple())
11459 return SDValue();
11460
11461 EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
11462 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
11463 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
11464 default:
11465 return SDValue();
11466 case MVT::i16:
11467 // Use a rotate by 8. This can be further expanded if necessary.
11468 return DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
11469 case MVT::i32:
11470 // This is meant for ARM specifically, which has ROTR but no ROTL.
11471 // t = x ^ rotr(x, 16)
11472 // t = bic(t, 0x00ff0000)
11473 // t = lshr(t, 8)
11474 // x = t ^ rotr(x, 8)
11475 if (isOperationLegalOrCustom(Op: ISD::ROTR, VT)) {
11476 SDValue Rotr16 =
11477 DAG.getNode(Opcode: ISD::ROTR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 16, DL: dl, VT: SHVT));
11478 SDValue Tmp = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Op, N2: Rotr16);
11479 Tmp = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp,
11480 N2: DAG.getConstant(Val: 0xFF00FFFF, DL: dl, VT));
11481 Tmp = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
11482 SDValue Rotr8 =
11483 DAG.getNode(Opcode: ISD::ROTR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
11484 return DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Tmp, N2: Rotr8);
11485 }
11486 Tmp4 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
11487 Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
11488 N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT));
11489 Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
11490 Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
11491 Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT));
11492 Tmp1 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
11493 Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp3);
11494 Tmp2 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp1);
11495 return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp2);
11496 case MVT::i64:
11497 Tmp8 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT));
11498 Tmp7 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
11499 N2: DAG.getConstant(Val: 255ULL<<8, DL: dl, VT));
11500 Tmp7 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp7, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT));
11501 Tmp6 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
11502 N2: DAG.getConstant(Val: 255ULL<<16, DL: dl, VT));
11503 Tmp6 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp6, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
11504 Tmp5 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
11505 N2: DAG.getConstant(Val: 255ULL<<24, DL: dl, VT));
11506 Tmp5 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp5, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
11507 Tmp4 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
11508 Tmp4 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp4,
11509 N2: DAG.getConstant(Val: 255ULL<<24, DL: dl, VT));
11510 Tmp3 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
11511 Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp3,
11512 N2: DAG.getConstant(Val: 255ULL<<16, DL: dl, VT));
11513 Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT));
11514 Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2,
11515 N2: DAG.getConstant(Val: 255ULL<<8, DL: dl, VT));
11516 Tmp1 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT));
11517 Tmp8 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp7);
11518 Tmp6 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp6, N2: Tmp5);
11519 Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp3);
11520 Tmp2 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp1);
11521 Tmp8 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp6);
11522 Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp2);
11523 return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp4);
11524 }
11525}
11526
11527SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
11528 SDLoc dl(N);
11529 EVT VT = N->getValueType(ResNo: 0);
11530 SDValue Op = N->getOperand(Num: 0);
11531 SDValue Mask = N->getOperand(Num: 1);
11532 SDValue EVL = N->getOperand(Num: 2);
11533
11534 if (!VT.isSimple())
11535 return SDValue();
11536
11537 EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
11538 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
11539 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
11540 default:
11541 return SDValue();
11542 case MVT::i16:
11543 Tmp1 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
11544 N3: Mask, N4: EVL);
11545 Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
11546 N3: Mask, N4: EVL);
11547 return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp1, N2: Tmp2, N3: Mask, N4: EVL);
11548 case MVT::i32:
11549 Tmp4 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
11550 N3: Mask, N4: EVL);
11551 Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT),
11552 N3: Mask, N4: EVL);
11553 Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
11554 N3: Mask, N4: EVL);
11555 Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
11556 N3: Mask, N4: EVL);
11557 Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
11558 N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT), N3: Mask, N4: EVL);
11559 Tmp1 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
11560 N3: Mask, N4: EVL);
11561 Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp3, N3: Mask, N4: EVL);
11562 Tmp2 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp1, N3: Mask, N4: EVL);
11563 return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp2, N3: Mask, N4: EVL);
11564 case MVT::i64:
11565 Tmp8 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT),
11566 N3: Mask, N4: EVL);
11567 Tmp7 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
11568 N2: DAG.getConstant(Val: 255ULL << 8, DL: dl, VT), N3: Mask, N4: EVL);
11569 Tmp7 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp7, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT),
11570 N3: Mask, N4: EVL);
11571 Tmp6 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
11572 N2: DAG.getConstant(Val: 255ULL << 16, DL: dl, VT), N3: Mask, N4: EVL);
11573 Tmp6 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp6, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
11574 N3: Mask, N4: EVL);
11575 Tmp5 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
11576 N2: DAG.getConstant(Val: 255ULL << 24, DL: dl, VT), N3: Mask, N4: EVL);
11577 Tmp5 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp5, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
11578 N3: Mask, N4: EVL);
11579 Tmp4 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
11580 N3: Mask, N4: EVL);
11581 Tmp4 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp4,
11582 N2: DAG.getConstant(Val: 255ULL << 24, DL: dl, VT), N3: Mask, N4: EVL);
11583 Tmp3 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
11584 N3: Mask, N4: EVL);
11585 Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp3,
11586 N2: DAG.getConstant(Val: 255ULL << 16, DL: dl, VT), N3: Mask, N4: EVL);
11587 Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT),
11588 N3: Mask, N4: EVL);
11589 Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
11590 N2: DAG.getConstant(Val: 255ULL << 8, DL: dl, VT), N3: Mask, N4: EVL);
11591 Tmp1 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT),
11592 N3: Mask, N4: EVL);
11593 Tmp8 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp7, N3: Mask, N4: EVL);
11594 Tmp6 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp6, N2: Tmp5, N3: Mask, N4: EVL);
11595 Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp3, N3: Mask, N4: EVL);
11596 Tmp2 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp1, N3: Mask, N4: EVL);
11597 Tmp8 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp6, N3: Mask, N4: EVL);
11598 Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp2, N3: Mask, N4: EVL);
11599 return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp4, N3: Mask, N4: EVL);
11600 }
11601}
11602
11603SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
11604 SDLoc dl(N);
11605 EVT VT = N->getValueType(ResNo: 0);
11606 SDValue Op = N->getOperand(Num: 0);
11607 EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
11608 unsigned Sz = VT.getScalarSizeInBits();
11609
11610 SDValue Tmp, Tmp2, Tmp3;
11611
11612 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
11613 // and finally the i1 pairs.
11614 // TODO: We can easily support i4/i2 legal types if any target ever does.
11615 if (Sz >= 8 && isPowerOf2_32(Value: Sz)) {
11616 // Create the masks - repeating the pattern every byte.
11617 APInt Mask4 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x0F));
11618 APInt Mask2 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x33));
11619 APInt Mask1 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x55));
11620
11621 // BSWAP if the type is wider than a single byte.
11622 Tmp = (Sz > 8 ? DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT, Operand: Op) : Op);
11623
11624 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
11625 Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT));
11626 Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask4, DL: dl, VT));
11627 Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask4, DL: dl, VT));
11628 Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT));
11629 Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
11630
11631 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
11632 Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT));
11633 Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask2, DL: dl, VT));
11634 Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask2, DL: dl, VT));
11635 Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT));
11636 Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
11637
11638 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
11639 Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT));
11640 Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask1, DL: dl, VT));
11641 Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask1, DL: dl, VT));
11642 Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT));
11643 Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
11644 return Tmp;
11645 }
11646
11647 Tmp = DAG.getConstant(Val: 0, DL: dl, VT);
11648 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
11649 if (I < J)
11650 Tmp2 =
11651 DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: J - I, DL: dl, VT: SHVT));
11652 else
11653 Tmp2 =
11654 DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: I - J, DL: dl, VT: SHVT));
11655
11656 APInt Shift = APInt::getOneBitSet(numBits: Sz, BitNo: J);
11657 Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Shift, DL: dl, VT));
11658 Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp, N2: Tmp2);
11659 }
11660
11661 return Tmp;
11662}
11663
11664SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
11665 assert(N->getOpcode() == ISD::VP_BITREVERSE);
11666
11667 SDLoc dl(N);
11668 EVT VT = N->getValueType(ResNo: 0);
11669 SDValue Op = N->getOperand(Num: 0);
11670 SDValue Mask = N->getOperand(Num: 1);
11671 SDValue EVL = N->getOperand(Num: 2);
11672 EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
11673 unsigned Sz = VT.getScalarSizeInBits();
11674
11675 SDValue Tmp, Tmp2, Tmp3;
11676
11677 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
11678 // and finally the i1 pairs.
11679 // TODO: We can easily support i4/i2 legal types if any target ever does.
11680 if (Sz >= 8 && isPowerOf2_32(Value: Sz)) {
11681 // Create the masks - repeating the pattern every byte.
11682 APInt Mask4 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x0F));
11683 APInt Mask2 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x33));
11684 APInt Mask1 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x55));
11685
11686 // BSWAP if the type is wider than a single byte.
11687 Tmp = (Sz > 8 ? DAG.getNode(Opcode: ISD::VP_BSWAP, DL: dl, VT, N1: Op, N2: Mask, N3: EVL) : Op);
11688
11689 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
11690 Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT),
11691 N3: Mask, N4: EVL);
11692 Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
11693 N2: DAG.getConstant(Val: Mask4, DL: dl, VT), N3: Mask, N4: EVL);
11694 Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask4, DL: dl, VT),
11695 N3: Mask, N4: EVL);
11696 Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT),
11697 N3: Mask, N4: EVL);
11698 Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);
11699
11700 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
11701 Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT),
11702 N3: Mask, N4: EVL);
11703 Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
11704 N2: DAG.getConstant(Val: Mask2, DL: dl, VT), N3: Mask, N4: EVL);
11705 Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask2, DL: dl, VT),
11706 N3: Mask, N4: EVL);
11707 Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT),
11708 N3: Mask, N4: EVL);
11709 Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);
11710
11711 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
11712 Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT),
11713 N3: Mask, N4: EVL);
11714 Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
11715 N2: DAG.getConstant(Val: Mask1, DL: dl, VT), N3: Mask, N4: EVL);
11716 Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask1, DL: dl, VT),
11717 N3: Mask, N4: EVL);
11718 Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT),
11719 N3: Mask, N4: EVL);
11720 Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);
11721 return Tmp;
11722 }
11723 return SDValue();
11724}
11725
11726std::pair<SDValue, SDValue>
11727TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
11728 SelectionDAG &DAG) const {
11729 SDLoc SL(LD);
11730 SDValue Chain = LD->getChain();
11731 SDValue BasePTR = LD->getBasePtr();
11732 EVT SrcVT = LD->getMemoryVT();
11733 EVT DstVT = LD->getValueType(ResNo: 0);
11734 ISD::LoadExtType ExtType = LD->getExtensionType();
11735
11736 if (SrcVT.isScalableVector())
11737 report_fatal_error(reason: "Cannot scalarize scalable vector loads");
11738
11739 unsigned NumElem = SrcVT.getVectorNumElements();
11740
11741 EVT SrcEltVT = SrcVT.getScalarType();
11742 EVT DstEltVT = DstVT.getScalarType();
11743
11744 // A vector must always be stored in memory as-is, i.e. without any padding
11745 // between the elements, since various code depend on it, e.g. in the
11746 // handling of a bitcast of a vector type to int, which may be done with a
11747 // vector store followed by an integer load. A vector that does not have
11748 // elements that are byte-sized must therefore be stored as an integer
11749 // built out of the extracted vector elements.
11750 if (!SrcEltVT.isByteSized()) {
11751 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
11752 EVT LoadVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumLoadBits);
11753
11754 unsigned NumSrcBits = SrcVT.getSizeInBits();
11755 EVT SrcIntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumSrcBits);
11756
11757 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
11758 SDValue SrcEltBitMask = DAG.getConstant(
11759 Val: APInt::getLowBitsSet(numBits: NumLoadBits, loBitsSet: SrcEltBits), DL: SL, VT: LoadVT);
11760
11761 // Load the whole vector and avoid masking off the top bits as it makes
11762 // the codegen worse.
11763 SDValue Load =
11764 DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl: SL, VT: LoadVT, Chain, Ptr: BasePTR,
11765 PtrInfo: LD->getPointerInfo(), MemVT: SrcIntVT, Alignment: LD->getBaseAlign(),
11766 MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
11767
11768 SmallVector<SDValue, 8> Vals;
11769 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
11770 unsigned ShiftIntoIdx =
11771 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
11772 SDValue ShiftAmount = DAG.getShiftAmountConstant(
11773 Val: ShiftIntoIdx * SrcEltVT.getSizeInBits(), VT: LoadVT, DL: SL);
11774 SDValue ShiftedElt = DAG.getNode(Opcode: ISD::SRL, DL: SL, VT: LoadVT, N1: Load, N2: ShiftAmount);
11775 SDValue Elt =
11776 DAG.getNode(Opcode: ISD::AND, DL: SL, VT: LoadVT, N1: ShiftedElt, N2: SrcEltBitMask);
11777 SDValue Scalar = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SL, VT: SrcEltVT, Operand: Elt);
11778
11779 if (ExtType != ISD::NON_EXTLOAD) {
11780 unsigned ExtendOp = ISD::getExtForLoadExtType(IsFP: false, ExtType);
11781 Scalar = DAG.getNode(Opcode: ExtendOp, DL: SL, VT: DstEltVT, Operand: Scalar);
11782 }
11783
11784 Vals.push_back(Elt: Scalar);
11785 }
11786
11787 SDValue Value = DAG.getBuildVector(VT: DstVT, DL: SL, Ops: Vals);
11788 return std::make_pair(x&: Value, y: Load.getValue(R: 1));
11789 }
11790
11791 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
11792 assert(SrcEltVT.isByteSized());
11793
11794 SmallVector<SDValue, 8> Vals;
11795 SmallVector<SDValue, 8> LoadChains;
11796
11797 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
11798 SDValue ScalarLoad = DAG.getExtLoad(
11799 ExtType, dl: SL, VT: DstEltVT, Chain, Ptr: BasePTR,
11800 PtrInfo: LD->getPointerInfo().getWithOffset(O: Idx * Stride), MemVT: SrcEltVT,
11801 Alignment: LD->getBaseAlign(), MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
11802
11803 BasePTR = DAG.getObjectPtrOffset(SL, Ptr: BasePTR, Offset: TypeSize::getFixed(ExactSize: Stride));
11804
11805 Vals.push_back(Elt: ScalarLoad.getValue(R: 0));
11806 LoadChains.push_back(Elt: ScalarLoad.getValue(R: 1));
11807 }
11808
11809 SDValue NewChain = DAG.getNode(Opcode: ISD::TokenFactor, DL: SL, VT: MVT::Other, Ops: LoadChains);
11810 SDValue Value = DAG.getBuildVector(VT: DstVT, DL: SL, Ops: Vals);
11811
11812 return std::make_pair(x&: Value, y&: NewChain);
11813}
11814
11815SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
11816 SelectionDAG &DAG) const {
11817 SDLoc SL(ST);
11818
11819 SDValue Chain = ST->getChain();
11820 SDValue BasePtr = ST->getBasePtr();
11821 SDValue Value = ST->getValue();
11822 EVT StVT = ST->getMemoryVT();
11823
11824 if (StVT.isScalableVector())
11825 report_fatal_error(reason: "Cannot scalarize scalable vector stores");
11826
11827 // The type of the data we want to save
11828 EVT RegVT = Value.getValueType();
11829 EVT RegSclVT = RegVT.getScalarType();
11830
11831 // The type of data as saved in memory.
11832 EVT MemSclVT = StVT.getScalarType();
11833
11834 unsigned NumElem = StVT.getVectorNumElements();
11835
11836 // A vector must always be stored in memory as-is, i.e. without any padding
11837 // between the elements, since various code depend on it, e.g. in the
11838 // handling of a bitcast of a vector type to int, which may be done with a
11839 // vector store followed by an integer load. A vector that does not have
11840 // elements that are byte-sized must therefore be stored as an integer
11841 // built out of the extracted vector elements.
11842 if (!MemSclVT.isByteSized()) {
11843 unsigned NumBits = StVT.getSizeInBits();
11844 EVT IntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumBits);
11845
11846 SDValue CurrVal = DAG.getConstant(Val: 0, DL: SL, VT: IntVT);
11847
11848 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
11849 SDValue Elt = DAG.getExtractVectorElt(DL: SL, VT: RegSclVT, Vec: Value, Idx);
11850 SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SL, VT: MemSclVT, Operand: Elt);
11851 SDValue ExtElt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SL, VT: IntVT, Operand: Trunc);
11852 unsigned ShiftIntoIdx =
11853 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
11854 SDValue ShiftAmount =
11855 DAG.getConstant(Val: ShiftIntoIdx * MemSclVT.getSizeInBits(), DL: SL, VT: IntVT);
11856 SDValue ShiftedElt =
11857 DAG.getNode(Opcode: ISD::SHL, DL: SL, VT: IntVT, N1: ExtElt, N2: ShiftAmount);
11858 CurrVal = DAG.getNode(Opcode: ISD::OR, DL: SL, VT: IntVT, N1: CurrVal, N2: ShiftedElt);
11859 }
11860
11861 return DAG.getStore(Chain, dl: SL, Val: CurrVal, Ptr: BasePtr, PtrInfo: ST->getPointerInfo(),
11862 Alignment: ST->getBaseAlign(), MMOFlags: ST->getMemOperand()->getFlags(),
11863 AAInfo: ST->getAAInfo());
11864 }
11865
11866 // Store Stride in bytes
11867 unsigned Stride = MemSclVT.getSizeInBits() / 8;
11868 assert(Stride && "Zero stride!");
11869 // Extract each of the elements from the original vector and save them into
11870 // memory individually.
11871 SmallVector<SDValue, 8> Stores;
11872 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
11873 SDValue Elt = DAG.getExtractVectorElt(DL: SL, VT: RegSclVT, Vec: Value, Idx);
11874
11875 SDValue Ptr =
11876 DAG.getObjectPtrOffset(SL, Ptr: BasePtr, Offset: TypeSize::getFixed(ExactSize: Idx * Stride));
11877
11878 // This scalar TruncStore may be illegal, but we legalize it later.
11879 SDValue Store = DAG.getTruncStore(
11880 Chain, dl: SL, Val: Elt, Ptr, PtrInfo: ST->getPointerInfo().getWithOffset(O: Idx * Stride),
11881 SVT: MemSclVT, Alignment: ST->getBaseAlign(), MMOFlags: ST->getMemOperand()->getFlags(),
11882 AAInfo: ST->getAAInfo());
11883
11884 Stores.push_back(Elt: Store);
11885 }
11886
11887 return DAG.getNode(Opcode: ISD::TokenFactor, DL: SL, VT: MVT::Other, Ops: Stores);
11888}
11889
11890std::pair<SDValue, SDValue>
11891TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
11892 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
11893 "unaligned indexed loads not implemented!");
11894 SDValue Chain = LD->getChain();
11895 SDValue Ptr = LD->getBasePtr();
11896 EVT VT = LD->getValueType(ResNo: 0);
11897 EVT LoadedVT = LD->getMemoryVT();
11898 SDLoc dl(LD);
11899 auto &MF = DAG.getMachineFunction();
11900
11901 if (VT.isFloatingPoint() || VT.isVector()) {
11902 EVT intVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: LoadedVT.getSizeInBits());
11903 if (isTypeLegal(VT: intVT) && isTypeLegal(VT: LoadedVT)) {
11904 if (!isOperationLegalOrCustom(Op: ISD::LOAD, VT: intVT) &&
11905 LoadedVT.isVector()) {
11906 // Scalarize the load and let the individual components be handled.
11907 return scalarizeVectorLoad(LD, DAG);
11908 }
11909
11910 // Expand to a (misaligned) integer load of the same size,
11911 // then bitconvert to floating point or vector.
11912 SDValue newLoad = DAG.getLoad(VT: intVT, dl, Chain, Ptr,
11913 MMO: LD->getMemOperand());
11914 SDValue Result = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: LoadedVT, Operand: newLoad);
11915 if (LoadedVT != VT)
11916 Result = DAG.getNode(Opcode: VT.isFloatingPoint() ? ISD::FP_EXTEND :
11917 ISD::ANY_EXTEND, DL: dl, VT, Operand: Result);
11918
11919 return std::make_pair(x&: Result, y: newLoad.getValue(R: 1));
11920 }
11921
11922 // Copy the value to a (aligned) stack slot using (unaligned) integer
11923 // loads and stores, then do a (aligned) load from the stack slot.
11924 MVT RegVT = getRegisterType(Context&: *DAG.getContext(), VT: intVT);
11925 unsigned LoadedBytes = LoadedVT.getStoreSize();
11926 unsigned RegBytes = RegVT.getSizeInBits() / 8;
11927 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
11928
11929 // Make sure the stack slot is also aligned for the register type.
11930 SDValue StackBase = DAG.CreateStackTemporary(VT1: LoadedVT, VT2: RegVT);
11931 auto FrameIndex = cast<FrameIndexSDNode>(Val: StackBase.getNode())->getIndex();
11932 SmallVector<SDValue, 8> Stores;
11933 SDValue StackPtr = StackBase;
11934 unsigned Offset = 0;
11935
11936 EVT PtrVT = Ptr.getValueType();
11937 EVT StackPtrVT = StackPtr.getValueType();
11938
11939 SDValue PtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: PtrVT);
11940 SDValue StackPtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: StackPtrVT);
11941
11942 // Do all but one copies using the full register width.
11943 for (unsigned i = 1; i < NumRegs; i++) {
11944 // Load one integer register's worth from the original location.
11945 SDValue Load = DAG.getLoad(
11946 VT: RegVT, dl, Chain, Ptr, PtrInfo: LD->getPointerInfo().getWithOffset(O: Offset),
11947 Alignment: LD->getBaseAlign(), MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
11948 // Follow the load with a store to the stack slot. Remember the store.
11949 Stores.push_back(Elt: DAG.getStore(
11950 Chain: Load.getValue(R: 1), dl, Val: Load, Ptr: StackPtr,
11951 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset)));
11952 // Increment the pointers.
11953 Offset += RegBytes;
11954
11955 Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: PtrIncrement);
11956 StackPtr = DAG.getObjectPtrOffset(SL: dl, Ptr: StackPtr, Offset: StackPtrIncrement);
11957 }
11958
11959 // The last copy may be partial. Do an extending load.
11960 EVT MemVT = EVT::getIntegerVT(Context&: *DAG.getContext(),
11961 BitWidth: 8 * (LoadedBytes - Offset));
11962 SDValue Load = DAG.getExtLoad(
11963 ExtType: ISD::EXTLOAD, dl, VT: RegVT, Chain, Ptr,
11964 PtrInfo: LD->getPointerInfo().getWithOffset(O: Offset), MemVT, Alignment: LD->getBaseAlign(),
11965 MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
11966 // Follow the load with a store to the stack slot. Remember the store.
11967 // On big-endian machines this requires a truncating store to ensure
11968 // that the bits end up in the right place.
11969 Stores.push_back(Elt: DAG.getTruncStore(
11970 Chain: Load.getValue(R: 1), dl, Val: Load, Ptr: StackPtr,
11971 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset), SVT: MemVT));
11972
11973 // The order of the stores doesn't matter - say it with a TokenFactor.
11974 SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: Stores);
11975
11976 // Finally, perform the original load only redirected to the stack slot.
11977 Load = DAG.getExtLoad(ExtType: LD->getExtensionType(), dl, VT, Chain: TF, Ptr: StackBase,
11978 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset: 0),
11979 MemVT: LoadedVT);
11980
11981 // Callers expect a MERGE_VALUES node.
11982 return std::make_pair(x&: Load, y&: TF);
11983 }
11984
11985 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
11986 "Unaligned load of unsupported type.");
11987
11988 // Compute the new VT that is half the size of the old one. This is an
11989 // integer MVT.
11990 unsigned NumBits = LoadedVT.getSizeInBits();
11991 EVT NewLoadedVT;
11992 NewLoadedVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumBits/2);
11993 NumBits >>= 1;
11994
11995 Align Alignment = LD->getBaseAlign();
11996 unsigned IncrementSize = NumBits / 8;
11997 ISD::LoadExtType HiExtType = LD->getExtensionType();
11998
11999 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
12000 if (HiExtType == ISD::NON_EXTLOAD)
12001 HiExtType = ISD::ZEXTLOAD;
12002
12003 // Load the value in two parts
12004 SDValue Lo, Hi;
12005 if (DAG.getDataLayout().isLittleEndian()) {
12006 Lo = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT, Chain, Ptr, PtrInfo: LD->getPointerInfo(),
12007 MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
12008 AAInfo: LD->getAAInfo());
12009
12010 Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
12011 Hi = DAG.getExtLoad(ExtType: HiExtType, dl, VT, Chain, Ptr,
12012 PtrInfo: LD->getPointerInfo().getWithOffset(O: IncrementSize),
12013 MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
12014 AAInfo: LD->getAAInfo());
12015 } else {
12016 Hi = DAG.getExtLoad(ExtType: HiExtType, dl, VT, Chain, Ptr, PtrInfo: LD->getPointerInfo(),
12017 MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
12018 AAInfo: LD->getAAInfo());
12019
12020 Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
12021 Lo = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
12022 PtrInfo: LD->getPointerInfo().getWithOffset(O: IncrementSize),
12023 MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
12024 AAInfo: LD->getAAInfo());
12025 }
12026
12027 // aggregate the two parts
12028 SDValue ShiftAmount = DAG.getShiftAmountConstant(Val: NumBits, VT, DL: dl);
12029 SDValue Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Hi, N2: ShiftAmount);
12030 Result = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Result, N2: Lo);
12031
12032 SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Lo.getValue(R: 1),
12033 N2: Hi.getValue(R: 1));
12034
12035 return std::make_pair(x&: Result, y&: TF);
12036}
12037
12038SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
12039 SelectionDAG &DAG) const {
12040 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
12041 "unaligned indexed stores not implemented!");
12042 SDValue Chain = ST->getChain();
12043 SDValue Ptr = ST->getBasePtr();
12044 SDValue Val = ST->getValue();
12045 EVT VT = Val.getValueType();
12046 Align Alignment = ST->getBaseAlign();
12047 auto &MF = DAG.getMachineFunction();
12048 EVT StoreMemVT = ST->getMemoryVT();
12049
12050 SDLoc dl(ST);
12051 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
12052 EVT intVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: VT.getSizeInBits());
12053 if (isTypeLegal(VT: intVT)) {
12054 if (!isOperationLegalOrCustom(Op: ISD::STORE, VT: intVT) &&
12055 StoreMemVT.isVector()) {
12056 // Scalarize the store and let the individual components be handled.
12057 SDValue Result = scalarizeVectorStore(ST, DAG);
12058 return Result;
12059 }
12060 // Expand to a bitconvert of the value to the integer type of the
12061 // same size, then a (misaligned) int store.
12062 // FIXME: Does not handle truncating floating point stores!
12063 SDValue Result = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: intVT, Operand: Val);
12064 Result = DAG.getStore(Chain, dl, Val: Result, Ptr, PtrInfo: ST->getPointerInfo(),
12065 Alignment, MMOFlags: ST->getMemOperand()->getFlags());
12066 return Result;
12067 }
12068 // Do a (aligned) store to a stack slot, then copy from the stack slot
12069 // to the final destination using (unaligned) integer loads and stores.
12070 MVT RegVT = getRegisterType(
12071 Context&: *DAG.getContext(),
12072 VT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: StoreMemVT.getSizeInBits()));
12073 EVT PtrVT = Ptr.getValueType();
12074 unsigned StoredBytes = StoreMemVT.getStoreSize();
12075 unsigned RegBytes = RegVT.getSizeInBits() / 8;
12076 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
12077
12078 // Make sure the stack slot is also aligned for the register type.
12079 SDValue StackPtr = DAG.CreateStackTemporary(VT1: StoreMemVT, VT2: RegVT);
12080 auto FrameIndex = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
12081
12082 // Perform the original store, only redirected to the stack slot.
12083 SDValue Store = DAG.getTruncStore(
12084 Chain, dl, Val, Ptr: StackPtr,
12085 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset: 0), SVT: StoreMemVT);
12086
12087 EVT StackPtrVT = StackPtr.getValueType();
12088
12089 SDValue PtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: PtrVT);
12090 SDValue StackPtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: StackPtrVT);
12091 SmallVector<SDValue, 8> Stores;
12092 unsigned Offset = 0;
12093
12094 // Do all but one copies using the full register width.
12095 for (unsigned i = 1; i < NumRegs; i++) {
12096 // Load one integer register's worth from the stack slot.
12097 SDValue Load = DAG.getLoad(
12098 VT: RegVT, dl, Chain: Store, Ptr: StackPtr,
12099 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset));
12100 // Store it to the final location. Remember the store.
12101 Stores.push_back(Elt: DAG.getStore(Chain: Load.getValue(R: 1), dl, Val: Load, Ptr,
12102 PtrInfo: ST->getPointerInfo().getWithOffset(O: Offset),
12103 Alignment: ST->getBaseAlign(),
12104 MMOFlags: ST->getMemOperand()->getFlags()));
12105 // Increment the pointers.
12106 Offset += RegBytes;
12107 StackPtr = DAG.getObjectPtrOffset(SL: dl, Ptr: StackPtr, Offset: StackPtrIncrement);
12108 Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: PtrIncrement);
12109 }
12110
12111 // The last store may be partial. Do a truncating store. On big-endian
12112 // machines this requires an extending load from the stack slot to ensure
12113 // that the bits are in the right place.
12114 EVT LoadMemVT =
12115 EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: 8 * (StoredBytes - Offset));
12116
12117 // Load from the stack slot.
12118 SDValue Load = DAG.getExtLoad(
12119 ExtType: ISD::EXTLOAD, dl, VT: RegVT, Chain: Store, Ptr: StackPtr,
12120 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset), MemVT: LoadMemVT);
12121
12122 Stores.push_back(Elt: DAG.getTruncStore(
12123 Chain: Load.getValue(R: 1), dl, Val: Load, Ptr,
12124 PtrInfo: ST->getPointerInfo().getWithOffset(O: Offset), SVT: LoadMemVT,
12125 Alignment: ST->getBaseAlign(), MMOFlags: ST->getMemOperand()->getFlags(), AAInfo: ST->getAAInfo()));
12126 // The order of the stores doesn't matter - say it with a TokenFactor.
12127 SDValue Result = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: Stores);
12128 return Result;
12129 }
12130
12131 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
12132 "Unaligned store of unknown type.");
12133 // Get the half-size VT
12134 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(Context&: *DAG.getContext());
12135 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
12136 unsigned IncrementSize = NumBits / 8;
12137
12138 // Divide the stored value in two parts.
12139 SDValue ShiftAmount =
12140 DAG.getShiftAmountConstant(Val: NumBits, VT: Val.getValueType(), DL: dl);
12141 SDValue Lo = Val;
12142 // If Val is a constant, replace the upper bits with 0. The SRL will constant
12143 // fold and not use the upper bits. A smaller constant may be easier to
12144 // materialize.
12145 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Lo); C && !C->isOpaque())
12146 Lo = DAG.getNode(
12147 Opcode: ISD::AND, DL: dl, VT, N1: Lo,
12148 N2: DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VT.getSizeInBits(), loBitsSet: NumBits), DL: dl,
12149 VT));
12150 SDValue Hi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Val, N2: ShiftAmount);
12151
12152 // Store the two parts
12153 SDValue Store1, Store2;
12154 Store1 = DAG.getTruncStore(Chain, dl,
12155 Val: DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
12156 Ptr, PtrInfo: ST->getPointerInfo(), SVT: NewStoredVT, Alignment,
12157 MMOFlags: ST->getMemOperand()->getFlags());
12158
12159 Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
12160 Store2 = DAG.getTruncStore(
12161 Chain, dl, Val: DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
12162 PtrInfo: ST->getPointerInfo().getWithOffset(O: IncrementSize), SVT: NewStoredVT, Alignment,
12163 MMOFlags: ST->getMemOperand()->getFlags(), AAInfo: ST->getAAInfo());
12164
12165 SDValue Result =
12166 DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Store1, N2: Store2);
12167 return Result;
12168}
12169
12170SDValue
12171TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
12172 const SDLoc &DL, EVT DataVT,
12173 SelectionDAG &DAG,
12174 bool IsCompressedMemory) const {
12175 SDValue Increment;
12176 EVT AddrVT = Addr.getValueType();
12177 EVT MaskVT = Mask.getValueType();
12178 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
12179 "Incompatible types of Data and Mask");
12180 if (IsCompressedMemory) {
12181 // Incrementing the pointer according to number of '1's in the mask.
12182 if (DataVT.isScalableVector()) {
12183 EVT MaskExtVT = MaskVT.changeElementType(Context&: *DAG.getContext(), EltVT: MVT::i32);
12184 SDValue MaskExt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MaskExtVT, Operand: Mask);
12185 Increment = DAG.getNode(Opcode: ISD::VECREDUCE_ADD, DL, VT: MVT::i32, Operand: MaskExt);
12186 } else {
12187 EVT MaskIntVT =
12188 EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: MaskVT.getSizeInBits());
12189 SDValue MaskInIntReg = DAG.getBitcast(VT: MaskIntVT, V: Mask);
12190 if (MaskIntVT.getSizeInBits() < 32) {
12191 MaskInIntReg =
12192 DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i32, Operand: MaskInIntReg);
12193 MaskIntVT = MVT::i32;
12194 }
12195 Increment = DAG.getNode(Opcode: ISD::CTPOP, DL, VT: MaskIntVT, Operand: MaskInIntReg);
12196 }
12197 // Scale is an element size in bytes.
12198 SDValue Scale = DAG.getConstant(Val: DataVT.getScalarSizeInBits() / 8, DL,
12199 VT: AddrVT);
12200 Increment = DAG.getZExtOrTrunc(Op: Increment, DL, VT: AddrVT);
12201 Increment = DAG.getNode(Opcode: ISD::MUL, DL, VT: AddrVT, N1: Increment, N2: Scale);
12202 } else
12203 Increment = DAG.getTypeSize(DL, VT: AddrVT, TS: DataVT.getStoreSize());
12204
12205 return DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: Addr, N2: Increment);
12206}
12207
12208static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
12209 EVT VecVT, const SDLoc &dl,
12210 ElementCount SubEC) {
12211 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
12212 "Cannot index a scalable vector within a fixed-width vector");
12213
12214 unsigned NElts = VecVT.getVectorMinNumElements();
12215 unsigned NumSubElts = SubEC.getKnownMinValue();
12216 EVT IdxVT = Idx.getValueType();
12217
12218 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
12219 // If this is a constant index and we know the value plus the number of the
12220 // elements in the subvector minus one is less than the minimum number of
12221 // elements then it's safe to return Idx.
12222 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Val&: Idx))
12223 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
12224 return Idx;
12225 SDValue VS =
12226 DAG.getVScale(DL: dl, VT: IdxVT, MulImm: APInt(IdxVT.getFixedSizeInBits(), NElts));
12227 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
12228 SDValue Sub = DAG.getNode(Opcode: SubOpcode, DL: dl, VT: IdxVT, N1: VS,
12229 N2: DAG.getConstant(Val: NumSubElts, DL: dl, VT: IdxVT));
12230 return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT: IdxVT, N1: Idx, N2: Sub);
12231 }
12232 if (isPowerOf2_32(Value: NElts) && NumSubElts == 1) {
12233 APInt Imm = APInt::getLowBitsSet(numBits: IdxVT.getSizeInBits(), loBitsSet: Log2_32(Value: NElts));
12234 return DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IdxVT, N1: Idx,
12235 N2: DAG.getConstant(Val: Imm, DL: dl, VT: IdxVT));
12236 }
12237 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
12238 return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT: IdxVT, N1: Idx,
12239 N2: DAG.getConstant(Val: MaxIndex, DL: dl, VT: IdxVT));
12240}
12241
12242SDValue
12243TargetLowering::getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr,
12244 EVT VecVT, SDValue Index,
12245 const SDNodeFlags PtrArithFlags) const {
12246 return getVectorSubVecPointer(
12247 DAG, VecPtr, VecVT,
12248 SubVecVT: EVT::getVectorVT(Context&: *DAG.getContext(), VT: VecVT.getVectorElementType(), NumElements: 1),
12249 Index, PtrArithFlags);
12250}
12251
12252SDValue
12253TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr,
12254 EVT VecVT, EVT SubVecVT, SDValue Index,
12255 const SDNodeFlags PtrArithFlags) const {
12256 SDLoc dl(Index);
12257 // Make sure the index type is big enough to compute in.
12258 Index = DAG.getZExtOrTrunc(Op: Index, DL: dl, VT: VecPtr.getValueType());
12259
12260 EVT EltVT = VecVT.getVectorElementType();
12261
12262 // Calculate the element offset and add it to the pointer.
12263 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
12264 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
12265 "Converting bits to bytes lost precision");
12266 assert(SubVecVT.getVectorElementType() == EltVT &&
12267 "Sub-vector must be a vector with matching element type");
12268 Index = clampDynamicVectorIndex(DAG, Idx: Index, VecVT, dl,
12269 SubEC: SubVecVT.getVectorElementCount());
12270
12271 EVT IdxVT = Index.getValueType();
12272 if (SubVecVT.isScalableVector())
12273 Index =
12274 DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IdxVT, N1: Index,
12275 N2: DAG.getVScale(DL: dl, VT: IdxVT, MulImm: APInt(IdxVT.getSizeInBits(), 1)));
12276
12277 Index = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IdxVT, N1: Index,
12278 N2: DAG.getConstant(Val: EltSize, DL: dl, VT: IdxVT));
12279 return DAG.getMemBasePlusOffset(Base: VecPtr, Offset: Index, DL: dl, Flags: PtrArithFlags);
12280}
12281
12282//===----------------------------------------------------------------------===//
12283// Implementation of Emulated TLS Model
12284//===----------------------------------------------------------------------===//
12285
12286SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
12287 SelectionDAG &DAG) const {
12288 // Access to address of TLS varialbe xyz is lowered to a function call:
12289 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
12290 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
12291 PointerType *VoidPtrType = PointerType::get(C&: *DAG.getContext(), AddressSpace: 0);
12292 SDLoc dl(GA);
12293
12294 ArgListTy Args;
12295 const GlobalValue *GV =
12296 cast<GlobalValue>(Val: GA->getGlobal()->stripPointerCastsAndAliases());
12297 SmallString<32> NameString("__emutls_v.");
12298 NameString += GV->getName();
12299 StringRef EmuTlsVarName(NameString);
12300 const GlobalVariable *EmuTlsVar =
12301 GV->getParent()->getNamedGlobal(Name: EmuTlsVarName);
12302 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
12303 Args.emplace_back(args: DAG.getGlobalAddress(GV: EmuTlsVar, DL: dl, VT: PtrVT), args&: VoidPtrType);
12304
12305 SDValue EmuTlsGetAddr = DAG.getExternalSymbol(Sym: "__emutls_get_address", VT: PtrVT);
12306
12307 TargetLowering::CallLoweringInfo CLI(DAG);
12308 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
12309 CLI.setLibCallee(CC: CallingConv::C, ResultType: VoidPtrType, Target: EmuTlsGetAddr, ArgsList: std::move(Args));
12310 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
12311
12312 // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
12313 // At last for X86 targets, maybe good for other targets too?
12314 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
12315 MFI.setAdjustsStack(true); // Is this only for X86 target?
12316 MFI.setHasCalls(true);
12317
12318 assert((GA->getOffset() == 0) &&
12319 "Emulated TLS must have zero offset in GlobalAddressSDNode");
12320 return CallResult.first;
12321}
12322
12323SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
12324 SelectionDAG &DAG) const {
12325 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
12326 if (!isCtlzFast())
12327 return SDValue();
12328 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get();
12329 SDLoc dl(Op);
12330 if (isNullConstant(V: Op.getOperand(i: 1)) && CC == ISD::SETEQ) {
12331 EVT VT = Op.getOperand(i: 0).getValueType();
12332 SDValue Zext = Op.getOperand(i: 0);
12333 if (VT.bitsLT(VT: MVT::i32)) {
12334 VT = MVT::i32;
12335 Zext = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Op.getOperand(i: 0));
12336 }
12337 unsigned Log2b = Log2_32(Value: VT.getSizeInBits());
12338 SDValue Clz = DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Zext);
12339 SDValue Scc = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Clz,
12340 N2: DAG.getConstant(Val: Log2b, DL: dl, VT: MVT::i32));
12341 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32, Operand: Scc);
12342 }
12343 return SDValue();
12344}
12345
12346SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
12347 SDValue Op0 = Node->getOperand(Num: 0);
12348 SDValue Op1 = Node->getOperand(Num: 1);
12349 EVT VT = Op0.getValueType();
12350 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
12351 unsigned Opcode = Node->getOpcode();
12352 SDLoc DL(Node);
12353
12354 // If both sign bits are zero, flip UMIN/UMAX <-> SMIN/SMAX if legal.
12355 unsigned AltOpcode = ISD::getOppositeSignednessMinMaxOpcode(MinMaxOpc: Opcode);
12356 if (isOperationLegal(Op: AltOpcode, VT) && DAG.SignBitIsZero(Op: Op0) &&
12357 DAG.SignBitIsZero(Op: Op1))
12358 return DAG.getNode(Opcode: AltOpcode, DL, VT, N1: Op0, N2: Op1);
12359
12360 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
12361 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(V: Op1, AllowUndefs: true) && BoolVT == VT &&
12362 getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
12363 Op0 = DAG.getFreeze(V: Op0);
12364 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
12365 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0,
12366 N2: DAG.getSetCC(DL, VT, LHS: Op0, RHS: Zero, Cond: ISD::SETEQ));
12367 }
12368
12369 // umin(x,y) -> sub(x,usubsat(x,y))
12370 // TODO: Missing freeze(Op0)?
12371 if (Opcode == ISD::UMIN && isOperationLegal(Op: ISD::SUB, VT) &&
12372 isOperationLegal(Op: ISD::USUBSAT, VT)) {
12373 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0,
12374 N2: DAG.getNode(Opcode: ISD::USUBSAT, DL, VT, N1: Op0, N2: Op1));
12375 }
12376
12377 // umax(x,y) -> add(x,usubsat(y,x))
12378 // TODO: Missing freeze(Op0)?
12379 if (Opcode == ISD::UMAX && isOperationLegal(Op: ISD::ADD, VT) &&
12380 isOperationLegal(Op: ISD::USUBSAT, VT)) {
12381 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0,
12382 N2: DAG.getNode(Opcode: ISD::USUBSAT, DL, VT, N1: Op1, N2: Op0));
12383 }
12384
12385 // FIXME: Should really try to split the vector in case it's legal on a
12386 // subvector.
12387 if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
12388 return DAG.UnrollVectorOp(N: Node);
12389
12390 // Attempt to find an existing SETCC node that we can reuse.
12391 // TODO: Do we need a generic doesSETCCNodeExist?
12392 // TODO: Missing freeze(Op0)/freeze(Op1)?
12393 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
12394 ISD::CondCode PrefCommuteCC,
12395 ISD::CondCode AltCommuteCC) {
12396 SDVTList BoolVTList = DAG.getVTList(VT: BoolVT);
12397 for (ISD::CondCode CC : {PrefCC, AltCC}) {
12398 if (DAG.doesNodeExist(Opcode: ISD::SETCC, VTList: BoolVTList,
12399 Ops: {Op0, Op1, DAG.getCondCode(Cond: CC)})) {
12400 SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: CC);
12401 return DAG.getSelect(DL, VT, Cond, LHS: Op0, RHS: Op1);
12402 }
12403 }
12404 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
12405 if (DAG.doesNodeExist(Opcode: ISD::SETCC, VTList: BoolVTList,
12406 Ops: {Op0, Op1, DAG.getCondCode(Cond: CC)})) {
12407 SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: CC);
12408 return DAG.getSelect(DL, VT, Cond, LHS: Op1, RHS: Op0);
12409 }
12410 }
12411 SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: PrefCC);
12412 return DAG.getSelect(DL, VT, Cond, LHS: Op0, RHS: Op1);
12413 };
12414
12415 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
12416 // -> Y = (A < B) ? B : A
12417 // -> Y = (A >= B) ? A : B
12418 // -> Y = (A <= B) ? B : A
12419 switch (Opcode) {
12420 case ISD::SMAX:
12421 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
12422 case ISD::SMIN:
12423 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
12424 case ISD::UMAX:
12425 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
12426 case ISD::UMIN:
12427 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
12428 }
12429
12430 llvm_unreachable("How did we get here?");
12431}
12432
12433SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
12434 unsigned Opcode = Node->getOpcode();
12435 SDValue LHS = Node->getOperand(Num: 0);
12436 SDValue RHS = Node->getOperand(Num: 1);
12437 EVT VT = LHS.getValueType();
12438 SDLoc dl(Node);
12439
12440 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
12441 assert(VT.isInteger() && "Expected operands to be integers");
12442
12443 // usub.sat(a, b) -> umax(a, b) - b
12444 if (Opcode == ISD::USUBSAT && isOperationLegal(Op: ISD::UMAX, VT)) {
12445 SDValue Max = DAG.getNode(Opcode: ISD::UMAX, DL: dl, VT, N1: LHS, N2: RHS);
12446 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Max, N2: RHS);
12447 }
12448
12449 // usub.sat(a, 1) -> sub(a, zext(a != 0))
12450 // Prefer this on targets without legal/cost-effective overflow-carry nodes.
12451 if (Opcode == ISD::USUBSAT && isOneOrOneSplat(V: RHS) &&
12452 !isOperationLegalOrCustom(Op: ISD::USUBO_CARRY, VT)) {
12453 LHS = DAG.getFreeze(V: LHS);
12454 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
12455 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
12456 SDValue IsNonZero = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: Zero, Cond: ISD::SETNE);
12457 SDValue Subtrahend = DAG.getBoolExtOrTrunc(Op: IsNonZero, SL: dl, VT, OpVT: BoolVT);
12458 Subtrahend =
12459 DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Subtrahend, N2: DAG.getConstant(Val: 1, DL: dl, VT));
12460 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: Subtrahend);
12461 }
12462
12463 // uadd.sat(a, b) -> umin(a, ~b) + b
12464 if (Opcode == ISD::UADDSAT && isOperationLegal(Op: ISD::UMIN, VT)) {
12465 SDValue InvRHS = DAG.getNOT(DL: dl, Val: RHS, VT);
12466 SDValue Min = DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT, N1: LHS, N2: InvRHS);
12467 return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Min, N2: RHS);
12468 }
12469
12470 unsigned OverflowOp;
12471 switch (Opcode) {
12472 case ISD::SADDSAT:
12473 OverflowOp = ISD::SADDO;
12474 break;
12475 case ISD::UADDSAT:
12476 OverflowOp = ISD::UADDO;
12477 break;
12478 case ISD::SSUBSAT:
12479 OverflowOp = ISD::SSUBO;
12480 break;
12481 case ISD::USUBSAT:
12482 OverflowOp = ISD::USUBO;
12483 break;
12484 default:
12485 llvm_unreachable("Expected method to receive signed or unsigned saturation "
12486 "addition or subtraction node.");
12487 }
12488
12489 // FIXME: Should really try to split the vector in case it's legal on a
12490 // subvector.
12491 if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
12492 return DAG.UnrollVectorOp(N: Node);
12493
12494 unsigned BitWidth = LHS.getScalarValueSizeInBits();
12495 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
12496 SDValue Result = DAG.getNode(Opcode: OverflowOp, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
12497 SDValue SumDiff = Result.getValue(R: 0);
12498 SDValue Overflow = Result.getValue(R: 1);
12499 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
12500 SDValue AllOnes = DAG.getAllOnesConstant(DL: dl, VT);
12501
12502 if (Opcode == ISD::UADDSAT) {
12503 if (getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
12504 // (LHS + RHS) | OverflowMask
12505 SDValue OverflowMask = DAG.getSExtOrTrunc(Op: Overflow, DL: dl, VT);
12506 return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: SumDiff, N2: OverflowMask);
12507 }
12508 // Overflow ? 0xffff.... : (LHS + RHS)
12509 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: AllOnes, RHS: SumDiff);
12510 }
12511
12512 if (Opcode == ISD::USUBSAT) {
12513 if (getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
12514 // (LHS - RHS) & ~OverflowMask
12515 SDValue OverflowMask = DAG.getSExtOrTrunc(Op: Overflow, DL: dl, VT);
12516 SDValue Not = DAG.getNOT(DL: dl, Val: OverflowMask, VT);
12517 return DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: SumDiff, N2: Not);
12518 }
12519 // Overflow ? 0 : (LHS - RHS)
12520 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Zero, RHS: SumDiff);
12521 }
12522
12523 assert((Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) &&
12524 "Expected signed saturating add/sub opcode");
12525
12526 const APInt MinVal = APInt::getSignedMinValue(numBits: BitWidth);
12527 const APInt MaxVal = APInt::getSignedMaxValue(numBits: BitWidth);
12528
12529 KnownBits KnownLHS = DAG.computeKnownBits(Op: LHS);
12530 KnownBits KnownRHS = DAG.computeKnownBits(Op: RHS);
12531
12532 // If either of the operand signs are known, then they are guaranteed to
12533 // only saturate in one direction. If non-negative they will saturate
12534 // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
12535 //
12536 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
12537 // sign of 'y' has to be flipped.
12538
12539 bool LHSIsNonNegative = KnownLHS.isNonNegative();
12540 bool RHSIsNonNegative =
12541 Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative() : KnownRHS.isNegative();
12542 if (LHSIsNonNegative || RHSIsNonNegative) {
12543 SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
12544 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMax, RHS: SumDiff);
12545 }
12546
12547 bool LHSIsNegative = KnownLHS.isNegative();
12548 bool RHSIsNegative =
12549 Opcode == ISD::SADDSAT ? KnownRHS.isNegative() : KnownRHS.isNonNegative();
12550 if (LHSIsNegative || RHSIsNegative) {
12551 SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
12552 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMin, RHS: SumDiff);
12553 }
12554
12555 // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
12556 SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
12557 SDValue Shift = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: SumDiff,
12558 N2: DAG.getConstant(Val: BitWidth - 1, DL: dl, VT));
12559 Result = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Shift, N2: SatMin);
12560 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Result, RHS: SumDiff);
12561}
12562
12563SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const {
12564 unsigned Opcode = Node->getOpcode();
12565 SDValue LHS = Node->getOperand(Num: 0);
12566 SDValue RHS = Node->getOperand(Num: 1);
12567 EVT VT = LHS.getValueType();
12568 EVT ResVT = Node->getValueType(ResNo: 0);
12569 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
12570 SDLoc dl(Node);
12571
12572 auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
12573 auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
12574 SDValue IsLT = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS, Cond: LTPredicate);
12575 SDValue IsGT = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS, Cond: GTPredicate);
12576
12577 // We can't perform arithmetic on i1 values. Extending them would
12578 // probably result in worse codegen, so let's just use two selects instead.
12579 // Some targets are also just better off using selects rather than subtraction
12580 // because one of the conditions can be merged with one of the selects.
12581 // And finally, if we don't know the contents of high bits of a boolean value
12582 // we can't perform any arithmetic either.
12583 if (preferSelectsOverBooleanArithmetic(VT) ||
12584 BoolVT.getScalarSizeInBits() == 1 ||
12585 getBooleanContents(Type: BoolVT) == UndefinedBooleanContent) {
12586 SDValue SelectZeroOrOne =
12587 DAG.getSelect(DL: dl, VT: ResVT, Cond: IsGT, LHS: DAG.getConstant(Val: 1, DL: dl, VT: ResVT),
12588 RHS: DAG.getConstant(Val: 0, DL: dl, VT: ResVT));
12589 return DAG.getSelect(DL: dl, VT: ResVT, Cond: IsLT, LHS: DAG.getAllOnesConstant(DL: dl, VT: ResVT),
12590 RHS: SelectZeroOrOne);
12591 }
12592
12593 if (getBooleanContents(Type: BoolVT) == ZeroOrNegativeOneBooleanContent)
12594 std::swap(a&: IsGT, b&: IsLT);
12595 return DAG.getSExtOrTrunc(Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: BoolVT, N1: IsGT, N2: IsLT), DL: dl,
12596 VT: ResVT);
12597}
12598
12599SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
12600 unsigned Opcode = Node->getOpcode();
12601 bool IsSigned = Opcode == ISD::SSHLSAT;
12602 SDValue LHS = Node->getOperand(Num: 0);
12603 SDValue RHS = Node->getOperand(Num: 1);
12604 EVT VT = LHS.getValueType();
12605 SDLoc dl(Node);
12606
12607 assert((Node->getOpcode() == ISD::SSHLSAT ||
12608 Node->getOpcode() == ISD::USHLSAT) &&
12609 "Expected a SHLSAT opcode");
12610 assert(VT.isInteger() && "Expected operands to be integers");
12611
12612 if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
12613 return DAG.UnrollVectorOp(N: Node);
12614
12615 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
12616
12617 unsigned BW = VT.getScalarSizeInBits();
12618 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
12619 SDValue Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS, N2: RHS);
12620 SDValue Orig =
12621 DAG.getNode(Opcode: IsSigned ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: Result, N2: RHS);
12622
12623 SDValue SatVal;
12624 if (IsSigned) {
12625 SDValue SatMin = DAG.getConstant(Val: APInt::getSignedMinValue(numBits: BW), DL: dl, VT);
12626 SDValue SatMax = DAG.getConstant(Val: APInt::getSignedMaxValue(numBits: BW), DL: dl, VT);
12627 SDValue Cond =
12628 DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: DAG.getConstant(Val: 0, DL: dl, VT), Cond: ISD::SETLT);
12629 SatVal = DAG.getSelect(DL: dl, VT, Cond, LHS: SatMin, RHS: SatMax);
12630 } else {
12631 SatVal = DAG.getConstant(Val: APInt::getMaxValue(numBits: BW), DL: dl, VT);
12632 }
12633 SDValue Cond = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: Orig, Cond: ISD::SETNE);
12634 return DAG.getSelect(DL: dl, VT, Cond, LHS: SatVal, RHS: Result);
12635}
12636
12637void TargetLowering::forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl,
12638 bool Signed, SDValue &Lo, SDValue &Hi,
12639 SDValue LHS, SDValue RHS,
12640 SDValue HiLHS, SDValue HiRHS) const {
12641 EVT VT = LHS.getValueType();
12642 assert(RHS.getValueType() == VT && "Mismatching operand types");
12643
12644 assert((HiLHS && HiRHS) || (!HiLHS && !HiRHS));
12645 assert((!Signed || !HiLHS) &&
12646 "Signed flag should only be set when HiLHS and RiRHS are null");
12647
12648 // We'll expand the multiplication by brute force because we have no other
12649 // options. This is a trivially-generalized version of the code from
12650 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
12651 // 4.3.1). If Signed is set, we can use arithmetic right shifts to propagate
12652 // sign bits while calculating the Hi half.
12653 unsigned Bits = VT.getSizeInBits();
12654 unsigned HalfBits = Bits / 2;
12655 SDValue Mask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: Bits, loBitsSet: HalfBits), DL: dl, VT);
12656 SDValue LL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: LHS, N2: Mask);
12657 SDValue RL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: RHS, N2: Mask);
12658
12659 SDValue T = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LL, N2: RL);
12660 SDValue TL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: T, N2: Mask);
12661
12662 SDValue Shift = DAG.getShiftAmountConstant(Val: HalfBits, VT, DL: dl);
12663 // This is always an unsigned shift.
12664 SDValue TH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: T, N2: Shift);
12665
12666 unsigned ShiftOpc = Signed ? ISD::SRA : ISD::SRL;
12667 SDValue LH = DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: LHS, N2: Shift);
12668 SDValue RH = DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: RHS, N2: Shift);
12669
12670 SDValue U =
12671 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LH, N2: RL), N2: TH);
12672 SDValue UL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: U, N2: Mask);
12673 SDValue UH = DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: U, N2: Shift);
12674
12675 SDValue V =
12676 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LL, N2: RH), N2: UL);
12677 SDValue VH = DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: V, N2: Shift);
12678
12679 Lo = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: TL,
12680 N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: V, N2: Shift));
12681
12682 Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LH, N2: RH),
12683 N2: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: UH, N2: VH));
12684
12685 // If HiLHS and HiRHS are set, multiply them by the opposite low part and add
12686 // the products to Hi.
12687 if (HiLHS) {
12688 SDValue RHLL = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: HiRHS, N2: LHS);
12689 SDValue RLLH = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: RHS, N2: HiLHS);
12690 Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Hi,
12691 N2: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: RHLL, N2: RLLH));
12692 }
12693}
12694
12695void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
12696 bool Signed, const SDValue LHS,
12697 const SDValue RHS, SDValue &Lo,
12698 SDValue &Hi) const {
12699 EVT VT = LHS.getValueType();
12700 assert(RHS.getValueType() == VT && "Mismatching operand types");
12701 EVT WideVT = VT.widenIntegerElementType(Context&: *DAG.getContext());
12702 // We can fall back to a libcall with an illegal type for the MUL if we
12703 // have a libcall big enough.
12704 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
12705 if (WideVT == MVT::i16)
12706 LC = RTLIB::MUL_I16;
12707 else if (WideVT == MVT::i32)
12708 LC = RTLIB::MUL_I32;
12709 else if (WideVT == MVT::i64)
12710 LC = RTLIB::MUL_I64;
12711 else if (WideVT == MVT::i128)
12712 LC = RTLIB::MUL_I128;
12713
12714 RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(Call: LC);
12715 if (LibcallImpl == RTLIB::Unsupported) {
12716 forceExpandMultiply(DAG, dl, Signed, Lo, Hi, LHS, RHS);
12717 return;
12718 }
12719
12720 SDValue HiLHS, HiRHS;
12721 if (Signed) {
12722 // The high part is obtained by SRA'ing all but one of the bits of low
12723 // part.
12724 unsigned LoSize = VT.getFixedSizeInBits();
12725 SDValue Shift = DAG.getShiftAmountConstant(Val: LoSize - 1, VT, DL: dl);
12726 HiLHS = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: LHS, N2: Shift);
12727 HiRHS = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: RHS, N2: Shift);
12728 } else {
12729 HiLHS = DAG.getConstant(Val: 0, DL: dl, VT);
12730 HiRHS = DAG.getConstant(Val: 0, DL: dl, VT);
12731 }
12732
12733 // Attempt a libcall.
12734 SDValue Ret;
12735 TargetLowering::MakeLibCallOptions CallOptions;
12736 CallOptions.setIsSigned(Signed);
12737 CallOptions.setIsPostTypeLegalization(true);
12738 if (shouldSplitFunctionArgumentsAsLittleEndian(DL: DAG.getDataLayout())) {
12739 // Halves of WideVT are packed into registers in different order
12740 // depending on platform endianness. This is usually handled by
12741 // the C calling convention, but we can't defer to it in
12742 // the legalizer.
12743 SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
12744 Ret = makeLibCall(DAG, LC, RetVT: WideVT, Ops: Args, CallOptions, dl).first;
12745 } else {
12746 SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
12747 Ret = makeLibCall(DAG, LC, RetVT: WideVT, Ops: Args, CallOptions, dl).first;
12748 }
12749 assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
12750 "Ret value is a collection of constituent nodes holding result.");
12751 if (DAG.getDataLayout().isLittleEndian()) {
12752 // Same as above.
12753 Lo = Ret.getOperand(i: 0);
12754 Hi = Ret.getOperand(i: 1);
12755 } else {
12756 Lo = Ret.getOperand(i: 1);
12757 Hi = Ret.getOperand(i: 0);
12758 }
12759}
12760
12761SDValue
12762TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
12763 assert((Node->getOpcode() == ISD::SMULFIX ||
12764 Node->getOpcode() == ISD::UMULFIX ||
12765 Node->getOpcode() == ISD::SMULFIXSAT ||
12766 Node->getOpcode() == ISD::UMULFIXSAT) &&
12767 "Expected a fixed point multiplication opcode");
12768
12769 SDLoc dl(Node);
12770 SDValue LHS = Node->getOperand(Num: 0);
12771 SDValue RHS = Node->getOperand(Num: 1);
12772 EVT VT = LHS.getValueType();
12773 unsigned Scale = Node->getConstantOperandVal(Num: 2);
12774 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
12775 Node->getOpcode() == ISD::UMULFIXSAT);
12776 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
12777 Node->getOpcode() == ISD::SMULFIXSAT);
12778 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
12779 unsigned VTSize = VT.getScalarSizeInBits();
12780
12781 if (!Scale) {
12782 // [us]mul.fix(a, b, 0) -> mul(a, b)
12783 if (!Saturating) {
12784 if (isOperationLegalOrCustom(Op: ISD::MUL, VT))
12785 return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
12786 } else if (Signed && isOperationLegalOrCustom(Op: ISD::SMULO, VT)) {
12787 SDValue Result =
12788 DAG.getNode(Opcode: ISD::SMULO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
12789 SDValue Product = Result.getValue(R: 0);
12790 SDValue Overflow = Result.getValue(R: 1);
12791 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
12792
12793 APInt MinVal = APInt::getSignedMinValue(numBits: VTSize);
12794 APInt MaxVal = APInt::getSignedMaxValue(numBits: VTSize);
12795 SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
12796 SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
12797 // Xor the inputs, if resulting sign bit is 0 the product will be
12798 // positive, else negative.
12799 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: LHS, N2: RHS);
12800 SDValue ProdNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Xor, RHS: Zero, Cond: ISD::SETLT);
12801 Result = DAG.getSelect(DL: dl, VT, Cond: ProdNeg, LHS: SatMin, RHS: SatMax);
12802 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Result, RHS: Product);
12803 } else if (!Signed && isOperationLegalOrCustom(Op: ISD::UMULO, VT)) {
12804 SDValue Result =
12805 DAG.getNode(Opcode: ISD::UMULO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
12806 SDValue Product = Result.getValue(R: 0);
12807 SDValue Overflow = Result.getValue(R: 1);
12808
12809 APInt MaxVal = APInt::getMaxValue(numBits: VTSize);
12810 SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
12811 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMax, RHS: Product);
12812 }
12813 }
12814
12815 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
12816 "Expected scale to be less than the number of bits if signed or at "
12817 "most the number of bits if unsigned.");
12818 assert(LHS.getValueType() == RHS.getValueType() &&
12819 "Expected both operands to be the same type");
12820
12821 // Get the upper and lower bits of the result.
12822 SDValue Lo, Hi;
12823 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
12824 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
12825 EVT WideVT = VT.widenIntegerElementType(Context&: *DAG.getContext());
12826 if (isOperationLegalOrCustom(Op: LoHiOp, VT)) {
12827 SDValue Result = DAG.getNode(Opcode: LoHiOp, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: LHS, N2: RHS);
12828 Lo = Result.getValue(R: 0);
12829 Hi = Result.getValue(R: 1);
12830 } else if (isOperationLegalOrCustom(Op: HiOp, VT)) {
12831 Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
12832 Hi = DAG.getNode(Opcode: HiOp, DL: dl, VT, N1: LHS, N2: RHS);
12833 } else if (isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT)) {
12834 // Try for a multiplication using a wider type.
12835 unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
12836 SDValue LHSExt = DAG.getNode(Opcode: Ext, DL: dl, VT: WideVT, Operand: LHS);
12837 SDValue RHSExt = DAG.getNode(Opcode: Ext, DL: dl, VT: WideVT, Operand: RHS);
12838 SDValue Res = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: LHSExt, N2: RHSExt);
12839 Lo = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Res);
12840 SDValue Shifted =
12841 DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: WideVT, N1: Res,
12842 N2: DAG.getShiftAmountConstant(Val: VTSize, VT: WideVT, DL: dl));
12843 Hi = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Shifted);
12844 } else if (VT.isVector()) {
12845 return SDValue();
12846 } else {
12847 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
12848 }
12849
12850 if (Scale == VTSize)
12851 // Result is just the top half since we'd be shifting by the width of the
12852 // operand. Overflow impossible so this works for both UMULFIX and
12853 // UMULFIXSAT.
12854 return Hi;
12855
12856 // The result will need to be shifted right by the scale since both operands
12857 // are scaled. The result is given to us in 2 halves, so we only want part of
12858 // both in the result.
12859 SDValue Result = DAG.getNode(Opcode: ISD::FSHR, DL: dl, VT, N1: Hi, N2: Lo,
12860 N3: DAG.getShiftAmountConstant(Val: Scale, VT, DL: dl));
12861 if (!Saturating)
12862 return Result;
12863
12864 if (!Signed) {
12865 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
12866 // widened multiplication) aren't all zeroes.
12867
12868 // Saturate to max if ((Hi >> Scale) != 0),
12869 // which is the same as if (Hi > ((1 << Scale) - 1))
12870 APInt MaxVal = APInt::getMaxValue(numBits: VTSize);
12871 SDValue LowMask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VTSize, loBitsSet: Scale),
12872 DL: dl, VT);
12873 Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: LowMask,
12874 True: DAG.getConstant(Val: MaxVal, DL: dl, VT), False: Result,
12875 Cond: ISD::SETUGT);
12876
12877 return Result;
12878 }
12879
12880 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
12881 // widened multiplication) aren't all ones or all zeroes.
12882
12883 SDValue SatMin = DAG.getConstant(Val: APInt::getSignedMinValue(numBits: VTSize), DL: dl, VT);
12884 SDValue SatMax = DAG.getConstant(Val: APInt::getSignedMaxValue(numBits: VTSize), DL: dl, VT);
12885
12886 if (Scale == 0) {
12887 SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Lo,
12888 N2: DAG.getShiftAmountConstant(Val: VTSize - 1, VT, DL: dl));
12889 SDValue Overflow = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Hi, RHS: Sign, Cond: ISD::SETNE);
12890 // Saturated to SatMin if wide product is negative, and SatMax if wide
12891 // product is positive ...
12892 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
12893 SDValue ResultIfOverflow = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: Zero, True: SatMin, False: SatMax,
12894 Cond: ISD::SETLT);
12895 // ... but only if we overflowed.
12896 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: ResultIfOverflow, RHS: Result);
12897 }
12898
12899 // We handled Scale==0 above so all the bits to examine is in Hi.
12900
12901 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
12902 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
12903 SDValue LowMask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VTSize, loBitsSet: Scale - 1),
12904 DL: dl, VT);
12905 Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: LowMask, True: SatMax, False: Result, Cond: ISD::SETGT);
12906 // Saturate to min if (Hi >> (Scale - 1)) < -1),
12907 // which is the same as if (HI < (-1 << (Scale - 1))
12908 SDValue HighMask =
12909 DAG.getConstant(Val: APInt::getHighBitsSet(numBits: VTSize, hiBitsSet: VTSize - Scale + 1),
12910 DL: dl, VT);
12911 Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: HighMask, True: SatMin, False: Result, Cond: ISD::SETLT);
12912 return Result;
12913}
12914
12915SDValue
12916TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
12917 SDValue LHS, SDValue RHS,
12918 unsigned Scale, SelectionDAG &DAG) const {
12919 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
12920 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
12921 "Expected a fixed point division opcode");
12922
12923 EVT VT = LHS.getValueType();
12924 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
12925 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
12926 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
12927
12928 // If there is enough room in the type to upscale the LHS or downscale the
12929 // RHS before the division, we can perform it in this type without having to
12930 // resize. For signed operations, the LHS headroom is the number of
12931 // redundant sign bits, and for unsigned ones it is the number of zeroes.
12932 // The headroom for the RHS is the number of trailing zeroes.
12933 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(Op: LHS) - 1
12934 : DAG.computeKnownBits(Op: LHS).countMinLeadingZeros();
12935 unsigned RHSTrail = DAG.computeKnownBits(Op: RHS).countMinTrailingZeros();
12936
12937 // For signed saturating operations, we need to be able to detect true integer
12938 // division overflow; that is, when you have MIN / -EPS. However, this
12939 // is undefined behavior and if we emit divisions that could take such
12940 // values it may cause undesired behavior (arithmetic exceptions on x86, for
12941 // example).
12942 // Avoid this by requiring an extra bit so that we never get this case.
12943 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
12944 // signed saturating division, we need to emit a whopping 32-bit division.
12945 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
12946 return SDValue();
12947
12948 unsigned LHSShift = std::min(a: LHSLead, b: Scale);
12949 unsigned RHSShift = Scale - LHSShift;
12950
12951 // At this point, we know that if we shift the LHS up by LHSShift and the
12952 // RHS down by RHSShift, we can emit a regular division with a final scaling
12953 // factor of Scale.
12954
12955 if (LHSShift)
12956 LHS = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS,
12957 N2: DAG.getShiftAmountConstant(Val: LHSShift, VT, DL: dl));
12958 if (RHSShift)
12959 RHS = DAG.getNode(Opcode: Signed ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: RHS,
12960 N2: DAG.getShiftAmountConstant(Val: RHSShift, VT, DL: dl));
12961
12962 SDValue Quot;
12963 if (Signed) {
12964 // For signed operations, if the resulting quotient is negative and the
12965 // remainder is nonzero, subtract 1 from the quotient to round towards
12966 // negative infinity.
12967 SDValue Rem;
12968 // FIXME: Ideally we would always produce an SDIVREM here, but if the
12969 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
12970 // we couldn't just form a libcall, but the type legalizer doesn't do it.
12971 if (isTypeLegal(VT) &&
12972 isOperationLegalOrCustom(Op: ISD::SDIVREM, VT)) {
12973 Quot = DAG.getNode(Opcode: ISD::SDIVREM, DL: dl,
12974 VTList: DAG.getVTList(VT1: VT, VT2: VT),
12975 N1: LHS, N2: RHS);
12976 Rem = Quot.getValue(R: 1);
12977 Quot = Quot.getValue(R: 0);
12978 } else {
12979 Quot = DAG.getNode(Opcode: ISD::SDIV, DL: dl, VT,
12980 N1: LHS, N2: RHS);
12981 Rem = DAG.getNode(Opcode: ISD::SREM, DL: dl, VT,
12982 N1: LHS, N2: RHS);
12983 }
12984 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
12985 SDValue RemNonZero = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Rem, RHS: Zero, Cond: ISD::SETNE);
12986 SDValue LHSNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: Zero, Cond: ISD::SETLT);
12987 SDValue RHSNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: RHS, RHS: Zero, Cond: ISD::SETLT);
12988 SDValue QuotNeg = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: BoolVT, N1: LHSNeg, N2: RHSNeg);
12989 SDValue Sub1 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Quot,
12990 N2: DAG.getConstant(Val: 1, DL: dl, VT));
12991 Quot = DAG.getSelect(DL: dl, VT,
12992 Cond: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: BoolVT, N1: RemNonZero, N2: QuotNeg),
12993 LHS: Sub1, RHS: Quot);
12994 } else
12995 Quot = DAG.getNode(Opcode: ISD::UDIV, DL: dl, VT,
12996 N1: LHS, N2: RHS);
12997
12998 return Quot;
12999}
13000
13001void TargetLowering::expandUADDSUBO(
13002 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
13003 SDLoc dl(Node);
13004 SDValue LHS = Node->getOperand(Num: 0);
13005 SDValue RHS = Node->getOperand(Num: 1);
13006 bool IsAdd = Node->getOpcode() == ISD::UADDO;
13007
13008 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
13009 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
13010 if (isOperationLegalOrCustom(Op: OpcCarry, VT: Node->getValueType(ResNo: 0))) {
13011 SDValue CarryIn = DAG.getConstant(Val: 0, DL: dl, VT: Node->getValueType(ResNo: 1));
13012 SDValue NodeCarry = DAG.getNode(Opcode: OpcCarry, DL: dl, VTList: Node->getVTList(),
13013 Ops: { LHS, RHS, CarryIn });
13014 Result = SDValue(NodeCarry.getNode(), 0);
13015 Overflow = SDValue(NodeCarry.getNode(), 1);
13016 return;
13017 }
13018
13019 Result = DAG.getNode(Opcode: IsAdd ? ISD::ADD : ISD::SUB, DL: dl,
13020 VT: LHS.getValueType(), N1: LHS, N2: RHS);
13021
13022 EVT ResultType = Node->getValueType(ResNo: 1);
13023 EVT SetCCType = getSetCCResultType(
13024 DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: 0));
13025 SDValue SetCC;
13026 if (IsAdd && isOneConstant(V: RHS)) {
13027 // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
13028 // the live range of X. We assume comparing with 0 is cheap.
13029 // The general case (X + C) < C is not necessarily beneficial. Although we
13030 // reduce the live range of X, we may introduce the materialization of
13031 // constant C.
13032 SetCC =
13033 DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Result,
13034 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Node->getValueType(ResNo: 0)), Cond: ISD::SETEQ);
13035 } else if (IsAdd && isAllOnesConstant(V: RHS)) {
13036 // Special case: uaddo X, -1 overflows if X != 0.
13037 SetCC =
13038 DAG.getSetCC(DL: dl, VT: SetCCType, LHS,
13039 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Node->getValueType(ResNo: 0)), Cond: ISD::SETNE);
13040 } else {
13041 ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
13042 SetCC = DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Result, RHS: LHS, Cond: CC);
13043 }
13044 Overflow = DAG.getBoolExtOrTrunc(Op: SetCC, SL: dl, VT: ResultType, OpVT: ResultType);
13045}
13046
13047void TargetLowering::expandSADDSUBO(
13048 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
13049 SDLoc dl(Node);
13050 SDValue LHS = Node->getOperand(Num: 0);
13051 SDValue RHS = Node->getOperand(Num: 1);
13052 bool IsAdd = Node->getOpcode() == ISD::SADDO;
13053
13054 Result = DAG.getNode(Opcode: IsAdd ? ISD::ADD : ISD::SUB, DL: dl,
13055 VT: LHS.getValueType(), N1: LHS, N2: RHS);
13056
13057 EVT ResultType = Node->getValueType(ResNo: 1);
13058 EVT OType = getSetCCResultType(
13059 DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: 0));
13060
13061 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
13062 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
13063 if (isOperationLegal(Op: OpcSat, VT: LHS.getValueType())) {
13064 SDValue Sat = DAG.getNode(Opcode: OpcSat, DL: dl, VT: LHS.getValueType(), N1: LHS, N2: RHS);
13065 SDValue SetCC = DAG.getSetCC(DL: dl, VT: OType, LHS: Result, RHS: Sat, Cond: ISD::SETNE);
13066 Overflow = DAG.getBoolExtOrTrunc(Op: SetCC, SL: dl, VT: ResultType, OpVT: ResultType);
13067 return;
13068 }
13069
13070 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: LHS.getValueType());
13071
13072 if (IsAdd) {
13073 // For an addition, the result should be less than one of the operands (LHS)
13074 // if and only if the other operand (RHS) is negative, otherwise there will
13075 // be overflow.
13076 SDValue ResultLowerThanLHS =
13077 DAG.getSetCC(DL: dl, VT: OType, LHS: Result, RHS: LHS, Cond: ISD::SETLT);
13078 SDValue RHSNegative = DAG.getSetCC(DL: dl, VT: OType, LHS: RHS, RHS: Zero, Cond: ISD::SETLT);
13079 Overflow = DAG.getBoolExtOrTrunc(
13080 Op: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OType, N1: RHSNegative, N2: ResultLowerThanLHS), SL: dl,
13081 VT: ResultType, OpVT: ResultType);
13082 } else {
13083 // For subtraction, overflow occurs when the signed comparison of operands
13084 // doesn't match the sign of the result.
13085 SDValue LHSLessThanRHS = DAG.getSetCC(DL: dl, VT: OType, LHS, RHS, Cond: ISD::SETLT);
13086 SDValue ResultNegative = DAG.getSetCC(DL: dl, VT: OType, LHS: Result, RHS: Zero, Cond: ISD::SETLT);
13087 Overflow = DAG.getBoolExtOrTrunc(
13088 Op: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OType, N1: LHSLessThanRHS, N2: ResultNegative), SL: dl,
13089 VT: ResultType, OpVT: ResultType);
13090 }
13091}
13092
13093bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
13094 SDValue &Overflow, SelectionDAG &DAG) const {
13095 SDLoc dl(Node);
13096 EVT VT = Node->getValueType(ResNo: 0);
13097 EVT SetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
13098 SDValue LHS = Node->getOperand(Num: 0);
13099 SDValue RHS = Node->getOperand(Num: 1);
13100 bool isSigned = Node->getOpcode() == ISD::SMULO;
13101
13102 // For power-of-two multiplications we can use a simpler shift expansion.
13103 if (ConstantSDNode *RHSC = isConstOrConstSplat(N: RHS)) {
13104 const APInt &C = RHSC->getAPIntValue();
13105 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
13106 if (C.isPowerOf2()) {
13107 // smulo(x, signed_min) is same as umulo(x, signed_min).
13108 bool UseArithShift = isSigned && !C.isMinSignedValue();
13109 SDValue ShiftAmt = DAG.getShiftAmountConstant(Val: C.logBase2(), VT, DL: dl);
13110 Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS, N2: ShiftAmt);
13111 Overflow = DAG.getSetCC(DL: dl, VT: SetCCVT,
13112 LHS: DAG.getNode(Opcode: UseArithShift ? ISD::SRA : ISD::SRL,
13113 DL: dl, VT, N1: Result, N2: ShiftAmt),
13114 RHS: LHS, Cond: ISD::SETNE);
13115 return true;
13116 }
13117 }
13118
13119 SDValue BottomHalf;
13120 SDValue TopHalf;
13121 EVT WideVT = VT.widenIntegerElementType(Context&: *DAG.getContext());
13122
13123 static const unsigned Ops[2][3] =
13124 { { ISD::UMUL_LOHI, ISD::MULHU, ISD::ZERO_EXTEND },
13125 { ISD::SMUL_LOHI, ISD::MULHS, ISD::SIGN_EXTEND }};
13126 if (isOperationLegalOrCustom(Op: Ops[isSigned][0], VT)) {
13127 BottomHalf = DAG.getNode(Opcode: Ops[isSigned][0], DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: LHS,
13128 N2: RHS);
13129 TopHalf = BottomHalf.getValue(R: 1);
13130 } else if (isOperationLegalOrCustom(Op: Ops[isSigned][1], VT)) {
13131 BottomHalf = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
13132 TopHalf = DAG.getNode(Opcode: Ops[isSigned][1], DL: dl, VT, N1: LHS, N2: RHS);
13133 } else if (isTypeLegal(VT: WideVT)) {
13134 LHS = DAG.getNode(Opcode: Ops[isSigned][2], DL: dl, VT: WideVT, Operand: LHS);
13135 RHS = DAG.getNode(Opcode: Ops[isSigned][2], DL: dl, VT: WideVT, Operand: RHS);
13136 SDValue Mul = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: LHS, N2: RHS);
13137 BottomHalf = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Mul);
13138 SDValue ShiftAmt =
13139 DAG.getShiftAmountConstant(Val: VT.getScalarSizeInBits(), VT: WideVT, DL: dl);
13140 TopHalf = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT,
13141 Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideVT, N1: Mul, N2: ShiftAmt));
13142 } else {
13143 if (VT.isVector())
13144 return false;
13145
13146 forceExpandWideMUL(DAG, dl, Signed: isSigned, LHS, RHS, Lo&: BottomHalf, Hi&: TopHalf);
13147 }
13148
13149 Result = BottomHalf;
13150 if (isSigned) {
13151 SDValue ShiftAmt = DAG.getShiftAmountConstant(
13152 Val: VT.getScalarSizeInBits() - 1, VT: BottomHalf.getValueType(), DL: dl);
13153 SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: BottomHalf, N2: ShiftAmt);
13154 Overflow = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: TopHalf, RHS: Sign, Cond: ISD::SETNE);
13155 } else {
13156 Overflow = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: TopHalf,
13157 RHS: DAG.getConstant(Val: 0, DL: dl, VT), Cond: ISD::SETNE);
13158 }
13159
13160 // Truncate the result if SetCC returns a larger type than needed.
13161 EVT RType = Node->getValueType(ResNo: 1);
13162 if (RType.bitsLT(VT: Overflow.getValueType()))
13163 Overflow = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: RType, Operand: Overflow);
13164
13165 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
13166 "Unexpected result type for S/UMULO legalization");
13167 return true;
13168}
13169
13170SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
13171 SDLoc dl(Node);
13172 ISD::NodeType BaseOpcode = ISD::getVecReduceBaseOpcode(VecReduceOpcode: Node->getOpcode());
13173 SDValue Op = Node->getOperand(Num: 0);
13174 SDNodeFlags Flags = Node->getFlags();
13175 EVT VT = Op.getValueType();
13176
13177 // Try to use a shuffle reduction for power of two vectors.
13178 if (VT.isPow2VectorType()) {
13179 // See if the reduction opcode is safe to use with widened types.
13180 bool WidenSrc = false;
13181 switch (Node->getOpcode()) {
13182 case ISD::VECREDUCE_FADD:
13183 case ISD::VECREDUCE_FMUL:
13184 case ISD::VECREDUCE_ADD:
13185 case ISD::VECREDUCE_MUL:
13186 case ISD::VECREDUCE_AND:
13187 case ISD::VECREDUCE_OR:
13188 case ISD::VECREDUCE_XOR:
13189 case ISD::VECREDUCE_SMAX:
13190 case ISD::VECREDUCE_SMIN:
13191 case ISD::VECREDUCE_UMAX:
13192 case ISD::VECREDUCE_UMIN:
13193 WidenSrc = VT.isFixedLengthVector();
13194 break;
13195 }
13196
13197 while (VT.getVectorElementCount().isKnownMultipleOf(RHS: 2)) {
13198 EVT HalfVT = VT.getHalfNumVectorElementsVT(Context&: *DAG.getContext());
13199 if (!isOperationLegalOrCustom(Op: BaseOpcode, VT: HalfVT)) {
13200 if (WidenSrc && Op.getOpcode() != ISD::BUILD_VECTOR) {
13201 // Attempt to widen the source vectors to a legal op.
13202 EVT WideVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT: HalfVT);
13203 if (WideVT.isVector() &&
13204 WideVT.getScalarType() == HalfVT.getScalarType() &&
13205 WideVT.getVectorNumElements() >= HalfVT.getVectorNumElements() &&
13206 isOperationLegalOrCustom(Op: BaseOpcode, VT: WideVT)) {
13207 SDValue Lo, Hi;
13208 std::tie(args&: Lo, args&: Hi) = DAG.SplitVector(N: Op, DL: dl);
13209 Lo = DAG.getInsertSubvector(DL: dl, Vec: DAG.getPOISON(VT: WideVT), SubVec: Lo, Idx: 0);
13210 Hi = DAG.getInsertSubvector(DL: dl, Vec: DAG.getPOISON(VT: WideVT), SubVec: Hi, Idx: 0);
13211 Op = DAG.getNode(Opcode: BaseOpcode, DL: dl, VT: WideVT, N1: Lo, N2: Hi, Flags);
13212 Op = DAG.getExtractSubvector(DL: dl, VT: HalfVT, Vec: Op, Idx: 0);
13213 VT = HalfVT;
13214 continue;
13215 }
13216 }
13217 break;
13218 }
13219
13220 SDValue Lo, Hi;
13221 std::tie(args&: Lo, args&: Hi) = DAG.SplitVector(N: Op, DL: dl);
13222 Op = DAG.getNode(Opcode: BaseOpcode, DL: dl, VT: HalfVT, N1: Lo, N2: Hi, Flags);
13223 VT = HalfVT;
13224
13225 // Stop if splitting is enough to make the reduction legal.
13226 if (isOperationLegalOrCustom(Op: Node->getOpcode(), VT: HalfVT))
13227 return DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VT: Node->getValueType(ResNo: 0), Operand: Op,
13228 Flags);
13229 }
13230 }
13231
13232 if (VT.isScalableVector())
13233 reportFatalInternalError(
13234 reason: "Expanding reductions for scalable vectors is undefined.");
13235
13236 EVT EltVT = VT.getVectorElementType();
13237 unsigned NumElts = VT.getVectorNumElements();
13238
13239 SmallVector<SDValue, 8> Ops;
13240 DAG.ExtractVectorElements(Op, Args&: Ops, Start: 0, Count: NumElts);
13241
13242 SDValue Res = Ops[0];
13243 for (unsigned i = 1; i < NumElts; i++)
13244 Res = DAG.getNode(Opcode: BaseOpcode, DL: dl, VT: EltVT, N1: Res, N2: Ops[i], Flags);
13245
13246 // Result type may be wider than element type.
13247 if (EltVT != Node->getValueType(ResNo: 0))
13248 Res = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: Node->getValueType(ResNo: 0), Operand: Res);
13249 return Res;
13250}
13251
13252SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
13253 SDLoc dl(Node);
13254 SDValue AccOp = Node->getOperand(Num: 0);
13255 SDValue VecOp = Node->getOperand(Num: 1);
13256 SDNodeFlags Flags = Node->getFlags();
13257
13258 EVT VT = VecOp.getValueType();
13259 EVT EltVT = VT.getVectorElementType();
13260
13261 if (VT.isScalableVector())
13262 report_fatal_error(
13263 reason: "Expanding reductions for scalable vectors is undefined.");
13264
13265 unsigned NumElts = VT.getVectorNumElements();
13266
13267 SmallVector<SDValue, 8> Ops;
13268 DAG.ExtractVectorElements(Op: VecOp, Args&: Ops, Start: 0, Count: NumElts);
13269
13270 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(VecReduceOpcode: Node->getOpcode());
13271
13272 SDValue Res = AccOp;
13273 for (unsigned i = 0; i < NumElts; i++)
13274 Res = DAG.getNode(Opcode: BaseOpcode, DL: dl, VT: EltVT, N1: Res, N2: Ops[i], Flags);
13275
13276 return Res;
13277}
13278
13279bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
13280 SelectionDAG &DAG) const {
13281 EVT VT = Node->getValueType(ResNo: 0);
13282 SDLoc dl(Node);
13283 bool isSigned = Node->getOpcode() == ISD::SREM;
13284 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
13285 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
13286 SDValue Dividend = Node->getOperand(Num: 0);
13287 SDValue Divisor = Node->getOperand(Num: 1);
13288 if (isOperationLegalOrCustom(Op: DivRemOpc, VT)) {
13289 SDVTList VTs = DAG.getVTList(VT1: VT, VT2: VT);
13290 Result = DAG.getNode(Opcode: DivRemOpc, DL: dl, VTList: VTs, N1: Dividend, N2: Divisor).getValue(R: 1);
13291 return true;
13292 }
13293 if (isOperationLegalOrCustom(Op: DivOpc, VT)) {
13294 // X % Y -> X-X/Y*Y
13295 SDValue Divide = DAG.getNode(Opcode: DivOpc, DL: dl, VT, N1: Dividend, N2: Divisor);
13296 SDValue Mul = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Divide, N2: Divisor);
13297 Result = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Dividend, N2: Mul);
13298 return true;
13299 }
13300 return false;
13301}
13302
13303SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
13304 SelectionDAG &DAG) const {
13305 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
13306 SDLoc dl(SDValue(Node, 0));
13307 SDValue Src = Node->getOperand(Num: 0);
13308
13309 // DstVT is the result type, while SatVT is the size to which we saturate
13310 EVT SrcVT = Src.getValueType();
13311 EVT DstVT = Node->getValueType(ResNo: 0);
13312
13313 EVT SatVT = cast<VTSDNode>(Val: Node->getOperand(Num: 1))->getVT();
13314 unsigned SatWidth = SatVT.getScalarSizeInBits();
13315 unsigned DstWidth = DstVT.getScalarSizeInBits();
13316 assert(SatWidth <= DstWidth &&
13317 "Expected saturation width smaller than result width");
13318
13319 // Determine minimum and maximum integer values and their corresponding
13320 // floating-point values.
13321 APInt MinInt, MaxInt;
13322 if (IsSigned) {
13323 MinInt = APInt::getSignedMinValue(numBits: SatWidth).sext(width: DstWidth);
13324 MaxInt = APInt::getSignedMaxValue(numBits: SatWidth).sext(width: DstWidth);
13325 } else {
13326 MinInt = APInt::getMinValue(numBits: SatWidth).zext(width: DstWidth);
13327 MaxInt = APInt::getMaxValue(numBits: SatWidth).zext(width: DstWidth);
13328 }
13329
13330 // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
13331 // libcall emission cannot handle this. Large result types will fail.
13332 if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
13333 Src = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f32, Operand: Src);
13334 SrcVT = Src.getValueType();
13335 }
13336
13337 const fltSemantics &Sem = SrcVT.getFltSemantics();
13338 APFloat MinFloat(Sem);
13339 APFloat MaxFloat(Sem);
13340
13341 APFloat::opStatus MinStatus =
13342 MinFloat.convertFromAPInt(Input: MinInt, IsSigned, RM: APFloat::rmTowardZero);
13343 APFloat::opStatus MaxStatus =
13344 MaxFloat.convertFromAPInt(Input: MaxInt, IsSigned, RM: APFloat::rmTowardZero);
13345 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
13346 !(MaxStatus & APFloat::opStatus::opInexact);
13347
13348 SDValue MinFloatNode = DAG.getConstantFP(Val: MinFloat, DL: dl, VT: SrcVT);
13349 SDValue MaxFloatNode = DAG.getConstantFP(Val: MaxFloat, DL: dl, VT: SrcVT);
13350
13351 // If the integer bounds are exactly representable as floats and min/max are
13352 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
13353 // of comparisons and selects.
13354 auto EmitMinMax = [&](unsigned MinOpcode, unsigned MaxOpcode,
13355 bool MayPropagateNaN) {
13356 bool MinMaxLegal = isOperationLegalOrCustom(Op: MinOpcode, VT: SrcVT) &&
13357 isOperationLegalOrCustom(Op: MaxOpcode, VT: SrcVT);
13358 if (!MinMaxLegal)
13359 return SDValue();
13360
13361 SDValue Clamped = Src;
13362
13363 // Clamp Src by MinFloat from below. If !MayPropagateNaN and Src is NaN
13364 // then the result is MinFloat.
13365 Clamped = DAG.getNode(Opcode: MaxOpcode, DL: dl, VT: SrcVT, N1: Clamped, N2: MinFloatNode);
13366 // Clamp by MaxFloat from above. If !MayPropagateNaN then NaN cannot occur.
13367 Clamped = DAG.getNode(Opcode: MinOpcode, DL: dl, VT: SrcVT, N1: Clamped, N2: MaxFloatNode);
13368 // Convert clamped value to integer.
13369 SDValue FpToInt = DAG.getNode(Opcode: IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
13370 DL: dl, VT: DstVT, Operand: Clamped);
13371
13372 // If !MayPropagateNan and the conversion is unsigned case we're done,
13373 // because we mapped NaN to MinFloat, which will cast to zero.
13374 if (!MayPropagateNaN && !IsSigned)
13375 return FpToInt;
13376
13377 // Otherwise, select 0 if Src is NaN.
13378 SDValue ZeroInt = DAG.getConstant(Val: 0, DL: dl, VT: DstVT);
13379 EVT SetCCVT =
13380 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: SrcVT);
13381 SDValue IsNan = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Src, Cond: ISD::CondCode::SETUO);
13382 return DAG.getSelect(DL: dl, VT: DstVT, Cond: IsNan, LHS: ZeroInt, RHS: FpToInt);
13383 };
13384 if (AreExactFloatBounds) {
13385 if (SDValue Res = EmitMinMax(ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM,
13386 /*MayPropagateNaN=*/false))
13387 return Res;
13388 // These may propagate NaN for sNaN operands.
13389 if (SDValue Res =
13390 EmitMinMax(ISD::FMINNUM, ISD::FMAXNUM, /*MayPropagateNaN=*/true))
13391 return Res;
13392 // These always propagate NaN.
13393 if (SDValue Res =
13394 EmitMinMax(ISD::FMINIMUM, ISD::FMAXIMUM, /*MayPropagateNaN=*/true))
13395 return Res;
13396 }
13397
13398 SDValue MinIntNode = DAG.getConstant(Val: MinInt, DL: dl, VT: DstVT);
13399 SDValue MaxIntNode = DAG.getConstant(Val: MaxInt, DL: dl, VT: DstVT);
13400
13401 // Result of direct conversion. The assumption here is that the operation is
13402 // non-trapping and it's fine to apply it to an out-of-range value if we
13403 // select it away later.
13404 SDValue FpToInt =
13405 DAG.getNode(Opcode: IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, DL: dl, VT: DstVT, Operand: Src);
13406
13407 SDValue Select = FpToInt;
13408
13409 EVT SetCCVT =
13410 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: SrcVT);
13411
13412 // If Src ULT MinFloat, select MinInt. In particular, this also selects
13413 // MinInt if Src is NaN.
13414 SDValue ULT = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: MinFloatNode, Cond: ISD::SETULT);
13415 Select = DAG.getSelect(DL: dl, VT: DstVT, Cond: ULT, LHS: MinIntNode, RHS: Select);
13416 // If Src OGT MaxFloat, select MaxInt.
13417 SDValue OGT = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: MaxFloatNode, Cond: ISD::SETOGT);
13418 Select = DAG.getSelect(DL: dl, VT: DstVT, Cond: OGT, LHS: MaxIntNode, RHS: Select);
13419
13420 // In the unsigned case we are done, because we mapped NaN to MinInt, which
13421 // is already zero.
13422 if (!IsSigned)
13423 return Select;
13424
13425 // Otherwise, select 0 if Src is NaN.
13426 SDValue ZeroInt = DAG.getConstant(Val: 0, DL: dl, VT: DstVT);
13427 SDValue IsNan = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Src, Cond: ISD::CondCode::SETUO);
13428 return DAG.getSelect(DL: dl, VT: DstVT, Cond: IsNan, LHS: ZeroInt, RHS: Select);
13429}
13430
13431SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue Op,
13432 const SDLoc &dl,
13433 SelectionDAG &DAG) const {
13434 EVT OperandVT = Op.getValueType();
13435 if (OperandVT.getScalarType() == ResultVT.getScalarType())
13436 return Op;
13437 EVT ResultIntVT = ResultVT.changeTypeToInteger();
13438 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
13439 // can induce double-rounding which may alter the results. We can
13440 // correct for this using a trick explained in: Boldo, Sylvie, and
13441 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
13442 // World Congress. 2005.
13443 SDValue Narrow = DAG.getFPExtendOrRound(Op, DL: dl, VT: ResultVT);
13444 SDValue NarrowAsWide = DAG.getFPExtendOrRound(Op: Narrow, DL: dl, VT: OperandVT);
13445
13446 // We can keep the narrow value as-is if narrowing was exact (no
13447 // rounding error), the wide value was NaN (the narrow value is also
13448 // NaN and should be preserved) or if we rounded to the odd value.
13449 SDValue NarrowBits = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: ResultIntVT, Operand: Narrow);
13450 SDValue One = DAG.getConstant(Val: 1, DL: dl, VT: ResultIntVT);
13451 SDValue NegativeOne = DAG.getAllOnesConstant(DL: dl, VT: ResultIntVT);
13452 SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ResultIntVT, N1: NarrowBits, N2: One);
13453 EVT ResultIntVTCCVT = getSetCCResultType(
13454 DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: And.getValueType());
13455 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: ResultIntVT);
13456 // The result is already odd so we don't need to do anything.
13457 SDValue AlreadyOdd = DAG.getSetCC(DL: dl, VT: ResultIntVTCCVT, LHS: And, RHS: Zero, Cond: ISD::SETNE);
13458
13459 EVT WideSetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(),
13460 VT: Op.getValueType());
13461 // We keep results which are exact, odd or NaN.
13462 SDValue KeepNarrow =
13463 DAG.getSetCC(DL: dl, VT: WideSetCCVT, LHS: Op, RHS: NarrowAsWide, Cond: ISD::SETUEQ);
13464 KeepNarrow = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: WideSetCCVT, N1: KeepNarrow, N2: AlreadyOdd);
13465 // We morally performed a round-down if AbsNarrow is smaller than
13466 // AbsWide.
13467 SDValue AbsWide = DAG.getNode(Opcode: ISD::FABS, DL: dl, VT: OperandVT, Operand: Op);
13468 SDValue AbsNarrowAsWide = DAG.getNode(Opcode: ISD::FABS, DL: dl, VT: OperandVT, Operand: NarrowAsWide);
13469 SDValue NarrowIsRd =
13470 DAG.getSetCC(DL: dl, VT: WideSetCCVT, LHS: AbsWide, RHS: AbsNarrowAsWide, Cond: ISD::SETOGT);
13471 // If the narrow value is odd or exact, pick it.
13472 // Otherwise, narrow is even and corresponds to either the rounded-up
13473 // or rounded-down value. If narrow is the rounded-down value, we want
13474 // the rounded-up value as it will be odd.
13475 SDValue Adjust = DAG.getSelect(DL: dl, VT: ResultIntVT, Cond: NarrowIsRd, LHS: One, RHS: NegativeOne);
13476 SDValue Adjusted = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ResultIntVT, N1: NarrowBits, N2: Adjust);
13477 Op = DAG.getSelect(DL: dl, VT: ResultIntVT, Cond: KeepNarrow, LHS: NarrowBits, RHS: Adjusted);
13478 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: ResultVT, Operand: Op);
13479}
13480
13481SDValue TargetLowering::expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const {
13482 assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
13483 SDValue Op = Node->getOperand(Num: 0);
13484 EVT VT = Node->getValueType(ResNo: 0);
13485 SDLoc dl(Node);
13486 if (VT.getScalarType() == MVT::bf16) {
13487 if (Node->getConstantOperandVal(Num: 1) == 1) {
13488 return DAG.getNode(Opcode: ISD::FP_TO_BF16, DL: dl, VT, Operand: Node->getOperand(Num: 0));
13489 }
13490 EVT OperandVT = Op.getValueType();
13491 SDValue IsNaN = DAG.getSetCC(
13492 DL: dl,
13493 VT: getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: OperandVT),
13494 LHS: Op, RHS: Op, Cond: ISD::SETUO);
13495
13496 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
13497 // can induce double-rounding which may alter the results. We can
13498 // correct for this using a trick explained in: Boldo, Sylvie, and
13499 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
13500 // World Congress. 2005.
13501 EVT F32 = VT.changeElementType(Context&: *DAG.getContext(), EltVT: MVT::f32);
13502 EVT I32 = F32.changeTypeToInteger();
13503 Op = expandRoundInexactToOdd(ResultVT: F32, Op, dl, DAG);
13504 Op = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: I32, Operand: Op);
13505
13506 // Conversions should set NaN's quiet bit. This also prevents NaNs from
13507 // turning into infinities.
13508 SDValue NaN =
13509 DAG.getNode(Opcode: ISD::OR, DL: dl, VT: I32, N1: Op, N2: DAG.getConstant(Val: 0x400000, DL: dl, VT: I32));
13510
13511 // Factor in the contribution of the low 16 bits.
13512 SDValue One = DAG.getConstant(Val: 1, DL: dl, VT: I32);
13513 SDValue Lsb = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: I32, N1: Op,
13514 N2: DAG.getShiftAmountConstant(Val: 16, VT: I32, DL: dl));
13515 Lsb = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: I32, N1: Lsb, N2: One);
13516 SDValue RoundingBias =
13517 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: I32, N1: Lsb, N2: DAG.getConstant(Val: 0x7fff, DL: dl, VT: I32));
13518 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: I32, N1: Op, N2: RoundingBias);
13519
13520 // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
13521 // 0x80000000.
13522 Op = DAG.getSelect(DL: dl, VT: I32, Cond: IsNaN, LHS: NaN, RHS: Add);
13523
13524 // Now that we have rounded, shift the bits into position.
13525 Op = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: I32, N1: Op,
13526 N2: DAG.getShiftAmountConstant(Val: 16, VT: I32, DL: dl));
13527 EVT I16 = I32.changeElementType(Context&: *DAG.getContext(), EltVT: MVT::i16);
13528 Op = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: I16, Operand: Op);
13529 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: Op);
13530 }
13531 return SDValue();
13532}
13533
13534SDValue TargetLowering::expandVectorSplice(SDNode *Node,
13535 SelectionDAG &DAG) const {
13536 assert((Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT ||
13537 Node->getOpcode() == ISD::VECTOR_SPLICE_RIGHT) &&
13538 "Unexpected opcode!");
13539 assert((Node->getValueType(0).isScalableVector() ||
13540 !isa<ConstantSDNode>(Node->getOperand(2))) &&
13541 "Fixed length vector types with constant offsets expected to use "
13542 "SHUFFLE_VECTOR!");
13543
13544 EVT VT = Node->getValueType(ResNo: 0);
13545 SDValue V1 = Node->getOperand(Num: 0);
13546 SDValue V2 = Node->getOperand(Num: 1);
13547 SDValue Offset = Node->getOperand(Num: 2);
13548 SDLoc DL(Node);
13549
13550 // Expand through memory thusly:
13551 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
13552 // Store V1, Ptr
13553 // Store V2, Ptr + sizeof(V1)
13554 // if (VECTOR_SPLICE_LEFT)
13555 // Ptr = Ptr + (Offset * sizeof(VT.Elt))
13556 // else
13557 // Ptr = Ptr + sizeof(V1) - (Offset * size(VT.Elt))
13558 // Res = Load Ptr
13559
13560 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
13561
13562 EVT MemVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: VT.getVectorElementType(),
13563 EC: VT.getVectorElementCount() * 2);
13564 SDValue StackPtr = DAG.CreateStackTemporary(Bytes: MemVT.getStoreSize(), Alignment);
13565 EVT PtrVT = StackPtr.getValueType();
13566 auto &MF = DAG.getMachineFunction();
13567 auto FrameIndex = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
13568 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FI: FrameIndex);
13569
13570 // Store the lo part of CONCAT_VECTORS(V1, V2)
13571 SDValue StoreV1 =
13572 DAG.getStore(Chain: DAG.getEntryNode(), dl: DL, Val: V1, Ptr: StackPtr, PtrInfo, Alignment);
13573 // Store the hi part of CONCAT_VECTORS(V1, V2)
13574 SDValue VTBytes = DAG.getTypeSize(DL, VT: PtrVT, TS: VT.getStoreSize());
13575 SDValue StackPtr2 = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, N2: VTBytes);
13576 SDValue StoreV2 =
13577 DAG.getStore(Chain: StoreV1, dl: DL, Val: V2, Ptr: StackPtr2, PtrInfo, Alignment);
13578
13579 // NOTE: TrailingBytes must be clamped so as not to read outside of V1:V2.
13580 SDValue EltByteSize =
13581 DAG.getTypeSize(DL, VT: PtrVT, TS: VT.getVectorElementType().getStoreSize());
13582 Offset = DAG.getZExtOrTrunc(Op: Offset, DL, VT: PtrVT);
13583 SDValue TrailingBytes = DAG.getNode(Opcode: ISD::MUL, DL, VT: PtrVT, N1: Offset, N2: EltByteSize);
13584
13585 TrailingBytes = DAG.getNode(Opcode: ISD::UMIN, DL, VT: PtrVT, N1: TrailingBytes, N2: VTBytes);
13586
13587 if (Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT)
13588 StackPtr = DAG.getMemBasePlusOffset(Base: StackPtr, Offset: TrailingBytes, DL);
13589 else
13590 StackPtr = DAG.getNode(Opcode: ISD::SUB, DL, VT: PtrVT, N1: StackPtr2, N2: TrailingBytes);
13591
13592 // Load the spliced result
13593 return DAG.getLoad(VT, dl: DL, Chain: StoreV2, Ptr: StackPtr,
13594 PtrInfo: MachinePointerInfo::getUnknownStack(MF), Alignment);
13595}
13596
13597SDValue TargetLowering::expandVECTOR_COMPRESS(SDNode *Node,
13598 SelectionDAG &DAG) const {
13599 SDLoc DL(Node);
13600 SDValue Vec = Node->getOperand(Num: 0);
13601 SDValue Mask = Node->getOperand(Num: 1);
13602 SDValue Passthru = Node->getOperand(Num: 2);
13603
13604 EVT VecVT = Vec.getValueType();
13605 EVT ScalarVT = VecVT.getScalarType();
13606 EVT MaskVT = Mask.getValueType();
13607 EVT MaskScalarVT = MaskVT.getScalarType();
13608
13609 // Needs to be handled by targets that have scalable vector types.
13610 if (VecVT.isScalableVector())
13611 report_fatal_error(reason: "Cannot expand masked_compress for scalable vectors.");
13612
13613 Align Alignment = DAG.getReducedAlign(VT: VecVT, /*UseABI=*/false);
13614 SDValue StackPtr = DAG.CreateStackTemporary(Bytes: VecVT.getStoreSize(), Alignment);
13615 int FI = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
13616 MachinePointerInfo PtrInfo =
13617 MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI);
13618
13619 MVT PositionVT = getVectorIdxTy(DL: DAG.getDataLayout());
13620 SDValue Chain = DAG.getEntryNode();
13621 SDValue OutPos = DAG.getConstant(Val: 0, DL, VT: PositionVT);
13622
13623 bool HasPassthru = !Passthru.isUndef();
13624
13625 // If we have a passthru vector, store it on the stack, overwrite the matching
13626 // positions and then re-write the last element that was potentially
13627 // overwritten even though mask[i] = false.
13628 if (HasPassthru)
13629 Chain = DAG.getStore(Chain, dl: DL, Val: Passthru, Ptr: StackPtr, PtrInfo, Alignment);
13630
13631 SDValue LastWriteVal;
13632 APInt PassthruSplatVal;
13633 bool IsSplatPassthru =
13634 ISD::isConstantSplatVector(N: Passthru.getNode(), SplatValue&: PassthruSplatVal);
13635
13636 if (IsSplatPassthru) {
13637 // As we do not know which position we wrote to last, we cannot simply
13638 // access that index from the passthru vector. So we first check if passthru
13639 // is a splat vector, to use any element ...
13640 LastWriteVal = DAG.getConstant(Val: PassthruSplatVal, DL, VT: ScalarVT);
13641 } else if (HasPassthru) {
13642 // ... if it is not a splat vector, we need to get the passthru value at
13643 // position = popcount(mask) and re-load it from the stack before it is
13644 // overwritten in the loop below.
13645 EVT PopcountVT = ScalarVT.changeTypeToInteger();
13646 SDValue Popcount = DAG.getNode(
13647 Opcode: ISD::TRUNCATE, DL,
13648 VT: MaskVT.changeVectorElementType(Context&: *DAG.getContext(), EltVT: MVT::i1), Operand: Mask);
13649 Popcount = DAG.getNode(
13650 Opcode: ISD::ZERO_EXTEND, DL,
13651 VT: MaskVT.changeVectorElementType(Context&: *DAG.getContext(), EltVT: PopcountVT),
13652 Operand: Popcount);
13653 Popcount = DAG.getNode(Opcode: ISD::VECREDUCE_ADD, DL, VT: PopcountVT, Operand: Popcount);
13654 SDValue LastElmtPtr =
13655 getVectorElementPointer(DAG, VecPtr: StackPtr, VecVT, Index: Popcount);
13656 LastWriteVal = DAG.getLoad(
13657 VT: ScalarVT, dl: DL, Chain, Ptr: LastElmtPtr,
13658 PtrInfo: MachinePointerInfo::getUnknownStack(MF&: DAG.getMachineFunction()));
13659 Chain = LastWriteVal.getValue(R: 1);
13660 }
13661
13662 unsigned NumElms = VecVT.getVectorNumElements();
13663 for (unsigned I = 0; I < NumElms; I++) {
13664 SDValue ValI = DAG.getExtractVectorElt(DL, VT: ScalarVT, Vec, Idx: I);
13665 SDValue OutPtr = getVectorElementPointer(DAG, VecPtr: StackPtr, VecVT, Index: OutPos);
13666 Chain = DAG.getStore(
13667 Chain, dl: DL, Val: ValI, Ptr: OutPtr,
13668 PtrInfo: MachinePointerInfo::getUnknownStack(MF&: DAG.getMachineFunction()));
13669
13670 // Get the mask value and add it to the current output position. This
13671 // either increments by 1 if MaskI is true or adds 0 otherwise.
13672 // Freeze in case we have poison/undef mask entries.
13673 SDValue MaskI = DAG.getExtractVectorElt(DL, VT: MaskScalarVT, Vec: Mask, Idx: I);
13674 MaskI = DAG.getFreeze(V: MaskI);
13675 MaskI = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i1, Operand: MaskI);
13676 MaskI = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: PositionVT, Operand: MaskI);
13677 OutPos = DAG.getNode(Opcode: ISD::ADD, DL, VT: PositionVT, N1: OutPos, N2: MaskI);
13678
13679 if (HasPassthru && I == NumElms - 1) {
13680 SDValue EndOfVector =
13681 DAG.getConstant(Val: VecVT.getVectorNumElements() - 1, DL, VT: PositionVT);
13682 SDValue AllLanesSelected =
13683 DAG.getSetCC(DL, VT: MVT::i1, LHS: OutPos, RHS: EndOfVector, Cond: ISD::CondCode::SETUGT);
13684 OutPos = DAG.getNode(Opcode: ISD::UMIN, DL, VT: PositionVT, N1: OutPos, N2: EndOfVector);
13685 OutPtr = getVectorElementPointer(DAG, VecPtr: StackPtr, VecVT, Index: OutPos);
13686
13687 // Re-write the last ValI if all lanes were selected. Otherwise,
13688 // overwrite the last write it with the passthru value.
13689 LastWriteVal = DAG.getSelect(DL, VT: ScalarVT, Cond: AllLanesSelected, LHS: ValI,
13690 RHS: LastWriteVal, Flags: SDNodeFlags::Unpredictable);
13691 Chain = DAG.getStore(
13692 Chain, dl: DL, Val: LastWriteVal, Ptr: OutPtr,
13693 PtrInfo: MachinePointerInfo::getUnknownStack(MF&: DAG.getMachineFunction()));
13694 }
13695 }
13696
13697 return DAG.getLoad(VT: VecVT, dl: DL, Chain, Ptr: StackPtr, PtrInfo, Alignment);
13698}
13699
13700SDValue TargetLowering::expandCttzElts(SDNode *Node, SelectionDAG &DAG) const {
13701 SDLoc DL(Node);
13702 EVT VT = Node->getValueType(ResNo: 0);
13703
13704 bool ZeroIsPoison = Node->getOpcode() == ISD::CTTZ_ELTS_ZERO_POISON;
13705 auto [Mask, StepVec] =
13706 getLegalMaskAndStepVector(Mask: Node->getOperand(Num: 0), ZeroIsPoison, DL, DAG);
13707
13708 // No legal step vector: split mask in half and recombine results.
13709 // LoNumElts uses the non-poison CTTZ_ELTS so its result is well-defined
13710 // (== LoNumElts when no active lane), allowing the SETNE comparison.
13711 // Result: (ResLo != LoNumElts) ? ResLo : (LoNumElts + ResHi)
13712 if (!StepVec) {
13713 EVT ResVT = Node->getValueType(ResNo: 0);
13714 auto [MaskLo, MaskHi] = DAG.SplitVector(N: Node->getOperand(Num: 0), DL);
13715 SDValue LoNumElts = DAG.getElementCount(
13716 DL, VT: ResVT, EC: MaskLo.getValueType().getVectorElementCount());
13717 SDValue ResLo = DAG.getNode(Opcode: ISD::CTTZ_ELTS, DL, VT: ResVT, Operand: MaskLo);
13718 SDValue ResHi = DAG.getNode(Opcode: Node->getOpcode(), DL, VT: ResVT, Operand: MaskHi);
13719 SDValue ResLoNotNumElts = DAG.getSetCC(
13720 DL, VT: getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: ResVT),
13721 LHS: ResLo, RHS: LoNumElts, Cond: ISD::SETNE);
13722 // Per LangRef, ResVT must be wide enough to hold the total element count,
13723 // so the sum cannot wrap as an unsigned add. NSW is not guaranteed since
13724 // the count is only required to fit unsigned.
13725 SDValue Sum = DAG.getNode(Opcode: ISD::ADD, DL, VT: ResVT, N1: LoNumElts, N2: ResHi,
13726 Flags: SDNodeFlags::NoUnsignedWrap);
13727 return DAG.getSelect(DL, VT: ResVT, Cond: ResLoNotNumElts, LHS: ResLo, RHS: Sum);
13728 }
13729
13730 EVT StepVecVT = StepVec.getValueType();
13731 EVT StepVT = StepVecVT.getVectorElementType();
13732
13733 // Promote the scalar result type early to avoid redundant zexts.
13734 if (getTypeAction(VT: StepVT.getSimpleVT()) == TypePromoteInteger)
13735 StepVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT: StepVT);
13736
13737 SDValue VL =
13738 DAG.getElementCount(DL, VT: StepVT, EC: StepVecVT.getVectorElementCount());
13739 SDValue SplatVL = DAG.getSplat(VT: StepVecVT, DL, Op: VL);
13740 StepVec = DAG.getNode(Opcode: ISD::SUB, DL, VT: StepVecVT, N1: SplatVL, N2: StepVec);
13741 SDValue Zeroes = DAG.getConstant(Val: 0, DL, VT: StepVecVT);
13742 SDValue Select = DAG.getSelect(DL, VT: StepVecVT, Cond: Mask, LHS: StepVec, RHS: Zeroes);
13743 SDValue Max = DAG.getNode(Opcode: ISD::VECREDUCE_UMAX, DL,
13744 VT: StepVecVT.getVectorElementType(), Operand: Select);
13745 SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL, VT: StepVT, N1: VL,
13746 N2: DAG.getZExtOrTrunc(Op: Max, DL, VT: StepVT));
13747
13748 return DAG.getZExtOrTrunc(Op: Sub, DL, VT);
13749}
13750
13751SDValue TargetLowering::expandPartialReduceMLA(SDNode *N,
13752 SelectionDAG &DAG) const {
13753 SDLoc DL(N);
13754 SDValue Acc = N->getOperand(Num: 0);
13755 SDValue MulLHS = N->getOperand(Num: 1);
13756 SDValue MulRHS = N->getOperand(Num: 2);
13757 EVT AccVT = Acc.getValueType();
13758 EVT MulOpVT = MulLHS.getValueType();
13759
13760 EVT ExtMulOpVT =
13761 EVT::getVectorVT(Context&: *DAG.getContext(), VT: AccVT.getVectorElementType(),
13762 EC: MulOpVT.getVectorElementCount());
13763
13764 unsigned ExtOpcLHS, ExtOpcRHS;
13765 switch (N->getOpcode()) {
13766 default:
13767 llvm_unreachable("Unexpected opcode");
13768 case ISD::PARTIAL_REDUCE_UMLA:
13769 ExtOpcLHS = ExtOpcRHS = ISD::ZERO_EXTEND;
13770 break;
13771 case ISD::PARTIAL_REDUCE_SMLA:
13772 ExtOpcLHS = ExtOpcRHS = ISD::SIGN_EXTEND;
13773 break;
13774 case ISD::PARTIAL_REDUCE_FMLA:
13775 ExtOpcLHS = ExtOpcRHS = ISD::FP_EXTEND;
13776 break;
13777 }
13778
13779 if (ExtMulOpVT != MulOpVT) {
13780 MulLHS = DAG.getNode(Opcode: ExtOpcLHS, DL, VT: ExtMulOpVT, Operand: MulLHS);
13781 MulRHS = DAG.getNode(Opcode: ExtOpcRHS, DL, VT: ExtMulOpVT, Operand: MulRHS);
13782 }
13783 SDValue Input = MulLHS;
13784 if (N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA) {
13785 if (!llvm::isOneOrOneSplatFP(V: MulRHS))
13786 Input = DAG.getNode(Opcode: ISD::FMUL, DL, VT: ExtMulOpVT, N1: MulLHS, N2: MulRHS);
13787 } else if (!llvm::isOneOrOneSplat(V: MulRHS)) {
13788 Input = DAG.getNode(Opcode: ISD::MUL, DL, VT: ExtMulOpVT, N1: MulLHS, N2: MulRHS);
13789 }
13790
13791 unsigned Stride = AccVT.getVectorMinNumElements();
13792 unsigned ScaleFactor = MulOpVT.getVectorMinNumElements() / Stride;
13793
13794 // Collect all of the subvectors
13795 std::deque<SDValue> Subvectors = {Acc};
13796 for (unsigned I = 0; I < ScaleFactor; I++)
13797 Subvectors.push_back(x: DAG.getExtractSubvector(DL, VT: AccVT, Vec: Input, Idx: I * Stride));
13798
13799 unsigned FlatNode =
13800 N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA ? ISD::FADD : ISD::ADD;
13801
13802 // Flatten the subvector tree
13803 while (Subvectors.size() > 1) {
13804 Subvectors.push_back(
13805 x: DAG.getNode(Opcode: FlatNode, DL, VT: AccVT, Ops: {Subvectors[0], Subvectors[1]}));
13806 Subvectors.pop_front();
13807 Subvectors.pop_front();
13808 }
13809
13810 assert(Subvectors.size() == 1 &&
13811 "There should only be one subvector after tree flattening");
13812
13813 return Subvectors[0];
13814}
13815
13816/// Given a store node \p StoreNode, return true if it is safe to fold that node
13817/// into \p FPNode, which expands to a library call with output pointers.
13818static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode,
13819 SDNode *FPNode) {
13820 SmallVector<const SDNode *, 8> Worklist;
13821 SmallVector<const SDNode *, 8> DeferredNodes;
13822 SmallPtrSet<const SDNode *, 16> Visited;
13823
13824 // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode).
13825 for (SDValue Op : StoreNode->ops())
13826 if (Op.getNode() != FPNode)
13827 Worklist.push_back(Elt: Op.getNode());
13828
13829 unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps();
13830 while (!Worklist.empty()) {
13831 const SDNode *Node = Worklist.pop_back_val();
13832 auto [_, Inserted] = Visited.insert(Ptr: Node);
13833 if (!Inserted)
13834 continue;
13835
13836 if (MaxSteps > 0 && Visited.size() >= MaxSteps)
13837 return false;
13838
13839 // Reached the FPNode (would result in a cycle).
13840 // OR Reached CALLSEQ_START (would result in nested call sequences).
13841 if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START)
13842 return false;
13843
13844 if (Node->getOpcode() == ISD::CALLSEQ_END) {
13845 // Defer looking into call sequences (so we can check we're outside one).
13846 // We still need to look through these for the predecessor check.
13847 DeferredNodes.push_back(Elt: Node);
13848 continue;
13849 }
13850
13851 for (SDValue Op : Node->ops())
13852 Worklist.push_back(Elt: Op.getNode());
13853 }
13854
13855 // True if we're outside a call sequence and don't have the FPNode as a
13856 // predecessor. No cycles or nested call sequences possible.
13857 return !SDNode::hasPredecessorHelper(N: FPNode, Visited, Worklist&: DeferredNodes,
13858 MaxSteps);
13859}
13860
13861bool TargetLowering::expandMultipleResultFPLibCall(
13862 SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node,
13863 SmallVectorImpl<SDValue> &Results,
13864 std::optional<unsigned> CallRetResNo) const {
13865 if (LC == RTLIB::UNKNOWN_LIBCALL)
13866 return false;
13867
13868 RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(Call: LC);
13869 if (LibcallImpl == RTLIB::Unsupported)
13870 return false;
13871
13872 LLVMContext &Ctx = *DAG.getContext();
13873 EVT VT = Node->getValueType(ResNo: 0);
13874 unsigned NumResults = Node->getNumValues();
13875
13876 // Find users of the node that store the results (and share input chains). The
13877 // destination pointers can be used instead of creating stack allocations.
13878 SDValue StoresInChain;
13879 SmallVector<StoreSDNode *, 2> ResultStores(NumResults);
13880 for (SDNode *User : Node->users()) {
13881 if (!ISD::isNormalStore(N: User))
13882 continue;
13883 auto *ST = cast<StoreSDNode>(Val: User);
13884 SDValue StoreValue = ST->getValue();
13885 unsigned ResNo = StoreValue.getResNo();
13886 // Ensure the store corresponds to an output pointer.
13887 if (CallRetResNo == ResNo)
13888 continue;
13889 // Ensure the store to the default address space and not atomic or volatile.
13890 if (!ST->isSimple() || ST->getAddressSpace() != 0)
13891 continue;
13892 // Ensure all store chains are the same (so they don't alias).
13893 if (StoresInChain && ST->getChain() != StoresInChain)
13894 continue;
13895 // Ensure the store is properly aligned.
13896 Type *StoreType = StoreValue.getValueType().getTypeForEVT(Context&: Ctx);
13897 if (ST->getAlign() <
13898 DAG.getDataLayout().getABITypeAlign(Ty: StoreType->getScalarType()))
13899 continue;
13900 // Avoid:
13901 // 1. Creating cyclic dependencies.
13902 // 2. Expanding the node to a call within a call sequence.
13903 if (!canFoldStoreIntoLibCallOutputPointers(StoreNode: ST, FPNode: Node))
13904 continue;
13905 ResultStores[ResNo] = ST;
13906 StoresInChain = ST->getChain();
13907 }
13908
13909 ArgListTy Args;
13910
13911 // Pass the arguments.
13912 for (const SDValue &Op : Node->op_values()) {
13913 EVT ArgVT = Op.getValueType();
13914 Type *ArgTy = ArgVT.getTypeForEVT(Context&: Ctx);
13915 Args.emplace_back(args: Op, args&: ArgTy);
13916 }
13917
13918 // Pass the output pointers.
13919 SmallVector<SDValue, 2> ResultPtrs(NumResults);
13920 Type *PointerTy = PointerType::getUnqual(C&: Ctx);
13921 for (auto [ResNo, ST] : llvm::enumerate(First&: ResultStores)) {
13922 if (ResNo == CallRetResNo)
13923 continue;
13924 EVT ResVT = Node->getValueType(ResNo);
13925 SDValue ResultPtr = ST ? ST->getBasePtr() : DAG.CreateStackTemporary(VT: ResVT);
13926 ResultPtrs[ResNo] = ResultPtr;
13927 Args.emplace_back(args&: ResultPtr, args&: PointerTy);
13928 }
13929
13930 SDLoc DL(Node);
13931
13932 if (RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(Impl: LibcallImpl)) {
13933 // Pass the vector mask (if required).
13934 EVT MaskVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: Ctx, VT);
13935 SDValue Mask = DAG.getBoolConstant(V: true, DL, VT: MaskVT, OpVT: VT);
13936 Args.emplace_back(args&: Mask, args: MaskVT.getTypeForEVT(Context&: Ctx));
13937 }
13938
13939 Type *RetType = CallRetResNo.has_value()
13940 ? Node->getValueType(ResNo: *CallRetResNo).getTypeForEVT(Context&: Ctx)
13941 : Type::getVoidTy(C&: Ctx);
13942 SDValue InChain = StoresInChain ? StoresInChain : DAG.getEntryNode();
13943 SDValue Callee =
13944 DAG.getExternalSymbol(LCImpl: LibcallImpl, VT: getPointerTy(DL: DAG.getDataLayout()));
13945 TargetLowering::CallLoweringInfo CLI(DAG);
13946 CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(
13947 CC: getLibcallImplCallingConv(Call: LibcallImpl), ResultType: RetType, Target: Callee, ArgsList: std::move(Args));
13948
13949 auto [Call, CallChain] = LowerCallTo(CLI);
13950
13951 for (auto [ResNo, ResultPtr] : llvm::enumerate(First&: ResultPtrs)) {
13952 if (ResNo == CallRetResNo) {
13953 Results.push_back(Elt: Call);
13954 continue;
13955 }
13956 MachinePointerInfo PtrInfo;
13957 SDValue LoadResult = DAG.getLoad(VT: Node->getValueType(ResNo), dl: DL, Chain: CallChain,
13958 Ptr: ResultPtr, PtrInfo);
13959 SDValue OutChain = LoadResult.getValue(R: 1);
13960
13961 if (StoreSDNode *ST = ResultStores[ResNo]) {
13962 // Replace store with the library call.
13963 DAG.ReplaceAllUsesOfValueWith(From: SDValue(ST, 0), To: OutChain);
13964 PtrInfo = ST->getPointerInfo();
13965 } else {
13966 PtrInfo = MachinePointerInfo::getFixedStack(
13967 MF&: DAG.getMachineFunction(),
13968 FI: cast<FrameIndexSDNode>(Val&: ResultPtr)->getIndex());
13969 }
13970
13971 Results.push_back(Elt: LoadResult);
13972 }
13973
13974 return true;
13975}
13976
13977bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
13978 SDValue &LHS, SDValue &RHS,
13979 SDValue &CC, SDValue Mask,
13980 SDValue EVL, bool &NeedInvert,
13981 const SDLoc &dl, SDValue &Chain,
13982 bool IsSignaling) const {
13983 MVT OpVT = LHS.getSimpleValueType();
13984 ISD::CondCode CCCode = cast<CondCodeSDNode>(Val&: CC)->get();
13985 NeedInvert = false;
13986 assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
13987 bool IsNonVP = !EVL;
13988 switch (getCondCodeAction(CC: CCCode, VT: OpVT)) {
13989 default:
13990 llvm_unreachable("Unknown condition code action!");
13991 case TargetLowering::Legal:
13992 // Nothing to do.
13993 break;
13994 case TargetLowering::Expand: {
13995 ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(Operation: CCCode);
13996 if (isCondCodeLegalOrCustom(CC: InvCC, VT: OpVT)) {
13997 std::swap(a&: LHS, b&: RHS);
13998 CC = DAG.getCondCode(Cond: InvCC);
13999 return true;
14000 }
14001 // Swapping operands didn't work. Try inverting the condition.
14002 bool NeedSwap = false;
14003 InvCC = getSetCCInverse(Operation: CCCode, Type: OpVT);
14004 if (!isCondCodeLegalOrCustom(CC: InvCC, VT: OpVT)) {
14005 // If inverting the condition is not enough, try swapping operands
14006 // on top of it.
14007 InvCC = ISD::getSetCCSwappedOperands(Operation: InvCC);
14008 NeedSwap = true;
14009 }
14010 if (isCondCodeLegalOrCustom(CC: InvCC, VT: OpVT)) {
14011 CC = DAG.getCondCode(Cond: InvCC);
14012 NeedInvert = true;
14013 if (NeedSwap)
14014 std::swap(a&: LHS, b&: RHS);
14015 return true;
14016 }
14017
14018 // Special case: expand i1 comparisons using logical operations.
14019 if (OpVT == MVT::i1) {
14020 SDValue Ret;
14021 switch (CCCode) {
14022 default:
14023 llvm_unreachable("Unknown integer setcc!");
14024 case ISD::SETEQ: // X == Y --> ~(X ^ Y)
14025 Ret = DAG.getNOT(DL: dl, Val: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i1, N1: LHS, N2: RHS),
14026 VT: MVT::i1);
14027 break;
14028 case ISD::SETNE: // X != Y --> (X ^ Y)
14029 Ret = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i1, N1: LHS, N2: RHS);
14030 break;
14031 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
14032 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
14033 Ret = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i1, N1: RHS,
14034 N2: DAG.getNOT(DL: dl, Val: LHS, VT: MVT::i1));
14035 break;
14036 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
14037 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
14038 Ret = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i1, N1: LHS,
14039 N2: DAG.getNOT(DL: dl, Val: RHS, VT: MVT::i1));
14040 break;
14041 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
14042 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
14043 Ret = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: MVT::i1, N1: RHS,
14044 N2: DAG.getNOT(DL: dl, Val: LHS, VT: MVT::i1));
14045 break;
14046 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
14047 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
14048 Ret = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: MVT::i1, N1: LHS,
14049 N2: DAG.getNOT(DL: dl, Val: RHS, VT: MVT::i1));
14050 break;
14051 }
14052
14053 LHS = DAG.getZExtOrTrunc(Op: Ret, DL: dl, VT);
14054 RHS = SDValue();
14055 CC = SDValue();
14056 return true;
14057 }
14058
14059 ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
14060 unsigned Opc = 0;
14061 switch (CCCode) {
14062 default:
14063 llvm_unreachable("Don't know how to expand this condition!");
14064 case ISD::SETUO:
14065 if (isCondCodeLegal(CC: ISD::SETUNE, VT: OpVT)) {
14066 CC1 = ISD::SETUNE;
14067 CC2 = ISD::SETUNE;
14068 Opc = ISD::OR;
14069 break;
14070 }
14071 assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
14072 "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
14073 NeedInvert = true;
14074 [[fallthrough]];
14075 case ISD::SETO:
14076 assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
14077 "If SETO is expanded, SETOEQ must be legal!");
14078 CC1 = ISD::SETOEQ;
14079 CC2 = ISD::SETOEQ;
14080 Opc = ISD::AND;
14081 break;
14082 case ISD::SETONE:
14083 case ISD::SETUEQ:
14084 // If the SETUO or SETO CC isn't legal, we might be able to use
14085 // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
14086 // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
14087 // the operands.
14088 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
14089 if (!isCondCodeLegal(CC: CC2, VT: OpVT) && (isCondCodeLegal(CC: ISD::SETOGT, VT: OpVT) ||
14090 isCondCodeLegal(CC: ISD::SETOLT, VT: OpVT))) {
14091 CC1 = ISD::SETOGT;
14092 CC2 = ISD::SETOLT;
14093 Opc = ISD::OR;
14094 NeedInvert = ((unsigned)CCCode & 0x8U);
14095 break;
14096 }
14097 [[fallthrough]];
14098 case ISD::SETOEQ:
14099 case ISD::SETOGT:
14100 case ISD::SETOGE:
14101 case ISD::SETOLT:
14102 case ISD::SETOLE:
14103 case ISD::SETUNE:
14104 case ISD::SETUGT:
14105 case ISD::SETUGE:
14106 case ISD::SETULT:
14107 case ISD::SETULE:
14108 // If we are floating point, assign and break, otherwise fall through.
14109 if (!OpVT.isInteger()) {
14110 // We can use the 4th bit to tell if we are the unordered
14111 // or ordered version of the opcode.
14112 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
14113 Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
14114 CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
14115 break;
14116 }
14117 // Fallthrough if we are unsigned integer.
14118 [[fallthrough]];
14119 case ISD::SETLE:
14120 case ISD::SETGT:
14121 case ISD::SETGE:
14122 case ISD::SETLT:
14123 case ISD::SETNE:
14124 case ISD::SETEQ:
14125 // If all combinations of inverting the condition and swapping operands
14126 // didn't work then we have no means to expand the condition.
14127 llvm_unreachable("Don't know how to expand this condition!");
14128 }
14129
14130 SDValue SetCC1, SetCC2;
14131 if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
14132 // If we aren't the ordered or unorder operation,
14133 // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
14134 if (IsNonVP) {
14135 SetCC1 = DAG.getSetCC(DL: dl, VT, LHS, RHS, Cond: CC1, Chain, IsSignaling);
14136 SetCC2 = DAG.getSetCC(DL: dl, VT, LHS, RHS, Cond: CC2, Chain, IsSignaling);
14137 } else {
14138 SetCC1 = DAG.getSetCCVP(DL: dl, VT, LHS, RHS, Cond: CC1, Mask, EVL);
14139 SetCC2 = DAG.getSetCCVP(DL: dl, VT, LHS, RHS, Cond: CC2, Mask, EVL);
14140 }
14141 } else {
14142 // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
14143 if (IsNonVP) {
14144 SetCC1 = DAG.getSetCC(DL: dl, VT, LHS, RHS: LHS, Cond: CC1, Chain, IsSignaling);
14145 SetCC2 = DAG.getSetCC(DL: dl, VT, LHS: RHS, RHS, Cond: CC2, Chain, IsSignaling);
14146 } else {
14147 SetCC1 = DAG.getSetCCVP(DL: dl, VT, LHS, RHS: LHS, Cond: CC1, Mask, EVL);
14148 SetCC2 = DAG.getSetCCVP(DL: dl, VT, LHS: RHS, RHS, Cond: CC2, Mask, EVL);
14149 }
14150 }
14151 if (Chain)
14152 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: SetCC1.getValue(R: 1),
14153 N2: SetCC2.getValue(R: 1));
14154 if (IsNonVP)
14155 LHS = DAG.getNode(Opcode: Opc, DL: dl, VT, N1: SetCC1, N2: SetCC2);
14156 else {
14157 // Transform the binary opcode to the VP equivalent.
14158 assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
14159 Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
14160 LHS = DAG.getNode(Opcode: Opc, DL: dl, VT, N1: SetCC1, N2: SetCC2, N3: Mask, N4: EVL);
14161 }
14162 RHS = SDValue();
14163 CC = SDValue();
14164 return true;
14165 }
14166 }
14167 return false;
14168}
14169
14170SDValue TargetLowering::expandVectorNaryOpBySplitting(SDNode *Node,
14171 SelectionDAG &DAG) const {
14172 EVT VT = Node->getValueType(ResNo: 0);
14173 // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
14174 // split into two equal parts.
14175 if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(RHS: 2))
14176 return SDValue();
14177
14178 // Restrict expansion to cases where both parts can be concatenated.
14179 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
14180 if (LoVT != HiVT || !isTypeLegal(VT: LoVT))
14181 return SDValue();
14182
14183 SDLoc DL(Node);
14184 unsigned Opcode = Node->getOpcode();
14185
14186 // Don't expand if the result is likely to be unrolled anyway.
14187 if (!isOperationLegalOrCustomOrPromote(Op: Opcode, VT: LoVT))
14188 return SDValue();
14189
14190 SmallVector<SDValue, 4> LoOps, HiOps;
14191 for (const SDValue &V : Node->op_values()) {
14192 auto [Lo, Hi] = DAG.SplitVector(N: V, DL, LoVT, HiVT);
14193 LoOps.push_back(Elt: Lo);
14194 HiOps.push_back(Elt: Hi);
14195 }
14196
14197 SDValue SplitOpLo = DAG.getNode(Opcode, DL, VT: LoVT, Ops: LoOps, Flags: Node->getFlags());
14198 SDValue SplitOpHi = DAG.getNode(Opcode, DL, VT: HiVT, Ops: HiOps, Flags: Node->getFlags());
14199 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, N1: SplitOpLo, N2: SplitOpHi);
14200}
14201
14202SDValue TargetLowering::scalarizeExtractedVectorLoad(EVT ResultVT,
14203 const SDLoc &DL,
14204 EVT InVecVT, SDValue EltNo,
14205 LoadSDNode *OriginalLoad,
14206 SelectionDAG &DAG) const {
14207 assert(OriginalLoad->isSimple());
14208
14209 EVT VecEltVT = InVecVT.getVectorElementType();
14210
14211 // If the vector element type is not a multiple of a byte then we are unable
14212 // to correctly compute an address to load only the extracted element as a
14213 // scalar.
14214 if (!VecEltVT.isByteSized())
14215 return SDValue();
14216
14217 ISD::LoadExtType ExtTy =
14218 ResultVT.bitsGT(VT: VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
14219 if (!isOperationLegalOrCustom(Op: ISD::LOAD, VT: VecEltVT))
14220 return SDValue();
14221
14222 std::optional<unsigned> ByteOffset;
14223 Align Alignment = OriginalLoad->getAlign();
14224 MachinePointerInfo MPI;
14225 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(Val&: EltNo)) {
14226 int Elt = ConstEltNo->getZExtValue();
14227 ByteOffset = VecEltVT.getSizeInBits() * Elt / 8;
14228 MPI = OriginalLoad->getPointerInfo().getWithOffset(O: *ByteOffset);
14229 Alignment = commonAlignment(A: Alignment, Offset: *ByteOffset);
14230 } else {
14231 // Discard the pointer info except the address space because the memory
14232 // operand can't represent this new access since the offset is variable.
14233 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
14234 Alignment = commonAlignment(A: Alignment, Offset: VecEltVT.getSizeInBits() / 8);
14235 }
14236
14237 if (!shouldReduceLoadWidth(Load: OriginalLoad, ExtTy, NewVT: VecEltVT, ByteOffset))
14238 return SDValue();
14239
14240 unsigned IsFast = 0;
14241 if (!allowsMemoryAccess(Context&: *DAG.getContext(), DL: DAG.getDataLayout(), VT: VecEltVT,
14242 AddrSpace: OriginalLoad->getAddressSpace(), Alignment,
14243 Flags: OriginalLoad->getMemOperand()->getFlags(), Fast: &IsFast) ||
14244 !IsFast)
14245 return SDValue();
14246
14247 // The original DAG loaded the entire vector from memory, so arithmetic
14248 // within it must be inbounds.
14249 SDValue NewPtr = getInboundsVectorElementPointer(
14250 DAG, VecPtr: OriginalLoad->getBasePtr(), VecVT: InVecVT, Index: EltNo);
14251
14252 // We are replacing a vector load with a scalar load. The new load must have
14253 // identical memory op ordering to the original.
14254 SDValue Load;
14255 if (ResultVT.bitsGT(VT: VecEltVT)) {
14256 // If the result type of vextract is wider than the load, then issue an
14257 // extending load instead.
14258 ISD::LoadExtType ExtType =
14259 isLoadLegal(ValVT: ResultVT, MemVT: VecEltVT, Alignment,
14260 AddrSpace: OriginalLoad->getAddressSpace(), ExtType: ISD::ZEXTLOAD, Atomic: false)
14261 ? ISD::ZEXTLOAD
14262 : ISD::EXTLOAD;
14263 Load = DAG.getExtLoad(ExtType, dl: DL, VT: ResultVT, Chain: OriginalLoad->getChain(),
14264 Ptr: NewPtr, PtrInfo: MPI, MemVT: VecEltVT, Alignment,
14265 MMOFlags: OriginalLoad->getMemOperand()->getFlags(),
14266 AAInfo: OriginalLoad->getAAInfo());
14267 DAG.makeEquivalentMemoryOrdering(OldLoad: OriginalLoad, NewMemOp: Load);
14268 } else {
14269 // The result type is narrower or the same width as the vector element
14270 Load = DAG.getLoad(VT: VecEltVT, dl: DL, Chain: OriginalLoad->getChain(), Ptr: NewPtr, PtrInfo: MPI,
14271 Alignment, MMOFlags: OriginalLoad->getMemOperand()->getFlags(),
14272 AAInfo: OriginalLoad->getAAInfo());
14273 DAG.makeEquivalentMemoryOrdering(OldLoad: OriginalLoad, NewMemOp: Load);
14274 if (ResultVT.bitsLT(VT: VecEltVT))
14275 Load = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: ResultVT, Operand: Load);
14276 else
14277 Load = DAG.getBitcast(VT: ResultVT, V: Load);
14278 }
14279
14280 return Load;
14281}
14282
14283// Set type id for call site info and metadata 'call_target'.
14284// We are filtering for:
14285// a) The call-graph-section use case that wants to know about indirect
14286// calls, or
14287// b) We want to annotate indirect calls.
14288void TargetLowering::setTypeIdForCallsiteInfo(
14289 const CallBase *CB, MachineFunction &MF,
14290 MachineFunction::CallSiteInfo &CSInfo) const {
14291 if (CB && CB->isIndirectCall() &&
14292 (MF.getTarget().Options.EmitCallGraphSection ||
14293 MF.getTarget().Options.EmitCallSiteInfo))
14294 CSInfo = MachineFunction::CallSiteInfo(*CB);
14295}
14296