TargetLowering.cpp source code [llvm_projects/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp]

1	//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This implements the TargetLowering class.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "llvm/CodeGen/TargetLowering.h"
14	#include "llvm/ADT/STLExtras.h"
15	#include "llvm/Analysis/ValueTracking.h"
16	#include "llvm/Analysis/VectorUtils.h"
17	#include "llvm/CodeGen/Analysis.h"
18	#include "llvm/CodeGen/CallingConvLower.h"
19	#include "llvm/CodeGen/CodeGenCommonISel.h"
20	#include "llvm/CodeGen/MachineFrameInfo.h"
21	#include "llvm/CodeGen/MachineFunction.h"
22	#include "llvm/CodeGen/MachineJumpTableInfo.h"
23	#include "llvm/CodeGen/MachineRegisterInfo.h"
24	#include "llvm/CodeGen/SDPatternMatch.h"
25	#include "llvm/CodeGen/SelectionDAG.h"
26	#include "llvm/CodeGen/TargetRegisterInfo.h"
27	#include "llvm/IR/DataLayout.h"
28	#include "llvm/IR/DerivedTypes.h"
29	#include "llvm/IR/GlobalVariable.h"
30	#include "llvm/IR/LLVMContext.h"
31	#include "llvm/MC/MCAsmInfo.h"
32	#include "llvm/MC/MCExpr.h"
33	#include "llvm/Support/DivisionByConstantInfo.h"
34	#include "llvm/Support/ErrorHandling.h"
35	#include "llvm/Support/KnownBits.h"
36	#include "llvm/Support/MathExtras.h"
37	#include "llvm/Target/TargetMachine.h"
38	#include <cctype>
39	#include <deque>
40	using namespace llvm;
41	using namespace llvm::SDPatternMatch;
42
43	/// NOTE: The TargetMachine owns TLOF.
44	TargetLowering::TargetLowering(const TargetMachine &tm,
45	const TargetSubtargetInfo &STI)
46	: TargetLoweringBase (tm, STI) {}
47
48	// Define the virtual destructor out-of-line for build efficiency.
49	TargetLowering::~TargetLowering() = default;
50
51	const char TargetLowering::getTargetNodeName(unsigned* Opcode) const {
52	return nullptr;
53	}
54
55	bool TargetLowering::isPositionIndependent() const {
56	return getTargetMachine().isPositionIndependent();
57	}
58
59	/// Check whether a given call node is in tail position within its function. If
60	/// so, it sets Chain to the input chain of the tail call.
61	bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
62	SDValue &Chain) const {
63	const Function &F = DAG.getMachineFunction().getFunction();
64
65	// First, check if tail calls have been disabled in this function.
66	if (F.getFnAttribute(Kind: "disable-tail-calls").getValueAsBool())
67	return false;
68
69	// Conservatively require the attributes of the call to match those of
70	// the return. Ignore following attributes because they don't affect the
71	// call sequence.
72	AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
73	for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
74	Attribute::DereferenceableOrNull, Attribute::NoAlias,
75	Attribute::NonNull, Attribute::NoUndef,
76	Attribute::Range, Attribute::NoFPClass})
77	CallerAttrs.removeAttribute(Val: Attr);
78
79	if (CallerAttrs.hasAttributes())
80	return false;
81
82	// It's not safe to eliminate the sign / zero extension of the return value.
83	if (CallerAttrs.contains(A: Attribute::ZExt) \|\|
84	CallerAttrs.contains(A: Attribute::SExt))
85	return false;
86
87	// Check if the only use is a function return node.
88	return isUsedByReturnOnly(Node, Chain);
89	}
90
91	bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
92	const uint32_t *CallerPreservedMask,
93	const SmallVectorImpl<CCValAssign> &ArgLocs,
94	const SmallVectorImpl<SDValue> &OutVals) const {
95	for (unsigned I = `0`, E = ArgLocs.size(); I != E; ++I) {
96	const CCValAssign &ArgLoc = ArgLocs [I];
97	if (!ArgLoc.isRegLoc())
98	continue;
99	MCRegister Reg = ArgLoc.getLocReg();
100	// Only look at callee saved registers.
101	if (MachineOperand::clobbersPhysReg(RegMask: CallerPreservedMask, PhysReg: Reg))
102	continue;
103	// Check that we pass the value used for the caller.
104	// (We look for a CopyFromReg reading a virtual register that is used
105	// for the function live-in value of register Reg)
106	SDValue Value = OutVals [I];
107	if (Value ->getOpcode() == ISD::AssertZext)
108	Value = Value.getOperand(i: `0`);
109	if (Value ->getOpcode() != ISD::CopyFromReg)
110	return false;
111	Register ArgReg = cast<RegisterSDNode>(Val: Value ->getOperand(Num: `1`))->getReg();
112	if (MRI.getLiveInPhysReg(VReg: ArgReg) != Reg)
113	return false;
114	}
115	return true;
116	}
117
118	/// Set CallLoweringInfo attribute flags based on a call instruction
119	/// and called function attributes.
120	void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
121	unsigned ArgIdx) {
122	IsSExt = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SExt);
123	IsZExt = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::ZExt);
124	IsNoExt = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::NoExt);
125	IsInReg = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::InReg);
126	IsSRet = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::StructRet);
127	IsNest = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::Nest);
128	IsByVal = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::ByVal);
129	IsPreallocated = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::Preallocated);
130	IsInAlloca = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::InAlloca);
131	IsReturned = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::Returned);
132	IsSwiftSelf = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SwiftSelf);
133	IsSwiftAsync = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SwiftAsync);
134	IsSwiftError = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SwiftError);
135	Alignment = Call->getParamStackAlign(ArgNo: ArgIdx);
136	IndirectType = nullptr;
137	assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= `1` &&
138	"multiple ABI attributes?");
139	if (IsByVal) {
140	IndirectType = Call->getParamByValType(ArgNo: ArgIdx);
141	if (!Alignment)
142	Alignment = Call->getParamAlign(ArgNo: ArgIdx);
143	}
144	if (IsPreallocated)
145	IndirectType = Call->getParamPreallocatedType(ArgNo: ArgIdx);
146	if (IsInAlloca)
147	IndirectType = Call->getParamInAllocaType(ArgNo: ArgIdx);
148	if (IsSRet)
149	IndirectType = Call->getParamStructRetType(ArgNo: ArgIdx);
150	}
151
152	/// Generate a libcall taking the given operands as arguments and returning a
153	/// result of type RetVT.
154	std::pair<SDValue, SDValue>
155	TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl,
156	EVT RetVT, ArrayRef<SDValue> Ops,
157	MakeLibCallOptions CallOptions, const SDLoc &dl,
158	SDValue InChain) const {
159	if (LibcallImpl == RTLIB::Unsupported)
160	reportFatalInternalError(reason: "unsupported library call operation");
161
162	if (!InChain)
163	InChain = DAG.getEntryNode();
164
165	TargetLowering::ArgListTy Args;
166	Args.reserve(n: Ops.size());
167
168	ArrayRef<Type *> OpsTypeOverrides = CallOptions.OpsTypeOverrides;
169	for (unsigned i = `0`; i < Ops.size(); ++i) {
170	SDValue NewOp = Ops [i];
171	Type *Ty = i < OpsTypeOverrides.size() && OpsTypeOverrides [i]
172	? OpsTypeOverrides [i]
173	: NewOp.getValueType().getTypeForEVT(Context&: *DAG.getContext());
174	TargetLowering::ArgListEntry Entry(NewOp, Ty);
175	if (CallOptions.IsSoften)
176	Entry.OrigTy =
177	CallOptions.OpsVTBeforeSoften [i].getTypeForEVT(Context&: *DAG.getContext());
178
179	Entry.IsSExt =
180	shouldSignExtendTypeInLibCall(Ty: Entry.Ty, IsSigned: CallOptions.IsSigned);
181	Entry.IsZExt = !Entry.IsSExt;
182
183	if (CallOptions.IsSoften &&
184	!shouldExtendTypeInLibCall(Type: CallOptions.OpsVTBeforeSoften [i])) {
185	Entry.IsSExt = Entry.IsZExt = false;
186	}
187	Args.push_back(x: Entry);
188	}
189
190	SDValue Callee =
191	DAG.getExternalSymbol(LCImpl: LibcallImpl, VT: getPointerTy(DL: DAG.getDataLayout()));
192
193	Type RetTy = RetVT.getTypeForEVT(Context&: DAG.getContext());
194	Type *OrigRetTy = RetTy;
195	TargetLowering::CallLoweringInfo CLI(DAG);
196	bool signExtend = shouldSignExtendTypeInLibCall(Ty: RetTy, IsSigned: CallOptions.IsSigned);
197	bool zeroExtend = !signExtend;
198
199	if (CallOptions.IsSoften) {
200	OrigRetTy = CallOptions.RetVTBeforeSoften.getTypeForEVT(Context&: *DAG.getContext());
201	if (!shouldExtendTypeInLibCall(Type: CallOptions.RetVTBeforeSoften))
202	signExtend = zeroExtend = false;
203	}
204
205	CLI.setDebugLoc(dl)
206	.setChain(InChain)
207	.setLibCallee(CC: getLibcallImplCallingConv(Call: LibcallImpl), ResultType: RetTy, OrigResultType: OrigRetTy,
208	Target: Callee, ArgsList: std::move(Args))
209	.setNoReturn(CallOptions.DoesNotReturn)
210	.setDiscardResult(!CallOptions.IsReturnValueUsed)
211	.setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
212	.setSExtResult(signExtend)
213	.setZExtResult(zeroExtend);
214	return LowerCallTo(CLI);
215	}
216
217	bool TargetLowering::findOptimalMemOpLowering(
218	LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
219	const MemOp &Op, unsigned DstAS, unsigned SrcAS,
220	const AttributeList &FuncAttributes, EVT LargestVT) const* {
221	if (Limit != ~unsigned(`0`) && Op.isMemcpyWithFixedDstAlign() &&
222	Op.getSrcAlign() < Op.getDstAlign())
223	return false;
224
225	EVT VT = getOptimalMemOpType(Context, Op, FuncAttributes);
226
227	if (VT == MVT::Other) {
228	// Use the largest integer type whose alignment constraints are satisfied.
229	// We only need to check DstAlign here as SrcAlign is always greater or
230	// equal to DstAlign (or zero).
231	VT = MVT::LAST_INTEGER_VALUETYPE;
232	if (Op.isFixedDstAlign())
233	while (Op.getDstAlign() < (VT.getSizeInBits() / `8`) &&
234	!allowsMisalignedMemoryAccesses(VT, AddrSpace: DstAS, Alignment: Op.getDstAlign()))
235	VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - `1`);
236	assert(VT.isInteger());
237
238	// Find the largest legal integer type.
239	MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
240	while (!isTypeLegal(VT: LVT))
241	LVT = (MVT::SimpleValueType)(LVT.SimpleTy - `1`);
242	assert(LVT.isInteger());
243
244	// If the type we've chosen is larger than the largest legal integer type
245	// then use that instead.
246	if (VT.bitsGT(VT: LVT))
247	VT = LVT;
248	}
249
250	unsigned NumMemOps = `0`;
251	uint64_t Size = Op.size();
252	while (Size) {
253	unsigned VTSize = VT.getSizeInBits() / `8`;
254	while (VTSize > Size) {
255	// For now, only use non-vector load / store's for the left-over pieces.
256	EVT NewVT = VT;
257	unsigned NewVTSize;
258
259	bool Found = false;
260	if (VT.isVector() \|\| VT.isFloatingPoint()) {
261	NewVT = (VT.getSizeInBits() > `64`) ? MVT::i64 : MVT::i32;
262	if (isOperationLegalOrCustom(Op: ISD::STORE, VT: NewVT) &&
263	isSafeMemOpType(NewVT.getSimpleVT()))
264	Found = true;
265	else if (NewVT == MVT::i64 &&
266	isOperationLegalOrCustom(Op: ISD::STORE, VT: MVT::f64) &&
267	isSafeMemOpType(MVT::f64)) {
268	// i64 is usually not legal on 32-bit targets, but f64 may be.
269	NewVT = MVT::f64;
270	Found = true;
271	}
272	}
273
274	if (!Found) {
275	do {
276	NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - `1`);
277	if (NewVT == MVT::i8)
278	break;
279	} while (!isSafeMemOpType(NewVT.getSimpleVT()));
280	}
281	NewVTSize = NewVT.getSizeInBits() / `8`;
282
283	// If the new VT cannot cover all of the remaining bits, then consider
284	// issuing a (or a pair of) unaligned and overlapping load / store.
285	unsigned Fast;
286	if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
287	allowsMisalignedMemoryAccesses(
288	VT, AddrSpace: DstAS, Alignment: Op.isFixedDstAlign() ? Op.getDstAlign() : Align (`1`),
289	Flags: MachineMemOperand::MONone, &Fast) &&
290	Fast)
291	VTSize = Size;
292	else {
293	VT = NewVT;
294	VTSize = NewVTSize;
295	}
296	}
297
298	if (++NumMemOps > Limit)
299	return false;
300
301	MemOps.push_back(x: VT);
302	Size -= VTSize;
303	}
304
305	return true;
306	}
307
308	/// Soften the operands of a comparison. This code is shared among BR_CC,
309	/// SELECT_CC, and SETCC handlers.
310	void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
311	SDValue &NewLHS, SDValue &NewRHS,
312	ISD::CondCode &CCCode,
313	const SDLoc &dl, const SDValue OldLHS,
314	const SDValue OldRHS) const {
315	SDValue Chain;
316	return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, DL: dl, OldLHS,
317	OldRHS, Chain);
318	}
319
320	void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
321	SDValue &NewLHS, SDValue &NewRHS,
322	ISD::CondCode &CCCode,
323	const SDLoc &dl, const SDValue OldLHS,
324	const SDValue OldRHS,
325	SDValue &Chain,
326	bool IsSignaling) const {
327	// FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
328	// not supporting it. We can update this code when libgcc provides such
329	// functions.
330
331	assert((VT == MVT::f32 \|\| VT == MVT::f64 \|\| VT == MVT::f128 \|\| VT == MVT::ppcf128)
332	&& "Unsupported setcc type!");
333
334	// Expand into one or more soft-fp libcall(s).
335	RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
336	bool ShouldInvertCC = false;
337	switch (CCCode) {
338	case ISD::SETEQ:
339	case ISD::SETOEQ:
340	LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
341	(VT == MVT::f64) ? RTLIB::OEQ_F64 :
342	(VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
343	break;
344	case ISD::SETNE:
345	case ISD::SETUNE:
346	LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
347	(VT == MVT::f64) ? RTLIB::UNE_F64 :
348	(VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
349	break;
350	case ISD::SETGE:
351	case ISD::SETOGE:
352	LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
353	(VT == MVT::f64) ? RTLIB::OGE_F64 :
354	(VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
355	break;
356	case ISD::SETLT:
357	case ISD::SETOLT:
358	LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
359	(VT == MVT::f64) ? RTLIB::OLT_F64 :
360	(VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
361	break;
362	case ISD::SETLE:
363	case ISD::SETOLE:
364	LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
365	(VT == MVT::f64) ? RTLIB::OLE_F64 :
366	(VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
367	break;
368	case ISD::SETGT:
369	case ISD::SETOGT:
370	LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
371	(VT == MVT::f64) ? RTLIB::OGT_F64 :
372	(VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
373	break;
374	case ISD::SETO:
375	ShouldInvertCC = true;
376	[[fallthrough]];
377	case ISD::SETUO:
378	LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
379	(VT == MVT::f64) ? RTLIB::UO_F64 :
380	(VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
381	break;
382	case ISD::SETONE:
383	// SETONE = O && UNE
384	ShouldInvertCC = true;
385	[[fallthrough]];
386	case ISD::SETUEQ:
387	LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
388	(VT == MVT::f64) ? RTLIB::UO_F64 :
389	(VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
390	LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
391	(VT == MVT::f64) ? RTLIB::OEQ_F64 :
392	(VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
393	break;
394	default:
395	// Invert CC for unordered comparisons
396	ShouldInvertCC = true;
397	switch (CCCode) {
398	case ISD::SETULT:
399	LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
400	(VT == MVT::f64) ? RTLIB::OGE_F64 :
401	(VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
402	break;
403	case ISD::SETULE:
404	LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
405	(VT == MVT::f64) ? RTLIB::OGT_F64 :
406	(VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
407	break;
408	case ISD::SETUGT:
409	LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
410	(VT == MVT::f64) ? RTLIB::OLE_F64 :
411	(VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
412	break;
413	case ISD::SETUGE:
414	LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
415	(VT == MVT::f64) ? RTLIB::OLT_F64 :
416	(VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
417	break;
418	default: llvm_unreachable("Do not know how to soften this setcc!");
419	}
420	}
421
422	// Use the target specific return value for comparison lib calls.
423	EVT RetVT = getCmpLibcallReturnType();
424	SDValue Ops[`2`] = {NewLHS, NewRHS};
425	TargetLowering::MakeLibCallOptions CallOptions;
426	EVT OpsVT[`2`] = { OldLHS.getValueType(),
427	OldRHS.getValueType() };
428	CallOptions.setTypeListBeforeSoften(OpsVT, RetVT);
429	auto Call = makeLibCall(DAG, LC: LC1, RetVT, Ops, CallOptions, dl, Chain);
430	NewLHS = Call.first;
431	NewRHS = DAG.getConstant(Val: `0`, DL: dl, VT: RetVT);
432
433	RTLIB::LibcallImpl LC1Impl = getLibcallImpl(Call: LC1);
434	if (LC1Impl == RTLIB::Unsupported) {
435	reportFatalUsageError(
436	reason: "no libcall available to soften floating-point compare");
437	}
438
439	CCCode = getSoftFloatCmpLibcallPredicate(Call: LC1Impl);
440	if (ShouldInvertCC) {
441	assert(RetVT.isInteger());
442	CCCode = getSetCCInverse(Operation: CCCode, Type: RetVT);
443	}
444
445	if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
446	// Update Chain.
447	Chain = Call.second;
448	} else {
449	RTLIB::LibcallImpl LC2Impl = getLibcallImpl(Call: LC2);
450	if (LC2Impl == RTLIB::Unsupported) {
451	reportFatalUsageError(
452	reason: "no libcall available to soften floating-point compare");
453	}
454
455	assert(CCCode == (ShouldInvertCC ? ISD::SETEQ : ISD::SETNE) &&
456	"unordered call should be simple boolean");
457
458	EVT SetCCVT =
459	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: RetVT);
460	if (getBooleanContents(Type: RetVT) == ZeroOrOneBooleanContent) {
461	NewLHS = DAG.getNode(Opcode: ISD::AssertZext, DL: dl, VT: RetVT, N1: Call.first,
462	N2: DAG.getValueType(MVT::i1));
463	}
464
465	SDValue Tmp = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: NewLHS, RHS: NewRHS, Cond: CCCode);
466	auto Call2 = makeLibCall(DAG, LC: LC2, RetVT, Ops, CallOptions, dl, Chain);
467	CCCode = getSoftFloatCmpLibcallPredicate(Call: LC2Impl);
468	if (ShouldInvertCC)
469	CCCode = getSetCCInverse(Operation: CCCode, Type: RetVT);
470	NewLHS = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Call2.first, RHS: NewRHS, Cond: CCCode);
471	if (Chain)
472	Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Call.second,
473	N2: Call2.second);
474	NewLHS = DAG.getNode(Opcode: ShouldInvertCC ? ISD::AND : ISD::OR, DL: dl,
475	VT: Tmp.getValueType(), N1: Tmp, N2: NewLHS);
476	NewRHS = SDValue ();
477	}
478	}
479
480	/// Return the entry encoding for a jump table in the current function. The
481	/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
482	unsigned TargetLowering::getJumpTableEncoding() const {
483	// In non-pic modes, just use the address of a block.
484	if (!isPositionIndependent())
485	return MachineJumpTableInfo::EK_BlockAddress;
486
487	// Otherwise, use a label difference.
488	return MachineJumpTableInfo::EK_LabelDifference32;
489	}
490
491	SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
492	SelectionDAG &DAG) const {
493	return Table;
494	}
495
496	/// This returns the relocation base for the given PIC jumptable, the same as
497	/// getPICJumpTableRelocBase, but as an MCExpr.
498	const MCExpr *
499	TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
500	unsigned JTI,MCContext &Ctx) const{
501	// The normal PIC reloc base is the label at the start of the jump table.
502	return MCSymbolRefExpr::create(Symbol: MF->getJTISymbol(JTI, Ctx), Ctx);
503	}
504
505	SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
506	SDValue Addr, int JTI,
507	SelectionDAG &DAG) const {
508	SDValue Chain = Value;
509	// Jump table debug info is only needed if CodeView is enabled.
510	if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
511	Chain = DAG.getJumpTableDebugInfo(JTI, Chain, DL: dl);
512	}
513	return DAG.getNode(Opcode: ISD::BRIND, DL: dl, VT: MVT::Other, N1: Chain, N2: Addr);
514	}
515
516	bool
517	TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode GA) const* {
518	const TargetMachine &TM = getTargetMachine();
519	const GlobalValue *GV = GA->getGlobal();
520
521	// If the address is not even local to this DSO we will have to load it from
522	// a got and then add the offset.
523	if (!TM.shouldAssumeDSOLocal(GV))
524	return false;
525
526	// If the code is position independent we will have to add a base register.
527	if (isPositionIndependent())
528	return false;
529
530	// Otherwise we can do it.
531	return true;
532	}
533
534	//===----------------------------------------------------------------------===//
535	// Optimization Methods
536	//===----------------------------------------------------------------------===//
537
538	/// If the specified instruction has a constant integer operand and there are
539	/// bits set in that constant that are not demanded, then clear those bits and
540	/// return true.
541	bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
542	const APInt &DemandedBits,
543	const APInt &DemandedElts,
544	TargetLoweringOpt &TLO) const {
545	SDLoc DL(Op);
546	unsigned Opcode = Op.getOpcode();
547
548	// Early-out if we've ended up calling an undemanded node, leave this to
549	// constant folding.
550	if (DemandedBits.isZero() \|\| DemandedElts.isZero())
551	return false;
552
553	// Do target-specific constant optimization.
554	if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
555	return TLO.New.getNode();
556
557	// FIXME: ISD::SELECT, ISD::SELECT_CC
558	switch (Opcode) {
559	default:
560	break;
561	case ISD::XOR:
562	case ISD::AND:
563	case ISD::OR: {
564	auto *Op1C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `1`));
565	if (!Op1C \|\| Op1C->isOpaque())
566	return false;
567
568	// If this is a 'not' op, don't touch it because that's a canonical form.
569	const APInt &C = Op1C->getAPIntValue();
570	if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(RHS: C))
571	return false;
572
573	if (!C.isSubsetOf(RHS: DemandedBits)) {
574	EVT VT = Op.getValueType();
575	SDValue NewC = TLO.DAG.getConstant(Val: DemandedBits & C, DL, VT);
576	SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, N1: Op.getOperand(i: `0`), N2: NewC,
577	Flags: Op ->getFlags());
578	return TLO.CombineTo(O: Op, N: NewOp);
579	}
580
581	break;
582	}
583	}
584
585	return false;
586	}
587
588	bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
589	const APInt &DemandedBits,
590	TargetLoweringOpt &TLO) const {
591	EVT VT = Op.getValueType();
592	APInt DemandedElts = VT.isVector()
593	? APInt::getAllOnes(numBits: VT.getVectorNumElements())
594	: APInt (`1`, `1`);
595	return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
596	}
597
598	/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
599	/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
600	/// but it could be generalized for targets with other types of implicit
601	/// widening casts.
602	bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
603	const APInt &DemandedBits,
604	TargetLoweringOpt &TLO) const {
605	assert(Op.getNumOperands() == `2` &&
606	"ShrinkDemandedOp only supports binary operators!");
607	assert(Op.getNode()->getNumValues() == `1` &&
608	"ShrinkDemandedOp only supports nodes with one result!");
609
610	EVT VT = Op.getValueType();
611	SelectionDAG &DAG = TLO.DAG;
612	SDLoc dl(Op);
613
614	// Early return, as this function cannot handle vector types.
615	if (VT.isVector())
616	return false;
617
618	assert(Op.getOperand(`0`).getValueType().getScalarSizeInBits() == BitWidth &&
619	Op.getOperand(`1`).getValueType().getScalarSizeInBits() == BitWidth &&
620	"ShrinkDemandedOp only supports operands that have the same size!");
621
622	// Don't do this if the node has another user, which may require the
623	// full value.
624	if (!Op.getNode()->hasOneUse())
625	return false;
626
627	// Search for the smallest integer type with free casts to and from
628	// Op's type. For expedience, just check power-of-2 integer types.
629	unsigned DemandedSize = DemandedBits.getActiveBits();
630	for (unsigned SmallVTBits = llvm::bit_ceil(Value: DemandedSize);
631	SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(A: SmallVTBits)) {
632	EVT SmallVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: SmallVTBits);
633	if (isTruncateFree(Val: Op, VT2: SmallVT) && isZExtFree(FromTy: SmallVT, ToTy: VT)) {
634	// We found a type with free casts.
635
636	// If the operation has the 'disjoint' flag, then the
637	// operands on the new node are also disjoint.
638	SDNodeFlags Flags(Op ->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
639	: SDNodeFlags::None);
640	unsigned Opcode = Op.getOpcode();
641	if (Opcode == ISD::PTRADD) {
642	// It isn't a ptradd anymore if it doesn't operate on the entire
643	// pointer.
644	Opcode = ISD::ADD;
645	}
646	SDValue X = DAG.getNode(
647	Opcode, DL: dl, VT: SmallVT,
648	N1: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SmallVT, Operand: Op.getOperand(i: `0`)),
649	N2: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SmallVT, Operand: Op.getOperand(i: `1`)), Flags);
650	assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
651	SDValue Z = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: X);
652	return TLO.CombineTo(O: Op, N: Z);
653	}
654	}
655	return false;
656	}
657
658	bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
659	DAGCombinerInfo &DCI) const {
660	SelectionDAG &DAG = DCI.DAG;
661	TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
662	!DCI.isBeforeLegalizeOps());
663	KnownBits Known;
664
665	bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
666	if (Simplified) {
667	DCI.AddToWorklist(N: Op.getNode());
668	DCI.CommitTargetLoweringOpt(TLO);
669	}
670	return Simplified;
671	}
672
673	bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
674	const APInt &DemandedElts,
675	DAGCombinerInfo &DCI) const {
676	SelectionDAG &DAG = DCI.DAG;
677	TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
678	!DCI.isBeforeLegalizeOps());
679	KnownBits Known;
680
681	bool Simplified =
682	SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
683	if (Simplified) {
684	DCI.AddToWorklist(N: Op.getNode());
685	DCI.CommitTargetLoweringOpt(TLO);
686	}
687	return Simplified;
688	}
689
690	bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
691	KnownBits &Known,
692	TargetLoweringOpt &TLO,
693	unsigned Depth,
694	bool AssumeSingleUse) const {
695	EVT VT = Op.getValueType();
696
697	// Since the number of lanes in a scalable vector is unknown at compile time,
698	// we track one bit which is implicitly broadcast to all lanes. This means
699	// that all lanes in a scalable vector are considered demanded.
700	APInt DemandedElts = VT.isFixedLengthVector()
701	? APInt::getAllOnes(numBits: VT.getVectorNumElements())
702	: APInt (`1`, `1`);
703	return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
704	AssumeSingleUse);
705	}
706
707	// TODO: Under what circumstances can we create nodes? Constant folding?
708	SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
709	SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
710	SelectionDAG &DAG, unsigned Depth) const {
711	EVT VT = Op.getValueType();
712
713	// Limit search depth.
714	if (Depth >= SelectionDAG::MaxRecursionDepth)
715	return SDValue ();
716
717	// Ignore UNDEFs.
718	if (Op.isUndef())
719	return SDValue ();
720
721	// Not demanding any bits/elts from Op.
722	if (DemandedBits == `0` \|\| DemandedElts == `0`)
723	return DAG.getUNDEF(VT);
724
725	bool IsLE = DAG.getDataLayout().isLittleEndian();
726	unsigned NumElts = DemandedElts.getBitWidth();
727	unsigned BitWidth = DemandedBits.getBitWidth();
728	KnownBits LHSKnown, RHSKnown;
729	switch (Op.getOpcode()) {
730	case ISD::BITCAST: {
731	if (VT.isScalableVector())
732	return SDValue ();
733
734	SDValue Src = peekThroughBitcasts(V: Op.getOperand(i: `0`));
735	EVT SrcVT = Src.getValueType();
736	EVT DstVT = Op.getValueType();
737	if (SrcVT == DstVT)
738	return Src;
739
740	unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
741	unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
742	if (NumSrcEltBits == NumDstEltBits)
743	if (SDValue V = SimplifyMultipleUseDemandedBits(
744	Op: Src, DemandedBits, DemandedElts, DAG, Depth: Depth + `1`))
745	return DAG.getBitcast(VT: DstVT, V);
746
747	if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == `0`) {
748	unsigned Scale = NumDstEltBits / NumSrcEltBits;
749	unsigned NumSrcElts = SrcVT.getVectorNumElements();
750	APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
751	for (unsigned i = `0`; i != Scale; ++i) {
752	unsigned EltOffset = IsLE ? i : (Scale - `1` - i);
753	unsigned BitOffset = EltOffset * NumSrcEltBits;
754	DemandedSrcBits \|= DemandedBits.extractBits(numBits: NumSrcEltBits, bitPosition: BitOffset);
755	}
756	// Recursive calls below may turn not demanded elements into poison, so we
757	// need to demand all smaller source elements that maps to a demanded
758	// destination element.
759	APInt DemandedSrcElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
760
761	if (SDValue V = SimplifyMultipleUseDemandedBits(
762	Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG, Depth: Depth + `1`))
763	return DAG.getBitcast(VT: DstVT, V);
764	}
765
766	// TODO - bigendian once we have test coverage.
767	if (IsLE && (NumSrcEltBits % NumDstEltBits) == `0`) {
768	unsigned Scale = NumSrcEltBits / NumDstEltBits;
769	unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : `1`;
770	APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
771	APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
772	for (unsigned i = `0`; i != NumElts; ++i)
773	if (DemandedElts [i]) {
774	unsigned Offset = (i % Scale) * NumDstEltBits;
775	DemandedSrcBits.insertBits(SubBits: DemandedBits, bitPosition: Offset);
776	DemandedSrcElts.setBit(i / Scale);
777	}
778
779	if (SDValue V = SimplifyMultipleUseDemandedBits(
780	Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG, Depth: Depth + `1`))
781	return DAG.getBitcast(VT: DstVT, V);
782	}
783
784	break;
785	}
786	case ISD::AND: {
787	LHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: `0`), DemandedElts, Depth: Depth + `1`);
788	RHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: `1`), DemandedElts, Depth: Depth + `1`);
789
790	// If all of the demanded bits are known 1 on one side, return the other.
791	// These bits cannot contribute to the result of the 'and' in this
792	// context.
793	if (DemandedBits.isSubsetOf(RHS: LHSKnown.Zero \| RHSKnown.One))
794	return Op.getOperand(i: `0`);
795	if (DemandedBits.isSubsetOf(RHS: RHSKnown.Zero \| LHSKnown.One))
796	return Op.getOperand(i: `1`);
797	break;
798	}
799	case ISD::OR: {
800	LHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: `0`), DemandedElts, Depth: Depth + `1`);
801	RHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: `1`), DemandedElts, Depth: Depth + `1`);
802
803	// If all of the demanded bits are known zero on one side, return the
804	// other. These bits cannot contribute to the result of the 'or' in this
805	// context.
806	if (DemandedBits.isSubsetOf(RHS: LHSKnown.One \| RHSKnown.Zero))
807	return Op.getOperand(i: `0`);
808	if (DemandedBits.isSubsetOf(RHS: RHSKnown.One \| LHSKnown.Zero))
809	return Op.getOperand(i: `1`);
810	break;
811	}
812	case ISD::XOR: {
813	LHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: `0`), DemandedElts, Depth: Depth + `1`);
814	RHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: `1`), DemandedElts, Depth: Depth + `1`);
815
816	// If all of the demanded bits are known zero on one side, return the
817	// other.
818	if (DemandedBits.isSubsetOf(RHS: RHSKnown.Zero))
819	return Op.getOperand(i: `0`);
820	if (DemandedBits.isSubsetOf(RHS: LHSKnown.Zero))
821	return Op.getOperand(i: `1`);
822	break;
823	}
824	case ISD::ADD: {
825	RHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: `1`), DemandedElts, Depth: Depth + `1`);
826	if (RHSKnown.isZero())
827	return Op.getOperand(i: `0`);
828
829	LHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: `0`), DemandedElts, Depth: Depth + `1`);
830	if (LHSKnown.isZero())
831	return Op.getOperand(i: `1`);
832	break;
833	}
834	case ISD::SHL: {
835	// If we are only demanding sign bits then we can use the shift source
836	// directly.
837	if (std::optional<unsigned> MaxSA =
838	DAG.getValidMaximumShiftAmount(V: Op, DemandedElts, Depth: Depth + `1`)) {
839	SDValue Op0 = Op.getOperand(i: `0`);
840	unsigned ShAmt = *MaxSA;
841	unsigned NumSignBits =
842	DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + `1`);
843	unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
844	if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
845	return Op0;
846	}
847	break;
848	}
849	case ISD::SRL: {
850	// If we are only demanding sign bits then we can use the shift source
851	// directly.
852	if (std::optional<unsigned> MaxSA =
853	DAG.getValidMaximumShiftAmount(V: Op, DemandedElts, Depth: Depth + `1`)) {
854	SDValue Op0 = Op.getOperand(i: `0`);
855	unsigned ShAmt = *MaxSA;
856	// Must already be signbits in DemandedBits bounds, and can't demand any
857	// shifted in zeroes.
858	if (DemandedBits.countl_zero() >= ShAmt) {
859	unsigned NumSignBits =
860	DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + `1`);
861	if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
862	return Op0;
863	}
864	}
865	break;
866	}
867	case ISD::SETCC: {
868	SDValue Op0 = Op.getOperand(i: `0`);
869	SDValue Op1 = Op.getOperand(i: `1`);
870	ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: `2`))->get();
871	// If (1) we only need the sign-bit, (2) the setcc operands are the same
872	// width as the setcc result, and (3) the result of a setcc conforms to 0 or
873	// -1, we may be able to bypass the setcc.
874	if (DemandedBits.isSignMask() &&
875	Op0.getScalarValueSizeInBits() == BitWidth &&
876	getBooleanContents(Type: Op0.getValueType()) ==
877	BooleanContent::ZeroOrNegativeOneBooleanContent) {
878	// If we're testing X < 0, then this compare isn't needed - just use X!
879	// FIXME: We're limiting to integer types here, but this should also work
880	// if we don't care about FP signed-zero. The use of SETLT with FP means
881	// that we don't care about NaNs.
882	if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
883	(isNullConstant(V: Op1) \|\| ISD::isBuildVectorAllZeros(N: Op1.getNode())))
884	return Op0;
885	}
886	break;
887	}
888	case ISD::SIGN_EXTEND_INREG: {
889	// If none of the extended bits are demanded, eliminate the sextinreg.
890	SDValue Op0 = Op.getOperand(i: `0`);
891	EVT ExVT = cast<VTSDNode>(Val: Op.getOperand(i: `1`))->getVT();
892	unsigned ExBits = ExVT.getScalarSizeInBits();
893	if (DemandedBits.getActiveBits() <= ExBits &&
894	shouldRemoveRedundantExtend(Op))
895	return Op0;
896	// If the input is already sign extended, just drop the extension.
897	unsigned NumSignBits = DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + `1`);
898	if (NumSignBits >= (BitWidth - ExBits + `1`))
899	return Op0;
900	break;
901	}
902	case ISD::ANY_EXTEND_VECTOR_INREG:
903	case ISD::SIGN_EXTEND_VECTOR_INREG:
904	case ISD::ZERO_EXTEND_VECTOR_INREG: {
905	if (VT.isScalableVector())
906	return SDValue ();
907
908	// If we only want the lowest element and none of extended bits, then we can
909	// return the bitcasted source vector.
910	SDValue Src = Op.getOperand(i: `0`);
911	EVT SrcVT = Src.getValueType();
912	EVT DstVT = Op.getValueType();
913	if (IsLE && DemandedElts == `1` &&
914	DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
915	DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
916	return DAG.getBitcast(VT: DstVT, V: Src);
917	}
918	break;
919	}
920	case ISD::INSERT_VECTOR_ELT: {
921	if (VT.isScalableVector())
922	return SDValue ();
923
924	// If we don't demand the inserted element, return the base vector.
925	SDValue Vec = Op.getOperand(i: `0`);
926	auto *CIdx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `2`));
927	EVT VecVT = Vec.getValueType();
928	if (CIdx && CIdx->getAPIntValue().ult(RHS: VecVT.getVectorNumElements()) &&
929	!DemandedElts [CIdx->getZExtValue()])
930	return Vec;
931	break;
932	}
933	case ISD::INSERT_SUBVECTOR: {
934	if (VT.isScalableVector())
935	return SDValue ();
936
937	SDValue Vec = Op.getOperand(i: `0`);
938	SDValue Sub = Op.getOperand(i: `1`);
939	uint64_t Idx = Op.getConstantOperandVal(i: `2`);
940	unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
941	APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx);
942	// If we don't demand the inserted subvector, return the base vector.
943	if (DemandedSubElts == `0`)
944	return Vec;
945	break;
946	}
947	case ISD::VECTOR_SHUFFLE: {
948	assert(!VT.isScalableVector());
949	ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Val&: Op)->getMask();
950
951	// If all the demanded elts are from one operand and are inline,
952	// then we can use the operand directly.
953	bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
954	for (unsigned i = `0`; i != NumElts; ++i) {
955	int M = ShuffleMask [i];
956	if (M < `0` \|\| !DemandedElts [i])
957	continue;
958	AllUndef = false;
959	IdentityLHS &= (M == (int)i);
960	IdentityRHS &= ((M - NumElts) == i);
961	}
962
963	if (AllUndef)
964	return DAG.getUNDEF(VT: Op.getValueType());
965	if (IdentityLHS)
966	return Op.getOperand(i: `0`);
967	if (IdentityRHS)
968	return Op.getOperand(i: `1`);
969	break;
970	}
971	default:
972	// TODO: Probably okay to remove after audit; here to reduce change size
973	// in initial enablement patch for scalable vectors
974	if (VT.isScalableVector())
975	return SDValue ();
976
977	if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
978	if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
979	Op, DemandedBits, DemandedElts, DAG, Depth))
980	return V;
981	break;
982	}
983	return SDValue ();
984	}
985
986	SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
987	SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
988	unsigned Depth) const {
989	EVT VT = Op.getValueType();
990	// Since the number of lanes in a scalable vector is unknown at compile time,
991	// we track one bit which is implicitly broadcast to all lanes. This means
992	// that all lanes in a scalable vector are considered demanded.
993	APInt DemandedElts = VT.isFixedLengthVector()
994	? APInt::getAllOnes(numBits: VT.getVectorNumElements())
995	: APInt (`1`, `1`);
996	return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
997	Depth);
998	}
999
1000	SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
1001	SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
1002	unsigned Depth) const {
1003	APInt DemandedBits = APInt::getAllOnes(numBits: Op.getScalarValueSizeInBits());
1004	return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
1005	Depth);
1006	}
1007
1008	// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
1009	// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
1010	static SDValue combineShiftToAVG(SDValue Op,
1011	TargetLowering::TargetLoweringOpt &TLO,
1012	const TargetLowering &TLI,
1013	const APInt &DemandedBits,
1014	const APInt &DemandedElts, unsigned Depth) {
1015	assert((Op.getOpcode() == ISD::SRL \|\| Op.getOpcode() == ISD::SRA) &&
1016	"SRL or SRA node is required here!");
1017	// Is the right shift using an immediate value of 1?
1018	ConstantSDNode *N1C = isConstOrConstSplat(N: Op.getOperand(i: `1`), DemandedElts);
1019	if (!N1C \|\| !N1C->isOne())
1020	return SDValue ();
1021
1022	// We are looking for an avgfloor
1023	// add(ext, ext)
1024	// or one of these as a avgceil
1025	// add(add(ext, ext), 1)
1026	// add(add(ext, 1), ext)
1027	// add(ext, add(ext, 1))
1028	SDValue Add = Op.getOperand(i: `0`);
1029	if (Add.getOpcode() != ISD::ADD)
1030	return SDValue ();
1031
1032	SDValue ExtOpA = Add.getOperand(i: `0`);
1033	SDValue ExtOpB = Add.getOperand(i: `1`);
1034	SDValue Add2;
1035	auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
1036	ConstantSDNode *ConstOp;
1037	if ((ConstOp = isConstOrConstSplat(N: Op2, DemandedElts)) &&
1038	ConstOp->isOne()) {
1039	ExtOpA = Op1;
1040	ExtOpB = Op3;
1041	Add2 = A;
1042	return true;
1043	}
1044	if ((ConstOp = isConstOrConstSplat(N: Op3, DemandedElts)) &&
1045	ConstOp->isOne()) {
1046	ExtOpA = Op1;
1047	ExtOpB = Op2;
1048	Add2 = A;
1049	return true;
1050	}
1051	return false;
1052	};
1053	bool IsCeil =
1054	(ExtOpA.getOpcode() == ISD::ADD &&
1055	MatchOperands (ExtOpA.getOperand(i: `0`), ExtOpA.getOperand(i: `1`), ExtOpB, ExtOpA)) \|\|
1056	(ExtOpB.getOpcode() == ISD::ADD &&
1057	MatchOperands (ExtOpB.getOperand(i: `0`), ExtOpB.getOperand(i: `1`), ExtOpA, ExtOpB));
1058
1059	// If the shift is signed (sra):
1060	// - Needs >= 2 sign bit for both operands.
1061	// - Needs >= 2 zero bits.
1062	// If the shift is unsigned (srl):
1063	// - Needs >= 1 zero bit for both operands.
1064	// - Needs 1 demanded bit zero and >= 2 sign bits.
1065	SelectionDAG &DAG = TLO.DAG;
1066	unsigned ShiftOpc = Op.getOpcode();
1067	bool IsSigned = false;
1068	unsigned KnownBits;
1069	unsigned NumSignedA = DAG.ComputeNumSignBits(Op: ExtOpA, DemandedElts, Depth);
1070	unsigned NumSignedB = DAG.ComputeNumSignBits(Op: ExtOpB, DemandedElts, Depth);
1071	unsigned NumSigned = std::min(a: NumSignedA, b: NumSignedB) - `1`;
1072	unsigned NumZeroA =
1073	DAG.computeKnownBits(Op: ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
1074	unsigned NumZeroB =
1075	DAG.computeKnownBits(Op: ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
1076	unsigned NumZero = std::min(a: NumZeroA, b: NumZeroB);
1077
1078	switch (ShiftOpc) {
1079	default:
1080	llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
1081	case ISD::SRA: {
1082	if (NumZero >= `2` && NumSigned < NumZero) {
1083	IsSigned = false;
1084	KnownBits = NumZero;
1085	break;
1086	}
1087	if (NumSigned >= `1`) {
1088	IsSigned = true;
1089	KnownBits = NumSigned;
1090	break;
1091	}
1092	return SDValue ();
1093	}
1094	case ISD::SRL: {
1095	if (NumZero >= `1` && NumSigned < NumZero) {
1096	IsSigned = false;
1097	KnownBits = NumZero;
1098	break;
1099	}
1100	if (NumSigned >= `1` && DemandedBits.isSignBitClear()) {
1101	IsSigned = true;
1102	KnownBits = NumSigned;
1103	break;
1104	}
1105	return SDValue ();
1106	}
1107	}
1108
1109	unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
1110	: (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
1111
1112	// Find the smallest power-2 type that is legal for this vector size and
1113	// operation, given the original type size and the number of known sign/zero
1114	// bits.
1115	EVT VT = Op.getValueType();
1116	unsigned MinWidth =
1117	std::max<unsigned>(a: VT.getScalarSizeInBits() - KnownBits, b: `8`);
1118	EVT NVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: llvm::bit_ceil(Value: MinWidth));
1119	if (NVT.getScalarSizeInBits() > VT.getScalarSizeInBits())
1120	return SDValue ();
1121	if (VT.isVector())
1122	NVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: NVT, EC: VT.getVectorElementCount());
1123	if (TLO.LegalTypes() && !TLI.isOperationLegal(Op: AVGOpc, VT: NVT)) {
1124	// If we could not transform, and (both) adds are nuw/nsw, we can use the
1125	// larger type size to do the transform.
1126	if (TLO.LegalOperations() && !TLI.isOperationLegal(Op: AVGOpc, VT))
1127	return SDValue ();
1128	if (DAG.willNotOverflowAdd(IsSigned, N0: Add.getOperand(i: `0`),
1129	N1: Add.getOperand(i: `1`)) &&
1130	(!Add2 \|\| DAG.willNotOverflowAdd(IsSigned, N0: Add2.getOperand(i: `0`),
1131	N1: Add2.getOperand(i: `1`))))
1132	NVT = VT;
1133	else
1134	return SDValue ();
1135	}
1136
1137	// Don't create a AVGFLOOR node with a scalar constant unless its legal as
1138	// this is likely to stop other folds (reassociation, value tracking etc.)
1139	if (!IsCeil && !TLI.isOperationLegal(Op: AVGOpc, VT: NVT) &&
1140	(isa<ConstantSDNode>(Val: ExtOpA) \|\| isa<ConstantSDNode>(Val: ExtOpB)))
1141	return SDValue ();
1142
1143	SDLoc DL(Op);
1144	SDValue ResultAVG =
1145	DAG.getNode(Opcode: AVGOpc, DL, VT: NVT, N1: DAG.getExtOrTrunc(IsSigned, Op: ExtOpA, DL, VT: NVT),
1146	N2: DAG.getExtOrTrunc(IsSigned, Op: ExtOpB, DL, VT: NVT));
1147	return DAG.getExtOrTrunc(IsSigned, Op: ResultAVG, DL, VT);
1148	}
1149
1150	/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1151	/// result of Op are ever used downstream. If we can use this information to
1152	/// simplify Op, create a new simplified DAG node and return true, returning the
1153	/// original and new nodes in Old and New. Otherwise, analyze the expression and
1154	/// return a mask of Known bits for the expression (used to simplify the
1155	/// caller). The Known bits may only be accurate for those bits in the
1156	/// OriginalDemandedBits and OriginalDemandedElts.
1157	bool TargetLowering::SimplifyDemandedBits(
1158	SDValue Op, const APInt &OriginalDemandedBits,
1159	const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1160	unsigned Depth, bool AssumeSingleUse) const {
1161	unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1162	assert(Op.getScalarValueSizeInBits() == BitWidth &&
1163	"Mask size mismatches value type size!");
1164
1165	// Don't know anything.
1166	Known = KnownBits (BitWidth);
1167
1168	EVT VT = Op.getValueType();
1169	bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1170	unsigned NumElts = OriginalDemandedElts.getBitWidth();
1171	assert((!VT.isFixedLengthVector() \|\| NumElts == VT.getVectorNumElements()) &&
1172	"Unexpected vector size");
1173
1174	APInt DemandedBits = OriginalDemandedBits;
1175	APInt DemandedElts = OriginalDemandedElts;
1176	SDLoc dl(Op);
1177
1178	// Undef operand.
1179	if (Op.isUndef())
1180	return false;
1181
1182	// We can't simplify target constants.
1183	if (Op.getOpcode() == ISD::TargetConstant)
1184	return false;
1185
1186	if (Op.getOpcode() == ISD::Constant) {
1187	// We know all of the bits for a constant!
1188	Known = KnownBits::makeConstant(C: Op ->getAsAPIntVal());
1189	return false;
1190	}
1191
1192	if (Op.getOpcode() == ISD::ConstantFP) {
1193	// We know all of the bits for a floating point constant!
1194	Known = KnownBits::makeConstant(
1195	C: cast<ConstantFPSDNode>(Val&: Op)->getValueAPF().bitcastToAPInt());
1196	return false;
1197	}
1198
1199	// Other users may use these bits.
1200	bool HasMultiUse = false;
1201	if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1202	if (Depth >= SelectionDAG::MaxRecursionDepth) {
1203	// Limit search depth.
1204	return false;
1205	}
1206	// Allow multiple uses, just set the DemandedBits/Elts to all bits.
1207	DemandedBits = APInt::getAllOnes(numBits: BitWidth);
1208	DemandedElts = APInt::getAllOnes(numBits: NumElts);
1209	HasMultiUse = true;
1210	} else if (OriginalDemandedBits == `0` \|\| OriginalDemandedElts == `0`) {
1211	// Not demanding any bits/elts from Op.
1212	return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
1213	} else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1214	// Limit search depth.
1215	return false;
1216	}
1217
1218	KnownBits Known2;
1219	switch (Op.getOpcode()) {
1220	case ISD::SCALAR_TO_VECTOR: {
1221	if (VT.isScalableVector())
1222	return false;
1223	if (!DemandedElts [`0`])
1224	return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
1225
1226	KnownBits SrcKnown;
1227	SDValue Src = Op.getOperand(i: `0`);
1228	unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1229	APInt SrcDemandedBits = DemandedBits.zext(width: SrcBitWidth);
1230	if (SimplifyDemandedBits(Op: Src, DemandedBits: SrcDemandedBits, Known&: SrcKnown, TLO, Depth: Depth + `1`))
1231	return true;
1232
1233	// Upper elements are undef, so only get the knownbits if we just demand
1234	// the bottom element.
1235	if (DemandedElts == `1`)
1236	Known = SrcKnown.anyextOrTrunc(BitWidth);
1237	break;
1238	}
1239	case ISD::BUILD_VECTOR:
1240	// Collect the known bits that are shared by every demanded element.
1241	// TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1242	Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1243	return false; // Don't fall through, will infinitely loop.
1244	case ISD::SPLAT_VECTOR: {
1245	SDValue Scl = Op.getOperand(i: `0`);
1246	APInt DemandedSclBits = DemandedBits.zextOrTrunc(width: Scl.getValueSizeInBits());
1247	KnownBits KnownScl;
1248	if (SimplifyDemandedBits(Op: Scl, DemandedBits: DemandedSclBits, Known&: KnownScl, TLO, Depth: Depth + `1`))
1249	return true;
1250
1251	// Implicitly truncate the bits to match the official semantics of
1252	// SPLAT_VECTOR.
1253	Known = KnownScl.trunc(BitWidth);
1254	break;
1255	}
1256	case ISD::LOAD: {
1257	auto *LD = cast<LoadSDNode>(Val&: Op);
1258	if (getTargetConstantFromLoad(LD)) {
1259	Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1260	return false; // Don't fall through, will infinitely loop.
1261	}
1262	if (ISD::isZEXTLoad(N: Op.getNode()) && Op.getResNo() == `0`) {
1263	// If this is a ZEXTLoad and we are looking at the loaded value.
1264	EVT MemVT = LD->getMemoryVT();
1265	unsigned MemBits = MemVT.getScalarSizeInBits();
1266	Known.Zero.setBitsFrom(MemBits);
1267	return false; // Don't fall through, will infinitely loop.
1268	}
1269	break;
1270	}
1271	case ISD::INSERT_VECTOR_ELT: {
1272	if (VT.isScalableVector())
1273	return false;
1274	SDValue Vec = Op.getOperand(i: `0`);
1275	SDValue Scl = Op.getOperand(i: `1`);
1276	auto *CIdx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `2`));
1277	EVT VecVT = Vec.getValueType();
1278
1279	// If index isn't constant, assume we need all vector elements AND the
1280	// inserted element.
1281	APInt DemandedVecElts(DemandedElts);
1282	if (CIdx && CIdx->getAPIntValue().ult(RHS: VecVT.getVectorNumElements())) {
1283	unsigned Idx = CIdx->getZExtValue();
1284	DemandedVecElts.clearBit(BitPosition: Idx);
1285
1286	// Inserted element is not required.
1287	if (!DemandedElts [Idx])
1288	return TLO.CombineTo(O: Op, N: Vec);
1289	}
1290
1291	KnownBits KnownScl;
1292	unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1293	APInt DemandedSclBits = DemandedBits.zextOrTrunc(width: NumSclBits);
1294	if (SimplifyDemandedBits(Op: Scl, DemandedBits: DemandedSclBits, Known&: KnownScl, TLO, Depth: Depth + `1`))
1295	return true;
1296
1297	Known = KnownScl.anyextOrTrunc(BitWidth);
1298
1299	KnownBits KnownVec;
1300	if (SimplifyDemandedBits(Op: Vec, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedVecElts, Known&: KnownVec, TLO,
1301	Depth: Depth + `1`))
1302	return true;
1303
1304	if (!!DemandedVecElts)
1305	Known = Known.intersectWith(RHS: KnownVec);
1306
1307	return false;
1308	}
1309	case ISD::INSERT_SUBVECTOR: {
1310	if (VT.isScalableVector())
1311	return false;
1312	// Demand any elements from the subvector and the remainder from the src its
1313	// inserted into.
1314	SDValue Src = Op.getOperand(i: `0`);
1315	SDValue Sub = Op.getOperand(i: `1`);
1316	uint64_t Idx = Op.getConstantOperandVal(i: `2`);
1317	unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1318	APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx);
1319	APInt DemandedSrcElts = DemandedElts;
1320	DemandedSrcElts.clearBits(LoBit: Idx, HiBit: Idx + NumSubElts);
1321
1322	KnownBits KnownSub, KnownSrc;
1323	if (SimplifyDemandedBits(Op: Sub, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSubElts, Known&: KnownSub, TLO,
1324	Depth: Depth + `1`))
1325	return true;
1326	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSrcElts, Known&: KnownSrc, TLO,
1327	Depth: Depth + `1`))
1328	return true;
1329
1330	Known.setAllConflict();
1331	if (!!DemandedSubElts)
1332	Known = Known.intersectWith(RHS: KnownSub);
1333	if (!!DemandedSrcElts)
1334	Known = Known.intersectWith(RHS: KnownSrc);
1335
1336	// Attempt to avoid multi-use src if we don't need anything from it.
1337	if (!DemandedBits.isAllOnes() \|\| !DemandedSubElts.isAllOnes() \|\|
1338	!DemandedSrcElts.isAllOnes()) {
1339	SDValue NewSub = SimplifyMultipleUseDemandedBits(
1340	Op: Sub, DemandedBits, DemandedElts: DemandedSubElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1341	SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1342	Op: Src, DemandedBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1343	if (NewSub \|\| NewSrc) {
1344	NewSub = NewSub ? NewSub : Sub;
1345	NewSrc = NewSrc ? NewSrc : Src;
1346	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: NewSrc, N2: NewSub,
1347	N3: Op.getOperand(i: `2`));
1348	return TLO.CombineTo(O: Op, N: NewOp);
1349	}
1350	}
1351	break;
1352	}
1353	case ISD::EXTRACT_SUBVECTOR: {
1354	if (VT.isScalableVector())
1355	return false;
1356	// Offset the demanded elts by the subvector index.
1357	SDValue Src = Op.getOperand(i: `0`);
1358	if (Src.getValueType().isScalableVector())
1359	break;
1360	uint64_t Idx = Op.getConstantOperandVal(i: `1`);
1361	unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1362	APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts).shl(shiftAmt: Idx);
1363
1364	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSrcElts, Known, TLO,
1365	Depth: Depth + `1`))
1366	return true;
1367
1368	// Attempt to avoid multi-use src if we don't need anything from it.
1369	if (!DemandedBits.isAllOnes() \|\| !DemandedSrcElts.isAllOnes()) {
1370	SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1371	Op: Src, DemandedBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1372	if (DemandedSrc) {
1373	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedSrc,
1374	N2: Op.getOperand(i: `1`));
1375	return TLO.CombineTo(O: Op, N: NewOp);
1376	}
1377	}
1378	break;
1379	}
1380	case ISD::CONCAT_VECTORS: {
1381	if (VT.isScalableVector())
1382	return false;
1383	Known.setAllConflict();
1384	EVT SubVT = Op.getOperand(i: `0`).getValueType();
1385	unsigned NumSubVecs = Op.getNumOperands();
1386	unsigned NumSubElts = SubVT.getVectorNumElements();
1387	for (unsigned i = `0`; i != NumSubVecs; ++i) {
1388	APInt DemandedSubElts =
1389	DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
1390	if (SimplifyDemandedBits(Op: Op.getOperand(i), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSubElts,
1391	Known&: Known2, TLO, Depth: Depth + `1`))
1392	return true;
1393	// Known bits are shared by every demanded subvector element.
1394	if (!!DemandedSubElts)
1395	Known = Known.intersectWith(RHS: Known2);
1396	}
1397	break;
1398	}
1399	case ISD::VECTOR_SHUFFLE: {
1400	assert(!VT.isScalableVector());
1401	ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Val&: Op)->getMask();
1402
1403	// Collect demanded elements from shuffle operands..
1404	APInt DemandedLHS, DemandedRHS;
1405	if (!getShuffleDemandedElts(SrcWidth: NumElts, Mask: ShuffleMask, DemandedElts, DemandedLHS,
1406	DemandedRHS))
1407	break;
1408
1409	if (!!DemandedLHS \|\| !!DemandedRHS) {
1410	SDValue Op0 = Op.getOperand(i: `0`);
1411	SDValue Op1 = Op.getOperand(i: `1`);
1412
1413	Known.setAllConflict();
1414	if (!!DemandedLHS) {
1415	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedLHS, Known&: Known2, TLO,
1416	Depth: Depth + `1`))
1417	return true;
1418	Known = Known.intersectWith(RHS: Known2);
1419	}
1420	if (!!DemandedRHS) {
1421	if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedRHS, Known&: Known2, TLO,
1422	Depth: Depth + `1`))
1423	return true;
1424	Known = Known.intersectWith(RHS: Known2);
1425	}
1426
1427	// Attempt to avoid multi-use ops if we don't need anything from them.
1428	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1429	Op: Op0, DemandedBits, DemandedElts: DemandedLHS, DAG&: TLO.DAG, Depth: Depth + `1`);
1430	SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1431	Op: Op1, DemandedBits, DemandedElts: DemandedRHS, DAG&: TLO.DAG, Depth: Depth + `1`);
1432	if (DemandedOp0 \|\| DemandedOp1) {
1433	Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1434	Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1435	SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, N1: Op0, N2: Op1, Mask: ShuffleMask);
1436	return TLO.CombineTo(O: Op, N: NewOp);
1437	}
1438	}
1439	break;
1440	}
1441	case ISD::AND: {
1442	SDValue Op0 = Op.getOperand(i: `0`);
1443	SDValue Op1 = Op.getOperand(i: `1`);
1444
1445	// If the RHS is a constant, check to see if the LHS would be zero without
1446	// using the bits from the RHS. Below, we use knowledge about the RHS to
1447	// simplify the LHS, here we're using information from the LHS to simplify
1448	// the RHS.
1449	if (ConstantSDNode *RHSC = isConstOrConstSplat(N: Op1, DemandedElts)) {
1450	// Do not increment Depth here; that can cause an infinite loop.
1451	KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op: Op0, DemandedElts, Depth);
1452	// If the LHS already has zeros where RHSC does, this 'and' is dead.
1453	if ((LHSKnown.Zero & DemandedBits) ==
1454	(~RHSC->getAPIntValue() & DemandedBits))
1455	return TLO.CombineTo(O: Op, N: Op0);
1456
1457	// If any of the set bits in the RHS are known zero on the LHS, shrink
1458	// the constant.
1459	if (ShrinkDemandedConstant(Op, DemandedBits: ~LHSKnown.Zero & DemandedBits,
1460	DemandedElts, TLO))
1461	return true;
1462
1463	// Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1464	// constant, but if this 'and' is only clearing bits that were just set by
1465	// the xor, then this 'and' can be eliminated by shrinking the mask of
1466	// the xor. For example, for a 32-bit X:
1467	// and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1468	if (isBitwiseNot(V: Op0) && Op0.hasOneUse() &&
1469	LHSKnown.One == ~RHSC->getAPIntValue()) {
1470	SDValue Xor = TLO.DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Op0.getOperand(i: `0`), N2: Op1);
1471	return TLO.CombineTo(O: Op, N: Xor);
1472	}
1473	}
1474
1475	// (X +/- Y) & Y --> ~X & Y when Y is a power of 2 (or zero).
1476	SDValue X, Y;
1477	if (sd_match(N: Op,
1478	P: m_And(L: m_Value(N&: Y),
1479	R: m_OneUse(P: m_AnyOf(preds: m_Add(L: m_Value(N&: X), R: m_Deferred(V&: Y)),
1480	preds: m_Sub(L: m_Value(N&: X), R: m_Deferred(V&: Y)))))) &&
1481	TLO.DAG.isKnownToBeAPowerOfTwo(Val: Y, DemandedElts, /OrZero=/true)) {
1482	return TLO.CombineTo(
1483	O: Op, N: TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: TLO.DAG.getNOT(DL: dl, Val: X, VT), N2: Y));
1484	}
1485
1486	// AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1487	// iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1488	if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1489	(Op0.getOperand(i: `0`).isUndef() \|\|
1490	ISD::isBuildVectorOfConstantSDNodes(N: Op0.getOperand(i: `0`).getNode())) &&
1491	Op0 ->hasOneUse()) {
1492	unsigned NumSubElts =
1493	Op0.getOperand(i: `1`).getValueType().getVectorNumElements();
1494	unsigned SubIdx = Op0.getConstantOperandVal(i: `2`);
1495	APInt DemandedSub =
1496	APInt::getBitsSet(numBits: NumElts, loBit: SubIdx, hiBit: SubIdx + NumSubElts);
1497	KnownBits KnownSubMask =
1498	TLO.DAG.computeKnownBits(Op: Op1, DemandedElts: DemandedSub & DemandedElts, Depth: Depth + `1`);
1499	if (DemandedBits.isSubsetOf(RHS: KnownSubMask.One)) {
1500	SDValue NewAnd =
1501	TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op0.getOperand(i: `0`), N2: Op1);
1502	SDValue NewInsert =
1503	TLO.DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL: dl, VT, N1: NewAnd,
1504	N2: Op0.getOperand(i: `1`), N3: Op0.getOperand(i: `2`));
1505	return TLO.CombineTo(O: Op, N: NewInsert);
1506	}
1507	}
1508
1509	if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1510	Depth: Depth + `1`))
1511	return true;
1512	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: ~Known.Zero & DemandedBits, OriginalDemandedElts: DemandedElts,
1513	Known&: Known2, TLO, Depth: Depth + `1`))
1514	return true;
1515
1516	// If all of the demanded bits are known one on one side, return the other.
1517	// These bits cannot contribute to the result of the 'and'.
1518	if (DemandedBits.isSubsetOf(RHS: Known2.Zero \| Known.One))
1519	return TLO.CombineTo(O: Op, N: Op0);
1520	if (DemandedBits.isSubsetOf(RHS: Known.Zero \| Known2.One))
1521	return TLO.CombineTo(O: Op, N: Op1);
1522	// If all of the demanded bits in the inputs are known zeros, return zero.
1523	if (DemandedBits.isSubsetOf(RHS: Known.Zero \| Known2.Zero))
1524	return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: `0`, DL: dl, VT));
1525	// If the RHS is a constant, see if we can simplify it.
1526	if (ShrinkDemandedConstant(Op, DemandedBits: ~Known2.Zero & DemandedBits, DemandedElts,
1527	TLO))
1528	return true;
1529	// If the operation can be done in a smaller type, do so.
1530	if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1531	return true;
1532
1533	// Attempt to avoid multi-use ops if we don't need anything from them.
1534	if (!DemandedBits.isAllOnes() \|\| !DemandedElts.isAllOnes()) {
1535	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1536	Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1537	SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1538	Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1539	if (DemandedOp0 \|\| DemandedOp1) {
1540	Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1541	Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1542	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1543	return TLO.CombineTo(O: Op, N: NewOp);
1544	}
1545	}
1546
1547	Known &= Known2;
1548	break;
1549	}
1550	case ISD::OR: {
1551	SDValue Op0 = Op.getOperand(i: `0`);
1552	SDValue Op1 = Op.getOperand(i: `1`);
1553	if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1554	Depth: Depth + `1`)) {
1555	Op ->dropFlags(Mask: SDNodeFlags::Disjoint);
1556	return true;
1557	}
1558
1559	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: ~Known.One & DemandedBits, OriginalDemandedElts: DemandedElts,
1560	Known&: Known2, TLO, Depth: Depth + `1`)) {
1561	Op ->dropFlags(Mask: SDNodeFlags::Disjoint);
1562	return true;
1563	}
1564
1565	// If all of the demanded bits are known zero on one side, return the other.
1566	// These bits cannot contribute to the result of the 'or'.
1567	if (DemandedBits.isSubsetOf(RHS: Known2.One \| Known.Zero))
1568	return TLO.CombineTo(O: Op, N: Op0);
1569	if (DemandedBits.isSubsetOf(RHS: Known.One \| Known2.Zero))
1570	return TLO.CombineTo(O: Op, N: Op1);
1571	// If the RHS is a constant, see if we can simplify it.
1572	if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1573	return true;
1574	// If the operation can be done in a smaller type, do so.
1575	if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1576	return true;
1577
1578	// Attempt to avoid multi-use ops if we don't need anything from them.
1579	if (!DemandedBits.isAllOnes() \|\| !DemandedElts.isAllOnes()) {
1580	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1581	Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1582	SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1583	Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1584	if (DemandedOp0 \|\| DemandedOp1) {
1585	Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1586	Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1587	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1588	return TLO.CombineTo(O: Op, N: NewOp);
1589	}
1590	}
1591
1592	// (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1\|C2), (and Y, C2))
1593	// TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1594	if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1595	Op0 ->hasOneUse() && Op1 ->hasOneUse()) {
1596	// Attempt to match all commutations - m_c_Or would've been useful!
1597	for (int I = `0`; I != `2`; ++I) {
1598	SDValue X = Op.getOperand(i: I).getOperand(i: `0`);
1599	SDValue C1 = Op.getOperand(i: I).getOperand(i: `1`);
1600	SDValue Alt = Op.getOperand(i: `1` - I).getOperand(i: `0`);
1601	SDValue C2 = Op.getOperand(i: `1` - I).getOperand(i: `1`);
1602	if (Alt.getOpcode() == ISD::OR) {
1603	for (int J = `0`; J != `2`; ++J) {
1604	if (X == Alt.getOperand(i: J)) {
1605	SDValue Y = Alt.getOperand(i: `1` - J);
1606	if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(Opcode: ISD::OR, DL: dl, VT,
1607	Ops: {C1, C2})) {
1608	SDValue MaskX = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: X, N2: C12);
1609	SDValue MaskY = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Y, N2: C2);
1610	return TLO.CombineTo(
1611	O: Op, N: TLO.DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: MaskX, N2: MaskY));
1612	}
1613	}
1614	}
1615	}
1616	}
1617	}
1618
1619	Known \|= Known2;
1620	break;
1621	}
1622	case ISD::XOR: {
1623	SDValue Op0 = Op.getOperand(i: `0`);
1624	SDValue Op1 = Op.getOperand(i: `1`);
1625
1626	if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1627	Depth: Depth + `1`))
1628	return true;
1629	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
1630	Depth: Depth + `1`))
1631	return true;
1632
1633	// If all of the demanded bits are known zero on one side, return the other.
1634	// These bits cannot contribute to the result of the 'xor'.
1635	if (DemandedBits.isSubsetOf(RHS: Known.Zero))
1636	return TLO.CombineTo(O: Op, N: Op0);
1637	if (DemandedBits.isSubsetOf(RHS: Known2.Zero))
1638	return TLO.CombineTo(O: Op, N: Op1);
1639	// If the operation can be done in a smaller type, do so.
1640	if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1641	return true;
1642
1643	// If all of the unknown bits are known to be zero on one side or the other
1644	// turn this into an *inclusive* or.
1645	// e.g. (A & C1)^(B & C2) -> (A & C1)\|(B & C2) iff C1&C2 == 0
1646	if (DemandedBits.isSubsetOf(RHS: Known.Zero \| Known2.Zero))
1647	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Op0, N2: Op1));
1648
1649	ConstantSDNode *C = isConstOrConstSplat(N: Op1, DemandedElts);
1650	if (C) {
1651	// If one side is a constant, and all of the set bits in the constant are
1652	// also known set on the other side, turn this into an AND, as we know
1653	// the bits will be cleared.
1654	// e.g. (X \| C1) ^ C2 --> (X \| C1) & ~C2 iff (C1&C2) == C2
1655	// NB: it is okay if more bits are known than are requested
1656	if (C->getAPIntValue() == Known2.One) {
1657	SDValue ANDC =
1658	TLO.DAG.getConstant(Val: ~C->getAPIntValue() & DemandedBits, DL: dl, VT);
1659	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op0, N2: ANDC));
1660	}
1661
1662	// If the RHS is a constant, see if we can change it. Don't alter a -1
1663	// constant because that's a 'not' op, and that is better for combining
1664	// and codegen.
1665	if (!C->isAllOnes() && DemandedBits.isSubsetOf(RHS: C->getAPIntValue())) {
1666	// We're flipping all demanded bits. Flip the undemanded bits too.
1667	SDValue New = TLO.DAG.getNOT(DL: dl, Val: Op0, VT);
1668	return TLO.CombineTo(O: Op, N: New);
1669	}
1670
1671	unsigned Op0Opcode = Op0.getOpcode();
1672	if ((Op0Opcode == ISD::SRL \|\| Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1673	if (ConstantSDNode *ShiftC =
1674	isConstOrConstSplat(N: Op0.getOperand(i: `1`), DemandedElts)) {
1675	// Don't crash on an oversized shift. We can not guarantee that a
1676	// bogus shift has been simplified to undef.
1677	if (ShiftC->getAPIntValue().ult(RHS: BitWidth)) {
1678	uint64_t ShiftAmt = ShiftC->getZExtValue();
1679	APInt Ones = APInt::getAllOnes(numBits: BitWidth);
1680	Ones = Op0Opcode == ISD::SHL ? Ones.shl(shiftAmt: ShiftAmt)
1681	: Ones.lshr(shiftAmt: ShiftAmt);
1682	if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1683	isDesirableToCommuteXorWithShift(N: Op.getNode())) {
1684	// If the xor constant is a demanded mask, do a 'not' before the
1685	// shift:
1686	// xor (X << ShiftC), XorC --> (not X) << ShiftC
1687	// xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1688	SDValue Not = TLO.DAG.getNOT(DL: dl, Val: Op0.getOperand(i: `0`), VT);
1689	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op0Opcode, DL: dl, VT, N1: Not,
1690	N2: Op0.getOperand(i: `1`)));
1691	}
1692	}
1693	}
1694	}
1695	}
1696
1697	// If we can't turn this into a 'not', try to shrink the constant.
1698	if (!C \|\| !C->isAllOnes())
1699	if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1700	return true;
1701
1702	// Attempt to avoid multi-use ops if we don't need anything from them.
1703	if (!DemandedBits.isAllOnes() \|\| !DemandedElts.isAllOnes()) {
1704	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1705	Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1706	SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1707	Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1708	if (DemandedOp0 \|\| DemandedOp1) {
1709	Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1710	Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1711	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1712	return TLO.CombineTo(O: Op, N: NewOp);
1713	}
1714	}
1715
1716	Known ^= Known2;
1717	break;
1718	}
1719	case ISD::SELECT:
1720	if (SimplifyDemandedBits(Op: Op.getOperand(i: `2`), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1721	Known, TLO, Depth: Depth + `1`))
1722	return true;
1723	if (SimplifyDemandedBits(Op: Op.getOperand(i: `1`), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1724	Known&: Known2, TLO, Depth: Depth + `1`))
1725	return true;
1726
1727	// If the operands are constants, see if we can simplify them.
1728	if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1729	return true;
1730
1731	// Only known if known in both the LHS and RHS.
1732	Known = Known.intersectWith(RHS: Known2);
1733	break;
1734	case ISD::VSELECT:
1735	if (SimplifyDemandedBits(Op: Op.getOperand(i: `2`), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1736	Known, TLO, Depth: Depth + `1`))
1737	return true;
1738	if (SimplifyDemandedBits(Op: Op.getOperand(i: `1`), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1739	Known&: Known2, TLO, Depth: Depth + `1`))
1740	return true;
1741
1742	// Only known if known in both the LHS and RHS.
1743	Known = Known.intersectWith(RHS: Known2);
1744	break;
1745	case ISD::SELECT_CC:
1746	if (SimplifyDemandedBits(Op: Op.getOperand(i: `3`), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1747	Known, TLO, Depth: Depth + `1`))
1748	return true;
1749	if (SimplifyDemandedBits(Op: Op.getOperand(i: `2`), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1750	Known&: Known2, TLO, Depth: Depth + `1`))
1751	return true;
1752
1753	// If the operands are constants, see if we can simplify them.
1754	if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1755	return true;
1756
1757	// Only known if known in both the LHS and RHS.
1758	Known = Known.intersectWith(RHS: Known2);
1759	break;
1760	case ISD::SETCC: {
1761	SDValue Op0 = Op.getOperand(i: `0`);
1762	SDValue Op1 = Op.getOperand(i: `1`);
1763	ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: `2`))->get();
1764	// If we're testing X < 0, X >= 0, X <= -1 or X > -1
1765	// (X is of integer type) then we only need the sign mask of the previous
1766	// result
1767	if (Op1.getValueType().isInteger() &&
1768	(((CC == ISD::SETLT \|\| CC == ISD::SETGE) && isNullOrNullSplat(V: Op1)) \|\|
1769	((CC == ISD::SETLE \|\| CC == ISD::SETGT) &&
1770	isAllOnesOrAllOnesSplat(V: Op1)))) {
1771	KnownBits KnownOp0;
1772	if (SimplifyDemandedBits(
1773	Op: Op0, OriginalDemandedBits: APInt::getSignMask(BitWidth: Op0.getScalarValueSizeInBits()),
1774	OriginalDemandedElts: DemandedElts, Known&: KnownOp0, TLO, Depth: Depth + `1`))
1775	return true;
1776	// If (1) we only need the sign-bit, (2) the setcc operands are the same
1777	// width as the setcc result, and (3) the result of a setcc conforms to 0
1778	// or -1, we may be able to bypass the setcc.
1779	if (DemandedBits.isSignMask() &&
1780	Op0.getScalarValueSizeInBits() == BitWidth &&
1781	getBooleanContents(Type: Op0.getValueType()) ==
1782	BooleanContent::ZeroOrNegativeOneBooleanContent) {
1783	// If we remove a >= 0 or > -1 (for integers), we need to introduce a
1784	// NOT Operation
1785	if (CC == ISD::SETGE \|\| CC == ISD::SETGT) {
1786	SDLoc DL(Op);
1787	EVT VT = Op0.getValueType();
1788	SDValue NotOp0 = TLO.DAG.getNOT(DL, Val: Op0, VT);
1789	return TLO.CombineTo(O: Op, N: NotOp0);
1790	}
1791	return TLO.CombineTo(O: Op, N: Op0);
1792	}
1793	}
1794	if (getBooleanContents(Type: Op0.getValueType()) ==
1795	TargetLowering::ZeroOrOneBooleanContent &&
1796	BitWidth > `1`)
1797	Known.Zero.setBitsFrom(`1`);
1798	break;
1799	}
1800	case ISD::SHL: {
1801	SDValue Op0 = Op.getOperand(i: `0`);
1802	SDValue Op1 = Op.getOperand(i: `1`);
1803	EVT ShiftVT = Op1.getValueType();
1804
1805	if (std::optional<unsigned> KnownSA =
1806	TLO.DAG.getValidShiftAmount(V: Op, DemandedElts, Depth: Depth + `1`)) {
1807	unsigned ShAmt = *KnownSA;
1808	if (ShAmt == `0`)
1809	return TLO.CombineTo(O: Op, N: Op0);
1810
1811	// If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1812	// single shift. We can do this if the bottom bits (which are shifted
1813	// out) are never demanded.
1814	// TODO - support non-uniform vector amounts.
1815	if (Op0.getOpcode() == ISD::SRL) {
1816	if (!DemandedBits.intersects(RHS: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ShAmt))) {
1817	if (std::optional<unsigned> InnerSA =
1818	TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + `2`)) {
1819	unsigned C1 = *InnerSA;
1820	unsigned Opc = ISD::SHL;
1821	int Diff = ShAmt - C1;
1822	if (Diff < `0`) {
1823	Diff = -Diff;
1824	Opc = ISD::SRL;
1825	}
1826	SDValue NewSA = TLO.DAG.getConstant(Val: Diff, DL: dl, VT: ShiftVT);
1827	return TLO.CombineTo(
1828	O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, N1: Op0.getOperand(i: `0`), N2: NewSA));
1829	}
1830	}
1831	}
1832
1833	// Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1834	// are not demanded. This will likely allow the anyext to be folded away.
1835	// TODO - support non-uniform vector amounts.
1836	if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1837	SDValue InnerOp = Op0.getOperand(i: `0`);
1838	EVT InnerVT = InnerOp.getValueType();
1839	unsigned InnerBits = InnerVT.getScalarSizeInBits();
1840	if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1841	isTypeDesirableForOp(ISD::SHL, VT: InnerVT)) {
1842	SDValue NarrowShl = TLO.DAG.getNode(
1843	Opcode: ISD::SHL, DL: dl, VT: InnerVT, N1: InnerOp,
1844	N2: TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: InnerVT, DL: dl));
1845	return TLO.CombineTo(
1846	O: Op, N: TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: NarrowShl));
1847	}
1848
1849	// Repeat the SHL optimization above in cases where an extension
1850	// intervenes: (shl (anyext (shr x, c1)), c2) to
1851	// (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1852	// aren't demanded (as above) and that the shifted upper c1 bits of
1853	// x aren't demanded.
1854	// TODO - support non-uniform vector amounts.
1855	if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1856	InnerOp.hasOneUse()) {
1857	if (std::optional<unsigned> SA2 = TLO.DAG.getValidShiftAmount(
1858	V: InnerOp, DemandedElts, Depth: Depth + `2`)) {
1859	unsigned InnerShAmt = *SA2;
1860	if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1861	DemandedBits.getActiveBits() <=
1862	(InnerBits - InnerShAmt + ShAmt) &&
1863	DemandedBits.countr_zero() >= ShAmt) {
1864	SDValue NewSA =
1865	TLO.DAG.getConstant(Val: ShAmt - InnerShAmt, DL: dl, VT: ShiftVT);
1866	SDValue NewExt = TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT,
1867	Operand: InnerOp.getOperand(i: `0`));
1868	return TLO.CombineTo(
1869	O: Op, N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: NewExt, N2: NewSA));
1870	}
1871	}
1872	}
1873	}
1874
1875	APInt InDemandedMask = DemandedBits.lshr(shiftAmt: ShAmt);
1876	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
1877	Depth: Depth + `1`)) {
1878	// Disable the nsw and nuw flags. We can no longer guarantee that we
1879	// won't wrap after simplification.
1880	Op ->dropFlags(Mask: SDNodeFlags::NoWrap);
1881	return true;
1882	}
1883	Known <<= ShAmt;
1884	// low bits known zero.
1885	Known.Zero.setLowBits(ShAmt);
1886
1887	// Attempt to avoid multi-use ops if we don't need anything from them.
1888	if (!InDemandedMask.isAllOnes() \|\| !DemandedElts.isAllOnes()) {
1889	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1890	Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1891	if (DemandedOp0) {
1892	SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: DemandedOp0, N2: Op1);
1893	return TLO.CombineTo(O: Op, N: NewOp);
1894	}
1895	}
1896
1897	// TODO: Can we merge this fold with the one below?
1898	// Try shrinking the operation as long as the shift amount will still be
1899	// in range.
1900	if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1901	Op.getNode()->hasOneUse()) {
1902	// Search for the smallest integer type with free casts to and from
1903	// Op's type. For expedience, just check power-of-2 integer types.
1904	unsigned DemandedSize = DemandedBits.getActiveBits();
1905	for (unsigned SmallVTBits = llvm::bit_ceil(Value: DemandedSize);
1906	SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(A: SmallVTBits)) {
1907	EVT SmallVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: SmallVTBits);
1908	if (isNarrowingProfitable(N: Op.getNode(), SrcVT: VT, DestVT: SmallVT) &&
1909	isTypeDesirableForOp(ISD::SHL, VT: SmallVT) &&
1910	isTruncateFree(FromVT: VT, ToVT: SmallVT) && isZExtFree(FromTy: SmallVT, ToTy: VT) &&
1911	(!TLO.LegalOperations() \|\| isOperationLegal(Op: ISD::SHL, VT: SmallVT))) {
1912	assert(DemandedSize <= SmallVTBits &&
1913	"Narrowed below demanded bits?");
1914	// We found a type with free casts.
1915	SDValue NarrowShl = TLO.DAG.getNode(
1916	Opcode: ISD::SHL, DL: dl, VT: SmallVT,
1917	N1: TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SmallVT, Operand: Op.getOperand(i: `0`)),
1918	N2: TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: SmallVT, DL: dl));
1919	return TLO.CombineTo(
1920	O: Op, N: TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: NarrowShl));
1921	}
1922	}
1923	}
1924
1925	// Narrow shift to lower half - similar to ShrinkDemandedOp.
1926	// (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1927	// Only do this if we demand the upper half so the knownbits are correct.
1928	unsigned HalfWidth = BitWidth / `2`;
1929	if ((BitWidth % `2`) == `0` && !VT.isVector() && ShAmt < HalfWidth &&
1930	DemandedBits.countLeadingOnes() >= HalfWidth) {
1931	EVT HalfVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: HalfWidth);
1932	if (isNarrowingProfitable(N: Op.getNode(), SrcVT: VT, DestVT: HalfVT) &&
1933	isTypeDesirableForOp(ISD::SHL, VT: HalfVT) &&
1934	isTruncateFree(FromVT: VT, ToVT: HalfVT) && isZExtFree(FromTy: HalfVT, ToTy: VT) &&
1935	(!TLO.LegalOperations() \|\| isOperationLegal(Op: ISD::SHL, VT: HalfVT))) {
1936	// If we're demanding the upper bits at all, we must ensure
1937	// that the upper bits of the shift result are known to be zero,
1938	// which is equivalent to the narrow shift being NUW.
1939	if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1940	bool IsNSW = Known.countMinSignBits() > HalfWidth;
1941	SDNodeFlags Flags;
1942	Flags.setNoSignedWrap(IsNSW);
1943	Flags.setNoUnsignedWrap(IsNUW);
1944	SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HalfVT, Operand: Op0);
1945	SDValue NewShiftAmt =
1946	TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: HalfVT, DL: dl);
1947	SDValue NewShift = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HalfVT, N1: NewOp,
1948	N2: NewShiftAmt, Flags);
1949	SDValue NewExt =
1950	TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: NewShift);
1951	return TLO.CombineTo(O: Op, N: NewExt);
1952	}
1953	}
1954	}
1955	} else {
1956	// This is a variable shift, so we can't shift the demand mask by a known
1957	// amount. But if we are not demanding high bits, then we are not
1958	// demanding those bits from the pre-shifted operand either.
1959	if (unsigned CTLZ = DemandedBits.countl_zero()) {
1960	APInt DemandedFromOp(APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - CTLZ));
1961	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedFromOp, OriginalDemandedElts: DemandedElts, Known, TLO,
1962	Depth: Depth + `1`)) {
1963	// Disable the nsw and nuw flags. We can no longer guarantee that we
1964	// won't wrap after simplification.
1965	Op ->dropFlags(Mask: SDNodeFlags::NoWrap);
1966	return true;
1967	}
1968	Known.resetAll();
1969	}
1970	}
1971
1972	// If we are only demanding sign bits then we can use the shift source
1973	// directly.
1974	if (std::optional<unsigned> MaxSA =
1975	TLO.DAG.getValidMaximumShiftAmount(V: Op, DemandedElts, Depth: Depth + `1`)) {
1976	unsigned ShAmt = *MaxSA;
1977	unsigned NumSignBits =
1978	TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + `1`);
1979	unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1980	if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1981	return TLO.CombineTo(O: Op, N: Op0);
1982	}
1983	break;
1984	}
1985	case ISD::SRL: {
1986	SDValue Op0 = Op.getOperand(i: `0`);
1987	SDValue Op1 = Op.getOperand(i: `1`);
1988	EVT ShiftVT = Op1.getValueType();
1989
1990	if (std::optional<unsigned> KnownSA =
1991	TLO.DAG.getValidShiftAmount(V: Op, DemandedElts, Depth: Depth + `1`)) {
1992	unsigned ShAmt = *KnownSA;
1993	if (ShAmt == `0`)
1994	return TLO.CombineTo(O: Op, N: Op0);
1995
1996	// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1997	// single shift. We can do this if the top bits (which are shifted out)
1998	// are never demanded.
1999	// TODO - support non-uniform vector amounts.
2000	if (Op0.getOpcode() == ISD::SHL) {
2001	if (!DemandedBits.intersects(RHS: APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: ShAmt))) {
2002	if (std::optional<unsigned> InnerSA =
2003	TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + `2`)) {
2004	unsigned C1 = *InnerSA;
2005	unsigned Opc = ISD::SRL;
2006	int Diff = ShAmt - C1;
2007	if (Diff < `0`) {
2008	Diff = -Diff;
2009	Opc = ISD::SHL;
2010	}
2011	SDValue NewSA = TLO.DAG.getConstant(Val: Diff, DL: dl, VT: ShiftVT);
2012	return TLO.CombineTo(
2013	O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, N1: Op0.getOperand(i: `0`), N2: NewSA));
2014	}
2015	}
2016	}
2017
2018	// If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
2019	// single sra. We can do this if the top bits are never demanded.
2020	if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
2021	if (!DemandedBits.intersects(RHS: APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: ShAmt))) {
2022	if (std::optional<unsigned> InnerSA =
2023	TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + `2`)) {
2024	unsigned C1 = *InnerSA;
2025	// Clamp the combined shift amount if it exceeds the bit width.
2026	unsigned Combined = std::min(a: C1 + ShAmt, b: BitWidth - `1`);
2027	SDValue NewSA = TLO.DAG.getConstant(Val: Combined, DL: dl, VT: ShiftVT);
2028	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRA, DL: dl, VT,
2029	N1: Op0.getOperand(i: `0`), N2: NewSA));
2030	}
2031	}
2032	}
2033
2034	APInt InDemandedMask = (DemandedBits << ShAmt);
2035
2036	// If the shift is exact, then it does demand the low bits (and knows that
2037	// they are zero).
2038	if (Op ->getFlags().hasExact())
2039	InDemandedMask.setLowBits(ShAmt);
2040
2041	// Narrow shift to lower half - similar to ShrinkDemandedOp.
2042	// (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
2043	if ((BitWidth % `2`) == `0` && !VT.isVector()) {
2044	APInt HiBits = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth / `2`);
2045	EVT HalfVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: BitWidth / `2`);
2046	if (isNarrowingProfitable(N: Op.getNode(), SrcVT: VT, DestVT: HalfVT) &&
2047	isTypeDesirableForOp(ISD::SRL, VT: HalfVT) &&
2048	isTruncateFree(FromVT: VT, ToVT: HalfVT) && isZExtFree(FromTy: HalfVT, ToTy: VT) &&
2049	(!TLO.LegalOperations() \|\| isOperationLegal(Op: ISD::SRL, VT: HalfVT)) &&
2050	((InDemandedMask.countLeadingZeros() >= (BitWidth / `2`)) \|\|
2051	TLO.DAG.MaskedValueIsZero(Op: Op0, Mask: HiBits))) {
2052	SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HalfVT, Operand: Op0);
2053	SDValue NewShiftAmt =
2054	TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: HalfVT, DL: dl);
2055	SDValue NewShift =
2056	TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HalfVT, N1: NewOp, N2: NewShiftAmt);
2057	return TLO.CombineTo(
2058	O: Op, N: TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: NewShift));
2059	}
2060	}
2061
2062	// Compute the new bits that are at the top now.
2063	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
2064	Depth: Depth + `1`))
2065	return true;
2066	Known >>= ShAmt;
2067	// High bits known zero.
2068	Known.Zero.setHighBits(ShAmt);
2069
2070	// Attempt to avoid multi-use ops if we don't need anything from them.
2071	if (!InDemandedMask.isAllOnes() \|\| !DemandedElts.isAllOnes()) {
2072	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2073	Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
2074	if (DemandedOp0) {
2075	SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: DemandedOp0, N2: Op1);
2076	return TLO.CombineTo(O: Op, N: NewOp);
2077	}
2078	}
2079	} else {
2080	// Use generic knownbits computation as it has support for non-uniform
2081	// shift amounts.
2082	Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2083	}
2084
2085	// If we are only demanding sign bits then we can use the shift source
2086	// directly.
2087	if (std::optional<unsigned> MaxSA =
2088	TLO.DAG.getValidMaximumShiftAmount(V: Op, DemandedElts, Depth: Depth + `1`)) {
2089	unsigned ShAmt = *MaxSA;
2090	// Must already be signbits in DemandedBits bounds, and can't demand any
2091	// shifted in zeroes.
2092	if (DemandedBits.countl_zero() >= ShAmt) {
2093	unsigned NumSignBits =
2094	TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + `1`);
2095	if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
2096	return TLO.CombineTo(O: Op, N: Op0);
2097	}
2098	}
2099
2100	// Try to match AVG patterns (after shift simplification).
2101	if (SDValue AVG = combineShiftToAVG(Op, TLO, TLI: *this, DemandedBits,
2102	DemandedElts, Depth: Depth + `1`))
2103	return TLO.CombineTo(O: Op, N: AVG);
2104
2105	break;
2106	}
2107	case ISD::SRA: {
2108	SDValue Op0 = Op.getOperand(i: `0`);
2109	SDValue Op1 = Op.getOperand(i: `1`);
2110	EVT ShiftVT = Op1.getValueType();
2111
2112	// If we only want bits that already match the signbit then we don't need
2113	// to shift.
2114	unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2115	if (TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + `1`) >=
2116	NumHiDemandedBits)
2117	return TLO.CombineTo(O: Op, N: Op0);
2118
2119	// If this is an arithmetic shift right and only the low-bit is set, we can
2120	// always convert this into a logical shr, even if the shift amount is
2121	// variable. The low bit of the shift cannot be an input sign bit unless
2122	// the shift amount is >= the size of the datatype, which is undefined.
2123	if (DemandedBits.isOne())
2124	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1));
2125
2126	if (std::optional<unsigned> KnownSA =
2127	TLO.DAG.getValidShiftAmount(V: Op, DemandedElts, Depth: Depth + `1`)) {
2128	unsigned ShAmt = *KnownSA;
2129	if (ShAmt == `0`)
2130	return TLO.CombineTo(O: Op, N: Op0);
2131
2132	// fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2133	// supports sext_inreg.
2134	if (Op0.getOpcode() == ISD::SHL) {
2135	if (std::optional<unsigned> InnerSA =
2136	TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + `2`)) {
2137	unsigned LowBits = BitWidth - ShAmt;
2138	EVT ExtVT = VT.changeElementType(
2139	Context&: *TLO.DAG.getContext(),
2140	EltVT: EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: LowBits));
2141
2142	if (*InnerSA == ShAmt) {
2143	if (!TLO.LegalOperations() \|\|
2144	getOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: ExtVT) == Legal)
2145	return TLO.CombineTo(
2146	O: Op, N: TLO.DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT,
2147	N1: Op0.getOperand(i: `0`),
2148	N2: TLO.DAG.getValueType(ExtVT)));
2149
2150	// Even if we can't convert to sext_inreg, we might be able to
2151	// remove this shift pair if the input is already sign extended.
2152	unsigned NumSignBits =
2153	TLO.DAG.ComputeNumSignBits(Op: Op0.getOperand(i: `0`), DemandedElts);
2154	if (NumSignBits > ShAmt)
2155	return TLO.CombineTo(O: Op, N: Op0.getOperand(i: `0`));
2156	}
2157	}
2158	}
2159
2160	APInt InDemandedMask = (DemandedBits << ShAmt);
2161
2162	// If the shift is exact, then it does demand the low bits (and knows that
2163	// they are zero).
2164	if (Op ->getFlags().hasExact())
2165	InDemandedMask.setLowBits(ShAmt);
2166
2167	// If any of the demanded bits are produced by the sign extension, we also
2168	// demand the input sign bit.
2169	if (DemandedBits.countl_zero() < ShAmt)
2170	InDemandedMask.setSignBit();
2171
2172	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
2173	Depth: Depth + `1`))
2174	return true;
2175	Known >>= ShAmt;
2176
2177	// If the input sign bit is known to be zero, or if none of the top bits
2178	// are demanded, turn this into an unsigned shift right.
2179	if (Known.Zero [BitWidth - ShAmt - `1`] \|\|
2180	DemandedBits.countl_zero() >= ShAmt) {
2181	SDNodeFlags Flags;
2182	Flags.setExact(Op ->getFlags().hasExact());
2183	return TLO.CombineTo(
2184	O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1, Flags));
2185	}
2186
2187	int Log2 = DemandedBits.exactLogBase2();
2188	if (Log2 >= `0`) {
2189	// The bit must come from the sign.
2190	SDValue NewSA = TLO.DAG.getConstant(Val: BitWidth - `1` - Log2, DL: dl, VT: ShiftVT);
2191	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: NewSA));
2192	}
2193
2194	if (Known.One [BitWidth - ShAmt - `1`])
2195	// New bits are known one.
2196	Known.One.setHighBits(ShAmt);
2197
2198	// Attempt to avoid multi-use ops if we don't need anything from them.
2199	if (!InDemandedMask.isAllOnes() \|\| !DemandedElts.isAllOnes()) {
2200	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2201	Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
2202	if (DemandedOp0) {
2203	SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: DemandedOp0, N2: Op1);
2204	return TLO.CombineTo(O: Op, N: NewOp);
2205	}
2206	}
2207	}
2208
2209	// Try to match AVG patterns (after shift simplification).
2210	if (SDValue AVG = combineShiftToAVG(Op, TLO, TLI: *this, DemandedBits,
2211	DemandedElts, Depth: Depth + `1`))
2212	return TLO.CombineTo(O: Op, N: AVG);
2213
2214	break;
2215	}
2216	case ISD::FSHL:
2217	case ISD::FSHR: {
2218	SDValue Op0 = Op.getOperand(i: `0`);
2219	SDValue Op1 = Op.getOperand(i: `1`);
2220	SDValue Op2 = Op.getOperand(i: `2`);
2221	bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2222
2223	if (ConstantSDNode *SA = isConstOrConstSplat(N: Op2, DemandedElts)) {
2224	unsigned Amt = SA->getAPIntValue().urem(RHS: BitWidth);
2225
2226	// For fshl, 0-shift returns the 1st arg.
2227	// For fshr, 0-shift returns the 2nd arg.
2228	if (Amt == `0`) {
2229	if (SimplifyDemandedBits(Op: IsFSHL ? Op0 : Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
2230	Known, TLO, Depth: Depth + `1`))
2231	return true;
2232	break;
2233	}
2234
2235	// fshl: (Op0 << Amt) \| (Op1 >> (BW - Amt))
2236	// fshr: (Op0 << (BW - Amt)) \| (Op1 >> Amt)
2237	APInt Demanded0 = DemandedBits.lshr(shiftAmt: IsFSHL ? Amt : (BitWidth - Amt));
2238	APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2239	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: Demanded0, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2240	Depth: Depth + `1`))
2241	return true;
2242	if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: Demanded1, OriginalDemandedElts: DemandedElts, Known, TLO,
2243	Depth: Depth + `1`))
2244	return true;
2245
2246	Known2 <<= (IsFSHL ? Amt : (BitWidth - Amt));
2247	Known >>= (IsFSHL ? (BitWidth - Amt) : Amt);
2248	Known = Known.unionWith(RHS: Known2);
2249
2250	// Attempt to avoid multi-use ops if we don't need anything from them.
2251	if (!Demanded0.isAllOnes() \|\| !Demanded1.isAllOnes() \|\|
2252	!DemandedElts.isAllOnes()) {
2253	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2254	Op: Op0, DemandedBits: Demanded0, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
2255	SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2256	Op: Op1, DemandedBits: Demanded1, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
2257	if (DemandedOp0 \|\| DemandedOp1) {
2258	DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2259	DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2260	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedOp0,
2261	N2: DemandedOp1, N3: Op2);
2262	return TLO.CombineTo(O: Op, N: NewOp);
2263	}
2264	}
2265	}
2266
2267	if (isPowerOf2_32(Value: BitWidth)) {
2268	// Fold FSHR(Op0,Op1,Op2) -> SRL(Op1,Op2)
2269	// iff we're guaranteed not to use Op0.
2270	// TODO: Add FSHL equivalent?
2271	if (!IsFSHL && !DemandedBits.isAllOnes() &&
2272	(!TLO.LegalOperations() \|\| isOperationLegal(Op: ISD::SRL, VT))) {
2273	KnownBits KnownAmt =
2274	TLO.DAG.computeKnownBits(Op: Op2, DemandedElts, Depth: Depth + `1`);
2275	unsigned MaxShiftAmt =
2276	KnownAmt.getMaxValue().getLimitedValue(Limit: BitWidth - `1`);
2277	// Check we don't demand any shifted bits outside Op1.
2278	if (DemandedBits.countl_zero() >= MaxShiftAmt) {
2279	EVT AmtVT = Op2.getValueType();
2280	SDValue NewAmt =
2281	TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT: AmtVT, N1: Op2,
2282	N2: TLO.DAG.getConstant(Val: BitWidth - `1`, DL: dl, VT: AmtVT));
2283	SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op1, N2: NewAmt);
2284	return TLO.CombineTo(O: Op, N: NewOp);
2285	}
2286	}
2287
2288	// For pow-2 bitwidths we only demand the bottom modulo amt bits.
2289	APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - `1`);
2290	if (SimplifyDemandedBits(Op: Op2, OriginalDemandedBits: DemandedAmtBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2291	Depth: Depth + `1`))
2292	return true;
2293	}
2294	break;
2295	}
2296	case ISD::ROTL:
2297	case ISD::ROTR: {
2298	SDValue Op0 = Op.getOperand(i: `0`);
2299	SDValue Op1 = Op.getOperand(i: `1`);
2300	bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2301
2302	// If we're rotating an 0/-1 value, then it stays an 0/-1 value.
2303	if (BitWidth == TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + `1`))
2304	return TLO.CombineTo(O: Op, N: Op0);
2305
2306	if (ConstantSDNode *SA = isConstOrConstSplat(N: Op1, DemandedElts)) {
2307	unsigned Amt = SA->getAPIntValue().urem(RHS: BitWidth);
2308	unsigned RevAmt = BitWidth - Amt;
2309
2310	// rotl: (Op0 << Amt) \| (Op0 >> (BW - Amt))
2311	// rotr: (Op0 << (BW - Amt)) \| (Op0 >> Amt)
2312	APInt Demanded0 = DemandedBits.rotr(rotateAmt: IsROTL ? Amt : RevAmt);
2313	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: Demanded0, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2314	Depth: Depth + `1`))
2315	return true;
2316
2317	// rot*(x, 0) --> x
2318	if (Amt == `0`)
2319	return TLO.CombineTo(O: Op, N: Op0);
2320
2321	// See if we don't demand either half of the rotated bits.
2322	if ((!TLO.LegalOperations() \|\| isOperationLegal(Op: ISD::SHL, VT)) &&
2323	DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2324	Op1 = TLO.DAG.getConstant(Val: IsROTL ? Amt : RevAmt, DL: dl, VT: Op1.getValueType());
2325	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op0, N2: Op1));
2326	}
2327	if ((!TLO.LegalOperations() \|\| isOperationLegal(Op: ISD::SRL, VT)) &&
2328	DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2329	Op1 = TLO.DAG.getConstant(Val: IsROTL ? RevAmt : Amt, DL: dl, VT: Op1.getValueType());
2330	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1));
2331	}
2332	}
2333
2334	// For pow-2 bitwidths we only demand the bottom modulo amt bits.
2335	if (isPowerOf2_32(Value: BitWidth)) {
2336	APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - `1`);
2337	if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedAmtBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2338	Depth: Depth + `1`))
2339	return true;
2340	}
2341	break;
2342	}
2343	case ISD::SMIN:
2344	case ISD::SMAX:
2345	case ISD::UMIN:
2346	case ISD::UMAX: {
2347	unsigned Opc = Op.getOpcode();
2348	SDValue Op0 = Op.getOperand(i: `0`);
2349	SDValue Op1 = Op.getOperand(i: `1`);
2350
2351	// If we're only demanding signbits, then we can simplify to OR/AND node.
2352	unsigned BitOp =
2353	(Opc == ISD::SMIN \|\| Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2354	unsigned NumSignBits =
2355	std::min(a: TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + `1`),
2356	b: TLO.DAG.ComputeNumSignBits(Op: Op1, DemandedElts, Depth: Depth + `1`));
2357	unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2358	if (NumSignBits >= NumDemandedUpperBits)
2359	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: BitOp, DL: SDLoc (Op), VT, N1: Op0, N2: Op1));
2360
2361	// Check if one arg is always less/greater than (or equal) to the other arg.
2362	KnownBits Known0 = TLO.DAG.computeKnownBits(Op: Op0, DemandedElts, Depth: Depth + `1`);
2363	KnownBits Known1 = TLO.DAG.computeKnownBits(Op: Op1, DemandedElts, Depth: Depth + `1`);
2364	switch (Opc) {
2365	case ISD::SMIN:
2366	if (std::optional<bool> IsSLE = KnownBits::sle(LHS: Known0, RHS: Known1))
2367	return TLO.CombineTo(O: Op, N: *IsSLE ? Op0 : Op1);
2368	if (std::optional<bool> IsSLT = KnownBits::slt(LHS: Known0, RHS: Known1))
2369	return TLO.CombineTo(O: Op, N: *IsSLT ? Op0 : Op1);
2370	Known = KnownBits::smin(LHS: Known0, RHS: Known1);
2371	break;
2372	case ISD::SMAX:
2373	if (std::optional<bool> IsSGE = KnownBits::sge(LHS: Known0, RHS: Known1))
2374	return TLO.CombineTo(O: Op, N: *IsSGE ? Op0 : Op1);
2375	if (std::optional<bool> IsSGT = KnownBits::sgt(LHS: Known0, RHS: Known1))
2376	return TLO.CombineTo(O: Op, N: *IsSGT ? Op0 : Op1);
2377	Known = KnownBits::smax(LHS: Known0, RHS: Known1);
2378	break;
2379	case ISD::UMIN:
2380	if (std::optional<bool> IsULE = KnownBits::ule(LHS: Known0, RHS: Known1))
2381	return TLO.CombineTo(O: Op, N: *IsULE ? Op0 : Op1);
2382	if (std::optional<bool> IsULT = KnownBits::ult(LHS: Known0, RHS: Known1))
2383	return TLO.CombineTo(O: Op, N: *IsULT ? Op0 : Op1);
2384	Known = KnownBits::umin(LHS: Known0, RHS: Known1);
2385	break;
2386	case ISD::UMAX:
2387	if (std::optional<bool> IsUGE = KnownBits::uge(LHS: Known0, RHS: Known1))
2388	return TLO.CombineTo(O: Op, N: *IsUGE ? Op0 : Op1);
2389	if (std::optional<bool> IsUGT = KnownBits::ugt(LHS: Known0, RHS: Known1))
2390	return TLO.CombineTo(O: Op, N: *IsUGT ? Op0 : Op1);
2391	Known = KnownBits::umax(LHS: Known0, RHS: Known1);
2392	break;
2393	}
2394	break;
2395	}
2396	case ISD::BITREVERSE: {
2397	SDValue Src = Op.getOperand(i: `0`);
2398	APInt DemandedSrcBits = DemandedBits.reverseBits();
2399	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2400	Depth: Depth + `1`))
2401	return true;
2402	Known = Known2.reverseBits();
2403	break;
2404	}
2405	case ISD::BSWAP: {
2406	SDValue Src = Op.getOperand(i: `0`);
2407
2408	// If the only bits demanded come from one byte of the bswap result,
2409	// just shift the input byte into position to eliminate the bswap.
2410	unsigned NLZ = DemandedBits.countl_zero();
2411	unsigned NTZ = DemandedBits.countr_zero();
2412
2413	// Round NTZ down to the next byte. If we have 11 trailing zeros, then
2414	// we need all the bits down to bit 8. Likewise, round NLZ. If we
2415	// have 14 leading zeros, round to 8.
2416	NLZ = alignDown(Value: NLZ, Align: `8`);
2417	NTZ = alignDown(Value: NTZ, Align: `8`);
2418	// If we need exactly one byte, we can do this transformation.
2419	if (BitWidth - NLZ - NTZ == `8`) {
2420	// Replace this with either a left or right shift to get the byte into
2421	// the right place.
2422	unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2423	if (!TLO.LegalOperations() \|\| isOperationLegal(Op: ShiftOpcode, VT)) {
2424	unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2425	SDValue ShAmt = TLO.DAG.getShiftAmountConstant(Val: ShiftAmount, VT, DL: dl);
2426	SDValue NewOp = TLO.DAG.getNode(Opcode: ShiftOpcode, DL: dl, VT, N1: Src, N2: ShAmt);
2427	return TLO.CombineTo(O: Op, N: NewOp);
2428	}
2429	}
2430
2431	APInt DemandedSrcBits = DemandedBits.byteSwap();
2432	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2433	Depth: Depth + `1`))
2434	return true;
2435	Known = Known2.byteSwap();
2436	break;
2437	}
2438	case ISD::CTPOP: {
2439	// If only 1 bit is demanded, replace with PARITY as long as we're before
2440	// op legalization.
2441	// FIXME: Limit to scalars for now.
2442	if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2443	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::PARITY, DL: dl, VT,
2444	Operand: Op.getOperand(i: `0`)));
2445
2446	Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2447	break;
2448	}
2449	case ISD::SIGN_EXTEND_INREG: {
2450	SDValue Op0 = Op.getOperand(i: `0`);
2451	EVT ExVT = cast<VTSDNode>(Val: Op.getOperand(i: `1`))->getVT();
2452	unsigned ExVTBits = ExVT.getScalarSizeInBits();
2453
2454	// If we only care about the highest bit, don't bother shifting right.
2455	if (DemandedBits.isSignMask()) {
2456	unsigned MinSignedBits =
2457	TLO.DAG.ComputeMaxSignificantBits(Op: Op0, DemandedElts, Depth: Depth + `1`);
2458	bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2459	// However if the input is already sign extended we expect the sign
2460	// extension to be dropped altogether later and do not simplify.
2461	if (!AlreadySignExtended) {
2462	// Compute the correct shift amount type, which must be getShiftAmountTy
2463	// for scalar types after legalization.
2464	SDValue ShiftAmt =
2465	TLO.DAG.getShiftAmountConstant(Val: BitWidth - ExVTBits, VT, DL: dl);
2466	return TLO.CombineTo(O: Op,
2467	N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op0, N2: ShiftAmt));
2468	}
2469	}
2470
2471	// If none of the extended bits are demanded, eliminate the sextinreg.
2472	if (DemandedBits.getActiveBits() <= ExVTBits)
2473	return TLO.CombineTo(O: Op, N: Op0);
2474
2475	APInt InputDemandedBits = DemandedBits.getLoBits(numBits: ExVTBits);
2476
2477	// Since the sign extended bits are demanded, we know that the sign
2478	// bit is demanded.
2479	InputDemandedBits.setBit(ExVTBits - `1`);
2480
2481	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InputDemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
2482	Depth: Depth + `1`))
2483	return true;
2484
2485	// If the sign bit of the input is known set or clear, then we know the
2486	// top bits of the result.
2487
2488	// If the input sign bit is known zero, convert this into a zero extension.
2489	if (Known.Zero [ExVTBits - `1`])
2490	return TLO.CombineTo(O: Op, N: TLO.DAG.getZeroExtendInReg(Op: Op0, DL: dl, VT: ExVT));
2491
2492	APInt Mask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ExVTBits);
2493	if (Known.One [ExVTBits - `1`]) { // Input sign bit known set
2494	Known.One.setBitsFrom(ExVTBits);
2495	Known.Zero &= Mask;
2496	} else { // Input sign bit unknown
2497	Known.Zero &= Mask;
2498	Known.One &= Mask;
2499	}
2500	break;
2501	}
2502	case ISD::BUILD_PAIR: {
2503	EVT HalfVT = Op.getOperand(i: `0`).getValueType();
2504	unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2505
2506	APInt MaskLo = DemandedBits.getLoBits(numBits: HalfBitWidth).trunc(width: HalfBitWidth);
2507	APInt MaskHi = DemandedBits.getHiBits(numBits: HalfBitWidth).trunc(width: HalfBitWidth);
2508
2509	KnownBits KnownLo, KnownHi;
2510
2511	if (SimplifyDemandedBits(Op: Op.getOperand(i: `0`), DemandedBits: MaskLo, Known&: KnownLo, TLO, Depth: Depth + `1`))
2512	return true;
2513
2514	if (SimplifyDemandedBits(Op: Op.getOperand(i: `1`), DemandedBits: MaskHi, Known&: KnownHi, TLO, Depth: Depth + `1`))
2515	return true;
2516
2517	Known = KnownHi.concat(Lo: KnownLo);
2518	break;
2519	}
2520	case ISD::ZERO_EXTEND_VECTOR_INREG:
2521	if (VT.isScalableVector())
2522	return false;
2523	[[fallthrough]];
2524	case ISD::ZERO_EXTEND: {
2525	SDValue Src = Op.getOperand(i: `0`);
2526	EVT SrcVT = Src.getValueType();
2527	unsigned InBits = SrcVT.getScalarSizeInBits();
2528	unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : `1`;
2529	bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2530
2531	// If none of the top bits are demanded, convert this into an any_extend.
2532	if (DemandedBits.getActiveBits() <= InBits) {
2533	// If we only need the non-extended bits of the bottom element
2534	// then we can just bitcast to the result.
2535	if (IsLE && IsVecInReg && DemandedElts == `1` &&
2536	VT.getSizeInBits() == SrcVT.getSizeInBits())
2537	return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2538
2539	unsigned Opc =
2540	IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2541	if (!TLO.LegalOperations() \|\| isOperationLegal(Op: Opc, VT))
2542	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src));
2543	}
2544
2545	APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2546	APInt InDemandedElts = DemandedElts.zext(width: InElts);
2547	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2548	Depth: Depth + `1`)) {
2549	Op ->dropFlags(Mask: SDNodeFlags::NonNeg);
2550	return true;
2551	}
2552	assert(Known.getBitWidth() == InBits && "Src width has changed?");
2553	Known = Known.zext(BitWidth);
2554
2555	// Attempt to avoid multi-use ops if we don't need anything from them.
2556	if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2557	Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`))
2558	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2559	break;
2560	}
2561	case ISD::SIGN_EXTEND_VECTOR_INREG:
2562	if (VT.isScalableVector())
2563	return false;
2564	[[fallthrough]];
2565	case ISD::SIGN_EXTEND: {
2566	SDValue Src = Op.getOperand(i: `0`);
2567	EVT SrcVT = Src.getValueType();
2568	unsigned InBits = SrcVT.getScalarSizeInBits();
2569	unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : `1`;
2570	bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2571
2572	APInt InDemandedElts = DemandedElts.zext(width: InElts);
2573	APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2574
2575	// Since some of the sign extended bits are demanded, we know that the sign
2576	// bit is demanded.
2577	InDemandedBits.setBit(InBits - `1`);
2578
2579	// If none of the top bits are demanded, convert this into an any_extend.
2580	if (DemandedBits.getActiveBits() <= InBits) {
2581	// If we only need the non-extended bits of the bottom element
2582	// then we can just bitcast to the result.
2583	if (IsLE && IsVecInReg && DemandedElts == `1` &&
2584	VT.getSizeInBits() == SrcVT.getSizeInBits())
2585	return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2586
2587	// Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2588	if (getBooleanContents(Type: VT) != ZeroOrNegativeOneBooleanContent \|\|
2589	TLO.DAG.ComputeNumSignBits(Op: Src, DemandedElts: InDemandedElts, Depth: Depth + `1`) !=
2590	InBits) {
2591	unsigned Opc =
2592	IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2593	if (!TLO.LegalOperations() \|\| isOperationLegal(Op: Opc, VT))
2594	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src));
2595	}
2596	}
2597
2598	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2599	Depth: Depth + `1`))
2600	return true;
2601	assert(Known.getBitWidth() == InBits && "Src width has changed?");
2602
2603	// If the sign bit is known one, the top bits match.
2604	Known = Known.sext(BitWidth);
2605
2606	// If the sign bit is known zero, convert this to a zero extend.
2607	if (Known.isNonNegative()) {
2608	unsigned Opc =
2609	IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2610	if (!TLO.LegalOperations() \|\| isOperationLegal(Op: Opc, VT)) {
2611	SDNodeFlags Flags;
2612	if (!IsVecInReg)
2613	Flags \|= SDNodeFlags::NonNeg;
2614	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src, Flags));
2615	}
2616	}
2617
2618	// Attempt to avoid multi-use ops if we don't need anything from them.
2619	if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2620	Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`))
2621	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2622	break;
2623	}
2624	case ISD::ANY_EXTEND_VECTOR_INREG:
2625	if (VT.isScalableVector())
2626	return false;
2627	[[fallthrough]];
2628	case ISD::ANY_EXTEND: {
2629	SDValue Src = Op.getOperand(i: `0`);
2630	EVT SrcVT = Src.getValueType();
2631	unsigned InBits = SrcVT.getScalarSizeInBits();
2632	unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : `1`;
2633	bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2634
2635	// If we only need the bottom element then we can just bitcast.
2636	// TODO: Handle ANY_EXTEND?
2637	if (IsLE && IsVecInReg && DemandedElts == `1` &&
2638	VT.getSizeInBits() == SrcVT.getSizeInBits())
2639	return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2640
2641	APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2642	APInt InDemandedElts = DemandedElts.zext(width: InElts);
2643	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2644	Depth: Depth + `1`))
2645	return true;
2646	assert(Known.getBitWidth() == InBits && "Src width has changed?");
2647	Known = Known.anyext(BitWidth);
2648
2649	// Attempt to avoid multi-use ops if we don't need anything from them.
2650	if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2651	Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`))
2652	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2653	break;
2654	}
2655	case ISD::TRUNCATE: {
2656	SDValue Src = Op.getOperand(i: `0`);
2657
2658	// Simplify the input, using demanded bit information, and compute the known
2659	// zero/one bits live out.
2660	unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2661	APInt TruncMask = DemandedBits.zext(width: OperandBitWidth);
2662	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: TruncMask, OriginalDemandedElts: DemandedElts, Known, TLO,
2663	Depth: Depth + `1`)) {
2664	// Disable the nsw and nuw flags. We can no longer guarantee that we
2665	// won't wrap after simplification.
2666	Op ->dropFlags(Mask: SDNodeFlags::NoWrap);
2667	return true;
2668	}
2669	Known = Known.trunc(BitWidth);
2670
2671	// Attempt to avoid multi-use ops if we don't need anything from them.
2672	if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2673	Op: Src, DemandedBits: TruncMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`))
2674	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: NewSrc));
2675
2676	// If the input is only used by this truncate, see if we can shrink it based
2677	// on the known demanded bits.
2678	switch (Src.getOpcode()) {
2679	default:
2680	break;
2681	case ISD::SRL:
2682	// Shrink SRL by a constant if none of the high bits shifted in are
2683	// demanded.
2684	if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2685	// Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2686	// undesirable.
2687	break;
2688
2689	if (Src.getNode()->hasOneUse()) {
2690	if (isTruncateFree(Val: Src, VT2: VT) &&
2691	!isTruncateFree(FromVT: Src.getValueType(), ToVT: VT)) {
2692	// If truncate is only free at trunc(srl), do not turn it into
2693	// srl(trunc). The check is done by first checking that the truncate is
2694	// free at Src's opcode (srl), then checking that the truncate is not done
2695	// by referencing a sub-register. In testing, if both trunc(srl) and
2696	// srl(trunc)'s trunc are free, srl(trunc) performs better. If only
2697	// trunc(srl)'s trunc is free, trunc(srl) is better.
2698	break;
2699	}
2700
2701	std::optional<unsigned> ShAmtC =
2702	TLO.DAG.getValidShiftAmount(V: Src, DemandedElts, Depth: Depth + `2`);
2703	if (!ShAmtC \|\| *ShAmtC >= BitWidth)
2704	break;
2705	unsigned ShVal = *ShAmtC;
2706
2707	APInt HighBits =
2708	APInt::getHighBitsSet(numBits: OperandBitWidth, hiBitsSet: OperandBitWidth - BitWidth);
2709	HighBits.lshrInPlace(ShiftAmt: ShVal);
2710	HighBits = HighBits.trunc(width: BitWidth);
2711	if (!(HighBits & DemandedBits)) {
2712	// None of the shifted in bits are needed. Add a truncate of the
2713	// shift input, then shift it.
2714	SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(Val: ShVal, VT, DL: dl);
2715	SDValue NewTrunc =
2716	TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Src.getOperand(i: `0`));
2717	return TLO.CombineTo(
2718	O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: NewTrunc, N2: NewShAmt));
2719	}
2720	}
2721	break;
2722	}
2723
2724	break;
2725	}
2726	case ISD::AssertZext: {
2727	// AssertZext demands all of the high bits, plus any of the low bits
2728	// demanded by its users.
2729	EVT ZVT = cast<VTSDNode>(Val: Op.getOperand(i: `1`))->getVT();
2730	APInt InMask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ZVT.getSizeInBits());
2731	if (SimplifyDemandedBits(Op: Op.getOperand(i: `0`), DemandedBits: ~InMask \| DemandedBits, Known,
2732	TLO, Depth: Depth + `1`))
2733	return true;
2734
2735	Known.Zero \|= ~InMask;
2736	Known.One &= (~Known.Zero);
2737	break;
2738	}
2739	case ISD::EXTRACT_VECTOR_ELT: {
2740	SDValue Src = Op.getOperand(i: `0`);
2741	SDValue Idx = Op.getOperand(i: `1`);
2742	ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2743	unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2744
2745	if (SrcEltCnt.isScalable())
2746	return false;
2747
2748	// Demand the bits from every vector element without a constant index.
2749	unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2750	APInt DemandedSrcElts = APInt::getAllOnes(numBits: NumSrcElts);
2751	if (auto *CIdx = dyn_cast<ConstantSDNode>(Val&: Idx))
2752	if (CIdx->getAPIntValue().ult(RHS: NumSrcElts))
2753	DemandedSrcElts = APInt::getOneBitSet(numBits: NumSrcElts, BitNo: CIdx->getZExtValue());
2754
2755	// If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2756	// anything about the extended bits.
2757	APInt DemandedSrcBits = DemandedBits;
2758	if (BitWidth > EltBitWidth)
2759	DemandedSrcBits = DemandedSrcBits.trunc(width: EltBitWidth);
2760
2761	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts, Known&: Known2, TLO,
2762	Depth: Depth + `1`))
2763	return true;
2764
2765	// Attempt to avoid multi-use ops if we don't need anything from them.
2766	if (!DemandedSrcBits.isAllOnes() \|\| !DemandedSrcElts.isAllOnes()) {
2767	if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2768	Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + `1`)) {
2769	SDValue NewOp =
2770	TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedSrc, N2: Idx);
2771	return TLO.CombineTo(O: Op, N: NewOp);
2772	}
2773	}
2774
2775	Known = Known2;
2776	if (BitWidth > EltBitWidth)
2777	Known = Known.anyext(BitWidth);
2778	break;
2779	}
2780	case ISD::BITCAST: {
2781	if (VT.isScalableVector())
2782	return false;
2783	SDValue Src = Op.getOperand(i: `0`);
2784	EVT SrcVT = Src.getValueType();
2785	unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2786
2787	// If this is an FP->Int bitcast and if the sign bit is the only
2788	// thing demanded, turn this into a FGETSIGN.
2789	if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2790	DemandedBits == APInt::getSignMask(BitWidth: Op.getValueSizeInBits()) &&
2791	SrcVT.isFloatingPoint()) {
2792	bool OpVTLegal = isOperationLegalOrCustom(Op: ISD::FGETSIGN, VT);
2793	bool i32Legal = isOperationLegalOrCustom(Op: ISD::FGETSIGN, VT: MVT::i32);
2794	if ((OpVTLegal \|\| i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2795	SrcVT != MVT::f128) {
2796	// Cannot eliminate/lower SHL for f128 yet.
2797	EVT Ty = OpVTLegal ? VT : MVT::i32;
2798	// Make a FGETSIGN + SHL to move the sign bit into the appropriate
2799	// place. We expect the SHL to be eliminated by other optimizations.
2800	SDValue Sign = TLO.DAG.getNode(Opcode: ISD::FGETSIGN, DL: dl, VT: Ty, Operand: Src);
2801	unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2802	if (!OpVTLegal && OpVTSizeInBits > `32`)
2803	Sign = TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Sign);
2804	unsigned ShVal = Op.getValueSizeInBits() - `1`;
2805	SDValue ShAmt = TLO.DAG.getConstant(Val: ShVal, DL: dl, VT);
2806	return TLO.CombineTo(O: Op,
2807	N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Sign, N2: ShAmt));
2808	}
2809	}
2810
2811	// Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2812	// Demand the elt/bit if any of the original elts/bits are demanded.
2813	if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == `0`) {
2814	unsigned Scale = BitWidth / NumSrcEltBits;
2815	unsigned NumSrcElts = SrcVT.getVectorNumElements();
2816	APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
2817	for (unsigned i = `0`; i != Scale; ++i) {
2818	unsigned EltOffset = IsLE ? i : (Scale - `1` - i);
2819	unsigned BitOffset = EltOffset * NumSrcEltBits;
2820	DemandedSrcBits \|= DemandedBits.extractBits(numBits: NumSrcEltBits, bitPosition: BitOffset);
2821	}
2822	// Recursive calls below may turn not demanded elements into poison, so we
2823	// need to demand all smaller source elements that maps to a demanded
2824	// destination element.
2825	APInt DemandedSrcElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
2826
2827	APInt KnownSrcUndef, KnownSrcZero;
2828	if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedSrcElts, KnownUndef&: KnownSrcUndef,
2829	KnownZero&: KnownSrcZero, TLO, Depth: Depth + `1`))
2830	return true;
2831
2832	KnownBits KnownSrcBits;
2833	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts,
2834	Known&: KnownSrcBits, TLO, Depth: Depth + `1`))
2835	return true;
2836	} else if (IsLE && (NumSrcEltBits % BitWidth) == `0`) {
2837	// TODO - bigendian once we have test coverage.
2838	unsigned Scale = NumSrcEltBits / BitWidth;
2839	unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : `1`;
2840	APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
2841	APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
2842	for (unsigned i = `0`; i != NumElts; ++i)
2843	if (DemandedElts [i]) {
2844	unsigned Offset = (i % Scale) * BitWidth;
2845	DemandedSrcBits.insertBits(SubBits: DemandedBits, bitPosition: Offset);
2846	DemandedSrcElts.setBit(i / Scale);
2847	}
2848
2849	if (SrcVT.isVector()) {
2850	APInt KnownSrcUndef, KnownSrcZero;
2851	if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedSrcElts, KnownUndef&: KnownSrcUndef,
2852	KnownZero&: KnownSrcZero, TLO, Depth: Depth + `1`))
2853	return true;
2854	}
2855
2856	KnownBits KnownSrcBits;
2857	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts,
2858	Known&: KnownSrcBits, TLO, Depth: Depth + `1`))
2859	return true;
2860
2861	// Attempt to avoid multi-use ops if we don't need anything from them.
2862	if (!DemandedSrcBits.isAllOnes() \|\| !DemandedSrcElts.isAllOnes()) {
2863	if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2864	Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + `1`)) {
2865	SDValue NewOp = TLO.DAG.getBitcast(VT, V: DemandedSrc);
2866	return TLO.CombineTo(O: Op, N: NewOp);
2867	}
2868	}
2869	}
2870
2871	// If this is a bitcast, let computeKnownBits handle it. Only do this on a
2872	// recursive call where Known may be useful to the caller.
2873	if (Depth > `0`) {
2874	Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2875	return false;
2876	}
2877	break;
2878	}
2879	case ISD::MUL:
2880	if (DemandedBits.isPowerOf2()) {
2881	// The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2882	// If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2883	// odd (has LSB set), then the left-shifted low bit of X is the answer.
2884	unsigned CTZ = DemandedBits.countr_zero();
2885	ConstantSDNode *C = isConstOrConstSplat(N: Op.getOperand(i: `1`), DemandedElts);
2886	if (C && C->getAPIntValue().countr_zero() == CTZ) {
2887	SDValue AmtC = TLO.DAG.getShiftAmountConstant(Val: CTZ, VT, DL: dl);
2888	SDValue Shl = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op.getOperand(i: `0`), N2: AmtC);
2889	return TLO.CombineTo(O: Op, N: Shl);
2890	}
2891	}
2892	// For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2893	// X * X is odd iff X is odd.
2894	// 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2895	if (Op.getOperand(i: `0`) == Op.getOperand(i: `1`) && DemandedBits.ult(RHS: `4`)) {
2896	SDValue One = TLO.DAG.getConstant(Val: `1`, DL: dl, VT);
2897	SDValue And1 = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op.getOperand(i: `0`), N2: One);
2898	return TLO.CombineTo(O: Op, N: And1);
2899	}
2900	[[fallthrough]];
2901	case ISD::PTRADD:
2902	if (Op.getOperand(i: `0`).getValueType() != Op.getOperand(i: `1`).getValueType())
2903	break;
2904	// PTRADD behaves like ADD if pointers are represented as integers.
2905	[[fallthrough]];
2906	case ISD::ADD:
2907	case ISD::SUB: {
2908	// Add, Sub, and Mul don't demand any bits in positions beyond that
2909	// of the highest bit demanded of them.
2910	SDValue Op0 = Op.getOperand(i: `0`), Op1 = Op.getOperand(i: `1`);
2911	SDNodeFlags Flags = Op.getNode()->getFlags();
2912	unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2913	APInt LoMask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - DemandedBitsLZ);
2914	KnownBits KnownOp0, KnownOp1;
2915	auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2916	const KnownBits &KnownRHS) {
2917	if (Op.getOpcode() == ISD::MUL)
2918	Demanded.clearHighBits(hiBits: KnownRHS.countMinTrailingZeros());
2919	return Demanded;
2920	};
2921	if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: LoMask, OriginalDemandedElts: DemandedElts, Known&: KnownOp1, TLO,
2922	Depth: Depth + `1`) \|\|
2923	SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: GetDemandedBitsLHSMask (LoMask, KnownOp1),
2924	OriginalDemandedElts: DemandedElts, Known&: KnownOp0, TLO, Depth: Depth + `1`) \|\|
2925	// See if the operation should be performed at a smaller bit width.
2926	ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2927	// Disable the nsw and nuw flags. We can no longer guarantee that we
2928	// won't wrap after simplification.
2929	Op ->dropFlags(Mask: SDNodeFlags::NoWrap);
2930	return true;
2931	}
2932
2933	// neg x with only low bit demanded is simply x.
2934	if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2935	isNullConstant(V: Op0))
2936	return TLO.CombineTo(O: Op, N: Op1);
2937
2938	// Attempt to avoid multi-use ops if we don't need anything from them.
2939	if (!LoMask.isAllOnes() \|\| !DemandedElts.isAllOnes()) {
2940	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2941	Op: Op0, DemandedBits: LoMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
2942	SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2943	Op: Op1, DemandedBits: LoMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
2944	if (DemandedOp0 \|\| DemandedOp1) {
2945	Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2946	Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2947	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1,
2948	Flags: Flags & ~SDNodeFlags::NoWrap);
2949	return TLO.CombineTo(O: Op, N: NewOp);
2950	}
2951	}
2952
2953	// If we have a constant operand, we may be able to turn it into -1 if we
2954	// do not demand the high bits. This can make the constant smaller to
2955	// encode, allow more general folding, or match specialized instruction
2956	// patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2957	// is probably not useful (and could be detrimental).
2958	ConstantSDNode *C = isConstOrConstSplat(N: Op1);
2959	APInt HighMask = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: DemandedBitsLZ);
2960	if (C && !C->isAllOnes() && !C->isOne() &&
2961	(C->getAPIntValue() \| HighMask).isAllOnes()) {
2962	SDValue Neg1 = TLO.DAG.getAllOnesConstant(DL: dl, VT);
2963	// Disable the nsw and nuw flags. We can no longer guarantee that we
2964	// won't wrap after simplification.
2965	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Neg1,
2966	Flags: Flags & ~SDNodeFlags::NoWrap);
2967	return TLO.CombineTo(O: Op, N: NewOp);
2968	}
2969
2970	// Match a multiply with a disguised negated-power-of-2 and convert to
2971	// an equivalent shift-left amount.
2972	// Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2973	auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2974	if (Mul.getOpcode() != ISD::MUL \|\| !Mul.hasOneUse())
2975	return `0`;
2976
2977	// Don't touch opaque constants. Also, ignore zero and power-of-2
2978	// multiplies. Those will get folded later.
2979	ConstantSDNode *MulC = isConstOrConstSplat(N: Mul.getOperand(i: `1`));
2980	if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2981	!MulC->getAPIntValue().isPowerOf2()) {
2982	APInt UnmaskedC = MulC->getAPIntValue() \| HighMask;
2983	if (UnmaskedC.isNegatedPowerOf2())
2984	return (-UnmaskedC).logBase2();
2985	}
2986	return `0`;
2987	};
2988
2989	auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2990	unsigned ShlAmt) {
2991	SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(Val: ShlAmt, VT, DL: dl);
2992	SDValue Shl = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: X, N2: ShlAmtC);
2993	SDValue Res = TLO.DAG.getNode(Opcode: NT, DL: dl, VT, N1: Y, N2: Shl);
2994	return TLO.CombineTo(O: Op, N: Res);
2995	};
2996
2997	if (isOperationLegalOrCustom(Op: ISD::SHL, VT)) {
2998	if (Op.getOpcode() == ISD::ADD) {
2999	// (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
3000	if (unsigned ShAmt = getShiftLeftAmt (Op0))
3001	return foldMul (ISD::SUB, Op0.getOperand(i: `0`), Op1, ShAmt);
3002	// Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
3003	if (unsigned ShAmt = getShiftLeftAmt (Op1))
3004	return foldMul (ISD::SUB, Op1.getOperand(i: `0`), Op0, ShAmt);
3005	}
3006	if (Op.getOpcode() == ISD::SUB) {
3007	// Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
3008	if (unsigned ShAmt = getShiftLeftAmt (Op1))
3009	return foldMul (ISD::ADD, Op1.getOperand(i: `0`), Op0, ShAmt);
3010	}
3011	}
3012
3013	if (Op.getOpcode() == ISD::MUL) {
3014	Known = KnownBits::mul(LHS: KnownOp0, RHS: KnownOp1);
3015	} else { // Op.getOpcode() is either ISD::ADD, ISD::PTRADD, or ISD::SUB.
3016	Known = KnownBits::computeForAddSub(
3017	Add: Op.getOpcode() != ISD::SUB, NSW: Flags.hasNoSignedWrap(),
3018	NUW: Flags.hasNoUnsignedWrap(), LHS: KnownOp0, RHS: KnownOp1);
3019	}
3020	break;
3021	}
3022	case ISD::FABS: {
3023	SDValue Op0 = Op.getOperand(i: `0`);
3024	APInt SignMask = APInt::getSignMask(BitWidth);
3025
3026	if (!DemandedBits.intersects(RHS: SignMask))
3027	return TLO.CombineTo(O: Op, N: Op0);
3028
3029	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
3030	Depth: Depth + `1`))
3031	return true;
3032
3033	if (Known.isNonNegative())
3034	return TLO.CombineTo(O: Op, N: Op0);
3035	if (Known.isNegative())
3036	return TLO.CombineTo(
3037	O: Op, N: TLO.DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT, Operand: Op0, Flags: Op ->getFlags()));
3038
3039	Known.Zero \|= SignMask;
3040	Known.One &= ~SignMask;
3041
3042	break;
3043	}
3044	case ISD::FCOPYSIGN: {
3045	SDValue Op0 = Op.getOperand(i: `0`);
3046	SDValue Op1 = Op.getOperand(i: `1`);
3047
3048	unsigned BitWidth0 = Op0.getScalarValueSizeInBits();
3049	unsigned BitWidth1 = Op1.getScalarValueSizeInBits();
3050	APInt SignMask0 = APInt::getSignMask(BitWidth: BitWidth0);
3051	APInt SignMask1 = APInt::getSignMask(BitWidth: BitWidth1);
3052
3053	if (!DemandedBits.intersects(RHS: SignMask0))
3054	return TLO.CombineTo(O: Op, N: Op0);
3055
3056	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: ~SignMask0 & DemandedBits, OriginalDemandedElts: DemandedElts,
3057	Known, TLO, Depth: Depth + `1`) \|\|
3058	SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: SignMask1, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
3059	Depth: Depth + `1`))
3060	return true;
3061
3062	if (Known2.isNonNegative())
3063	return TLO.CombineTo(
3064	O: Op, N: TLO.DAG.getNode(Opcode: ISD::FABS, DL: dl, VT, Operand: Op0, Flags: Op ->getFlags()));
3065
3066	if (Known2.isNegative())
3067	return TLO.CombineTo(
3068	O: Op, N: TLO.DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT,
3069	Operand: TLO.DAG.getNode(Opcode: ISD::FABS, DL: SDLoc (Op0), VT, Operand: Op0)));
3070
3071	Known.Zero &= ~SignMask0;
3072	Known.One &= ~SignMask0;
3073	break;
3074	}
3075	case ISD::FNEG: {
3076	SDValue Op0 = Op.getOperand(i: `0`);
3077	APInt SignMask = APInt::getSignMask(BitWidth);
3078
3079	if (!DemandedBits.intersects(RHS: SignMask))
3080	return TLO.CombineTo(O: Op, N: Op0);
3081
3082	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
3083	Depth: Depth + `1`))
3084	return true;
3085
3086	if (!Known.isSignUnknown()) {
3087	Known.Zero ^= SignMask;
3088	Known.One ^= SignMask;
3089	}
3090
3091	break;
3092	}
3093	default:
3094	// We also ask the target about intrinsics (which could be specific to it).
3095	if (Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
3096	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3097	// TODO: Probably okay to remove after audit; here to reduce change size
3098	// in initial enablement patch for scalable vectors
3099	if (Op.getValueType().isScalableVector())
3100	break;
3101	if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
3102	Known, TLO, Depth))
3103	return true;
3104	break;
3105	}
3106
3107	// Just use computeKnownBits to compute output bits.
3108	Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
3109	break;
3110	}
3111
3112	// If we know the value of all of the demanded bits, return this as a
3113	// constant.
3114	if (!isTargetCanonicalConstantNode(Op) &&
3115	DemandedBits.isSubsetOf(RHS: Known.Zero \| Known.One)) {
3116	// Avoid folding to a constant if any OpaqueConstant is involved.
3117	if (llvm::any_of(Range: Op ->ops(), P: [](SDValue V) {
3118	auto *C = dyn_cast<ConstantSDNode>(Val&: V);
3119	return C && C->isOpaque();
3120	}))
3121	return false;
3122	if (VT.isInteger())
3123	return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: Known.One, DL: dl, VT));
3124	if (VT.isFloatingPoint())
3125	return TLO.CombineTo(
3126	O: Op, N: TLO.DAG.getConstantFP(Val: APFloat (VT.getFltSemantics(), Known.One),
3127	DL: dl, VT));
3128	}
3129
3130	// A multi use 'all demanded elts' simplify failed to find any knownbits.
3131	// Try again just for the original demanded elts.
3132	// Ensure we do this AFTER constant folding above.
3133	if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3134	Known = TLO.DAG.computeKnownBits(Op, DemandedElts: OriginalDemandedElts, Depth);
3135
3136	return false;
3137	}
3138
3139	bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
3140	const APInt &DemandedElts,
3141	DAGCombinerInfo &DCI) const {
3142	SelectionDAG &DAG = DCI.DAG;
3143	TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3144	!DCI.isBeforeLegalizeOps());
3145
3146	APInt KnownUndef, KnownZero;
3147	bool Simplified =
3148	SimplifyDemandedVectorElts(Op, DemandedEltMask: DemandedElts, KnownUndef, KnownZero, TLO);
3149	if (Simplified) {
3150	DCI.AddToWorklist(N: Op.getNode());
3151	DCI.CommitTargetLoweringOpt(TLO);
3152	}
3153
3154	return Simplified;
3155	}
3156
3157	/// Given a vector binary operation and known undefined elements for each input
3158	/// operand, compute whether each element of the output is undefined.
3159	static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
3160	const APInt &UndefOp0,
3161	const APInt &UndefOp1) {
3162	EVT VT = BO.getValueType();
3163	assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
3164	"Vector binop only");
3165
3166	EVT EltVT = VT.getVectorElementType();
3167	unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : `1`;
3168	assert(UndefOp0.getBitWidth() == NumElts &&
3169	UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3170
3171	auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3172	const APInt &UndefVals) {
3173	if (UndefVals [Index])
3174	return DAG.getUNDEF(VT: EltVT);
3175
3176	if (auto *BV = dyn_cast<BuildVectorSDNode>(Val&: V)) {
3177	// Try hard to make sure that the getNode() call is not creating temporary
3178	// nodes. Ignore opaque integers because they do not constant fold.
3179	SDValue Elt = BV->getOperand(Num: Index);
3180	auto *C = dyn_cast<ConstantSDNode>(Val&: Elt);
3181	if (isa<ConstantFPSDNode>(Val: Elt) \|\| Elt.isUndef() \|\| (C && !C->isOpaque()))
3182	return Elt;
3183	}
3184
3185	return SDValue ();
3186	};
3187
3188	APInt KnownUndef = APInt::getZero(numBits: NumElts);
3189	for (unsigned i = `0`; i != NumElts; ++i) {
3190	// If both inputs for this element are either constant or undef and match
3191	// the element type, compute the constant/undef result for this element of
3192	// the vector.
3193	// TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3194	// not handle FP constants. The code within getNode() should be refactored
3195	// to avoid the danger of creating a bogus temporary node here.
3196	SDValue C0 = getUndefOrConstantElt (BO.getOperand(i: `0`), i, UndefOp0);
3197	SDValue C1 = getUndefOrConstantElt (BO.getOperand(i: `1`), i, UndefOp1);
3198	if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3199	if (DAG.getNode(Opcode: BO.getOpcode(), DL: SDLoc (BO), VT: EltVT, N1: C0, N2: C1).isUndef())
3200	KnownUndef.setBit(i);
3201	}
3202	return KnownUndef;
3203	}
3204
3205	bool TargetLowering::SimplifyDemandedVectorElts(
3206	SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3207	APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3208	bool AssumeSingleUse) const {
3209	EVT VT = Op.getValueType();
3210	unsigned Opcode = Op.getOpcode();
3211	APInt DemandedElts = OriginalDemandedElts;
3212	unsigned NumElts = DemandedElts.getBitWidth();
3213	assert(VT.isVector() && "Expected vector op");
3214
3215	KnownUndef = KnownZero = APInt::getZero(numBits: NumElts);
3216
3217	if (!shouldSimplifyDemandedVectorElts(Op, TLO))
3218	return false;
3219
3220	// TODO: For now we assume we know nothing about scalable vectors.
3221	if (VT.isScalableVector())
3222	return false;
3223
3224	assert(VT.getVectorNumElements() == NumElts &&
3225	"Mask size mismatches value type element count!");
3226
3227	// Undef operand.
3228	if (Op.isUndef()) {
3229	KnownUndef.setAllBits();
3230	return false;
3231	}
3232
3233	// If Op has other users, assume that all elements are needed.
3234	if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3235	DemandedElts.setAllBits();
3236
3237	// Not demanding any elements from Op.
3238	if (DemandedElts == `0`) {
3239	KnownUndef.setAllBits();
3240	return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3241	}
3242
3243	// Limit search depth.
3244	if (Depth >= SelectionDAG::MaxRecursionDepth)
3245	return false;
3246
3247	SDLoc DL(Op);
3248	unsigned EltSizeInBits = VT.getScalarSizeInBits();
3249	bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3250
3251	// Helper for demanding the specified elements and all the bits of both binary
3252	// operands.
3253	auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3254	SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op: Op0, DemandedElts,
3255	DAG&: TLO.DAG, Depth: Depth + `1`);
3256	SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op: Op1, DemandedElts,
3257	DAG&: TLO.DAG, Depth: Depth + `1`);
3258	if (NewOp0 \|\| NewOp1) {
3259	SDValue NewOp =
3260	TLO.DAG.getNode(Opcode, DL: SDLoc (Op), VT, N1: NewOp0 ? NewOp0 : Op0,
3261	N2: NewOp1 ? NewOp1 : Op1, Flags: Op ->getFlags());
3262	return TLO.CombineTo(O: Op, N: NewOp);
3263	}
3264	return false;
3265	};
3266
3267	switch (Opcode) {
3268	case ISD::SCALAR_TO_VECTOR: {
3269	if (!DemandedElts [`0`]) {
3270	KnownUndef.setAllBits();
3271	return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3272	}
3273	KnownUndef.setHighBits(NumElts - `1`);
3274	break;
3275	}
3276	case ISD::BITCAST: {
3277	SDValue Src = Op.getOperand(i: `0`);
3278	EVT SrcVT = Src.getValueType();
3279
3280	if (!SrcVT.isVector()) {
3281	// TODO - bigendian once we have test coverage.
3282	if (IsLE) {
3283	APInt DemandedSrcBits = APInt::getZero(numBits: SrcVT.getSizeInBits());
3284	unsigned EltSize = VT.getScalarSizeInBits();
3285	for (unsigned I = `0`; I != NumElts; ++I) {
3286	if (DemandedElts [I]) {
3287	unsigned Offset = I * EltSize;
3288	DemandedSrcBits.setBits(loBit: Offset, hiBit: Offset + EltSize);
3289	}
3290	}
3291	KnownBits Known;
3292	if (SimplifyDemandedBits(Op: Src, DemandedBits: DemandedSrcBits, Known, TLO, Depth: Depth + `1`))
3293	return true;
3294	}
3295	break;
3296	}
3297
3298	// Fast handling of 'identity' bitcasts.
3299	unsigned NumSrcElts = SrcVT.getVectorNumElements();
3300	if (NumSrcElts == NumElts)
3301	return SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedElts, KnownUndef,
3302	KnownZero, TLO, Depth: Depth + `1`);
3303
3304	APInt SrcDemandedElts, SrcZero, SrcUndef;
3305
3306	// Bitcast from 'large element' src vector to 'small element' vector, we
3307	// must demand a source element if any DemandedElt maps to it.
3308	if ((NumElts % NumSrcElts) == `0`) {
3309	unsigned Scale = NumElts / NumSrcElts;
3310	SrcDemandedElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
3311	if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
3312	TLO, Depth: Depth + `1`))
3313	return true;
3314
3315	// Try calling SimplifyDemandedBits, converting demanded elts to the bits
3316	// of the large element.
3317	// TODO - bigendian once we have test coverage.
3318	if (IsLE) {
3319	unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3320	APInt SrcDemandedBits = APInt::getZero(numBits: SrcEltSizeInBits);
3321	for (unsigned i = `0`; i != NumElts; ++i)
3322	if (DemandedElts [i]) {
3323	unsigned Ofs = (i % Scale) * EltSizeInBits;
3324	SrcDemandedBits.setBits(loBit: Ofs, hiBit: Ofs + EltSizeInBits);
3325	}
3326
3327	KnownBits Known;
3328	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: SrcDemandedBits, OriginalDemandedElts: SrcDemandedElts, Known,
3329	TLO, Depth: Depth + `1`))
3330	return true;
3331
3332	// The bitcast has split each wide element into a number of
3333	// narrow subelements. We have just computed the Known bits
3334	// for wide elements. See if element splitting results in
3335	// some subelements being zero. Only for demanded elements!
3336	for (unsigned SubElt = `0`; SubElt != Scale; ++SubElt) {
3337	if (!Known.Zero.extractBits(numBits: EltSizeInBits, bitPosition: SubElt * EltSizeInBits)
3338	.isAllOnes())
3339	continue;
3340	for (unsigned SrcElt = `0`; SrcElt != NumSrcElts; ++SrcElt) {
3341	unsigned Elt = Scale * SrcElt + SubElt;
3342	if (DemandedElts [Elt])
3343	KnownZero.setBit(Elt);
3344	}
3345	}
3346	}
3347
3348	// If the src element is zero/undef then all the output elements will be -
3349	// only demanded elements are guaranteed to be correct.
3350	for (unsigned i = `0`; i != NumSrcElts; ++i) {
3351	if (SrcDemandedElts [i]) {
3352	if (SrcZero [i])
3353	KnownZero.setBits(loBit: i * Scale, hiBit: (i + `1`) * Scale);
3354	if (SrcUndef [i])
3355	KnownUndef.setBits(loBit: i * Scale, hiBit: (i + `1`) * Scale);
3356	}
3357	}
3358	}
3359
3360	// Bitcast from 'small element' src vector to 'large element' vector, we
3361	// demand all smaller source elements covered by the larger demanded element
3362	// of this vector.
3363	if ((NumSrcElts % NumElts) == `0`) {
3364	unsigned Scale = NumSrcElts / NumElts;
3365	SrcDemandedElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
3366	if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
3367	TLO, Depth: Depth + `1`))
3368	return true;
3369
3370	// If all the src elements covering an output element are zero/undef, then
3371	// the output element will be as well, assuming it was demanded.
3372	for (unsigned i = `0`; i != NumElts; ++i) {
3373	if (DemandedElts [i]) {
3374	if (SrcZero.extractBits(numBits: Scale, bitPosition: i * Scale).isAllOnes())
3375	KnownZero.setBit(i);
3376	if (SrcUndef.extractBits(numBits: Scale, bitPosition: i * Scale).isAllOnes())
3377	KnownUndef.setBit(i);
3378	}
3379	}
3380	}
3381	break;
3382	}
3383	case ISD::FREEZE: {
3384	SDValue N0 = Op.getOperand(i: `0`);
3385	if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(Op: N0, DemandedElts,
3386	/PoisonOnly=/false,
3387	Depth: Depth + `1`))
3388	return TLO.CombineTo(O: Op, N: N0);
3389
3390	// TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3391	// freeze(op(x, ...)) -> op(freeze(x), ...).
3392	if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == `1`)
3393	return TLO.CombineTo(
3394	O: Op, N: TLO.DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT,
3395	Operand: TLO.DAG.getFreeze(V: N0.getOperand(i: `0`))));
3396	break;
3397	}
3398	case ISD::BUILD_VECTOR: {
3399	// Check all elements and simplify any unused elements with UNDEF.
3400	if (!DemandedElts.isAllOnes()) {
3401	// Don't simplify BROADCASTS.
3402	if (llvm::any_of(Range: Op ->op_values(),
3403	P: [&](SDValue Elt) { return Op.getOperand(i: `0`) != Elt; })) {
3404	SmallVector<SDValue, `32`> Ops(Op ->ops());
3405	bool Updated = false;
3406	for (unsigned i = `0`; i != NumElts; ++i) {
3407	if (!DemandedElts [i] && !Ops [i].isUndef()) {
3408	Ops [i] = TLO.DAG.getUNDEF(VT: Ops [`0`].getValueType());
3409	KnownUndef.setBit(i);
3410	Updated = true;
3411	}
3412	}
3413	if (Updated)
3414	return TLO.CombineTo(O: Op, N: TLO.DAG.getBuildVector(VT, DL, Ops));
3415	}
3416	}
3417	for (unsigned i = `0`; i != NumElts; ++i) {
3418	SDValue SrcOp = Op.getOperand(i);
3419	if (SrcOp.isUndef()) {
3420	KnownUndef.setBit(i);
3421	} else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3422	(isNullConstant(V: SrcOp) \|\| isNullFPConstant(V: SrcOp))) {
3423	KnownZero.setBit(i);
3424	}
3425	}
3426	break;
3427	}
3428	case ISD::CONCAT_VECTORS: {
3429	EVT SubVT = Op.getOperand(i: `0`).getValueType();
3430	unsigned NumSubVecs = Op.getNumOperands();
3431	unsigned NumSubElts = SubVT.getVectorNumElements();
3432	for (unsigned i = `0`; i != NumSubVecs; ++i) {
3433	SDValue SubOp = Op.getOperand(i);
3434	APInt SubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
3435	APInt SubUndef, SubZero;
3436	if (SimplifyDemandedVectorElts(Op: SubOp, OriginalDemandedElts: SubElts, KnownUndef&: SubUndef, KnownZero&: SubZero, TLO,
3437	Depth: Depth + `1`))
3438	return true;
3439	KnownUndef.insertBits(SubBits: SubUndef, bitPosition: i * NumSubElts);
3440	KnownZero.insertBits(SubBits: SubZero, bitPosition: i * NumSubElts);
3441	}
3442
3443	// Attempt to avoid multi-use ops if we don't need anything from them.
3444	if (!DemandedElts.isAllOnes()) {
3445	bool FoundNewSub = false;
3446	SmallVector<SDValue, `2`> DemandedSubOps;
3447	for (unsigned i = `0`; i != NumSubVecs; ++i) {
3448	SDValue SubOp = Op.getOperand(i);
3449	APInt SubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
3450	SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3451	Op: SubOp, DemandedElts: SubElts, DAG&: TLO.DAG, Depth: Depth + `1`);
3452	DemandedSubOps.push_back(Elt: NewSubOp ? NewSubOp : SubOp);
3453	FoundNewSub = NewSubOp ? true : FoundNewSub;
3454	}
3455	if (FoundNewSub) {
3456	SDValue NewOp =
3457	TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc (Op), VT, Ops: DemandedSubOps);
3458	return TLO.CombineTo(O: Op, N: NewOp);
3459	}
3460	}
3461	break;
3462	}
3463	case ISD::INSERT_SUBVECTOR: {
3464	// Demand any elements from the subvector and the remainder from the src it
3465	// is inserted into.
3466	SDValue Src = Op.getOperand(i: `0`);
3467	SDValue Sub = Op.getOperand(i: `1`);
3468	uint64_t Idx = Op.getConstantOperandVal(i: `2`);
3469	unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3470	APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx);
3471	APInt DemandedSrcElts = DemandedElts;
3472	DemandedSrcElts.clearBits(LoBit: Idx, HiBit: Idx + NumSubElts);
3473
3474	// If none of the sub operand elements are demanded, bypass the insert.
3475	if (!DemandedSubElts)
3476	return TLO.CombineTo(O: Op, N: Src);
3477
3478	APInt SubUndef, SubZero;
3479	if (SimplifyDemandedVectorElts(Op: Sub, OriginalDemandedElts: DemandedSubElts, KnownUndef&: SubUndef, KnownZero&: SubZero, TLO,
3480	Depth: Depth + `1`))
3481	return true;
3482
3483	// If none of the src operand elements are demanded, replace it with undef.
3484	if (!DemandedSrcElts && !Src.isUndef())
3485	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT,
3486	N1: TLO.DAG.getUNDEF(VT), N2: Sub,
3487	N3: Op.getOperand(i: `2`)));
3488
3489	if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef, KnownZero,
3490	TLO, Depth: Depth + `1`))
3491	return true;
3492	KnownUndef.insertBits(SubBits: SubUndef, bitPosition: Idx);
3493	KnownZero.insertBits(SubBits: SubZero, bitPosition: Idx);
3494
3495	// Attempt to avoid multi-use ops if we don't need anything from them.
3496	if (!DemandedSrcElts.isAllOnes() \|\| !DemandedSubElts.isAllOnes()) {
3497	SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3498	Op: Src, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + `1`);
3499	SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3500	Op: Sub, DemandedElts: DemandedSubElts, DAG&: TLO.DAG, Depth: Depth + `1`);
3501	if (NewSrc \|\| NewSub) {
3502	NewSrc = NewSrc ? NewSrc : Src;
3503	NewSub = NewSub ? NewSub : Sub;
3504	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc (Op), VT, N1: NewSrc,
3505	N2: NewSub, N3: Op.getOperand(i: `2`));
3506	return TLO.CombineTo(O: Op, N: NewOp);
3507	}
3508	}
3509	break;
3510	}
3511	case ISD::EXTRACT_SUBVECTOR: {
3512	// Offset the demanded elts by the subvector index.
3513	SDValue Src = Op.getOperand(i: `0`);
3514	if (Src.getValueType().isScalableVector())
3515	break;
3516	uint64_t Idx = Op.getConstantOperandVal(i: `1`);
3517	unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3518	APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts).shl(shiftAmt: Idx);
3519
3520	APInt SrcUndef, SrcZero;
3521	if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3522	Depth: Depth + `1`))
3523	return true;
3524	KnownUndef = SrcUndef.extractBits(numBits: NumElts, bitPosition: Idx);
3525	KnownZero = SrcZero.extractBits(numBits: NumElts, bitPosition: Idx);
3526
3527	// Attempt to avoid multi-use ops if we don't need anything from them.
3528	if (!DemandedElts.isAllOnes()) {
3529	SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3530	Op: Src, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + `1`);
3531	if (NewSrc) {
3532	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc (Op), VT, N1: NewSrc,
3533	N2: Op.getOperand(i: `1`));
3534	return TLO.CombineTo(O: Op, N: NewOp);
3535	}
3536	}
3537	break;
3538	}
3539	case ISD::INSERT_VECTOR_ELT: {
3540	SDValue Vec = Op.getOperand(i: `0`);
3541	SDValue Scl = Op.getOperand(i: `1`);
3542	auto *CIdx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `2`));
3543
3544	// For a legal, constant insertion index, if we don't need this insertion
3545	// then strip it, else remove it from the demanded elts.
3546	if (CIdx && CIdx->getAPIntValue().ult(RHS: NumElts)) {
3547	unsigned Idx = CIdx->getZExtValue();
3548	if (!DemandedElts [Idx])
3549	return TLO.CombineTo(O: Op, N: Vec);
3550
3551	APInt DemandedVecElts(DemandedElts);
3552	DemandedVecElts.clearBit(BitPosition: Idx);
3553	if (SimplifyDemandedVectorElts(Op: Vec, OriginalDemandedElts: DemandedVecElts, KnownUndef,
3554	KnownZero, TLO, Depth: Depth + `1`))
3555	return true;
3556
3557	KnownUndef.setBitVal(BitPosition: Idx, BitValue: Scl.isUndef());
3558
3559	KnownZero.setBitVal(BitPosition: Idx, BitValue: isNullConstant(V: Scl) \|\| isNullFPConstant(V: Scl));
3560	break;
3561	}
3562
3563	APInt VecUndef, VecZero;
3564	if (SimplifyDemandedVectorElts(Op: Vec, OriginalDemandedElts: DemandedElts, KnownUndef&: VecUndef, KnownZero&: VecZero, TLO,
3565	Depth: Depth + `1`))
3566	return true;
3567	// Without knowing the insertion index we can't set KnownUndef/KnownZero.
3568	break;
3569	}
3570	case ISD::VSELECT: {
3571	SDValue Sel = Op.getOperand(i: `0`);
3572	SDValue LHS = Op.getOperand(i: `1`);
3573	SDValue RHS = Op.getOperand(i: `2`);
3574
3575	// Try to transform the select condition based on the current demanded
3576	// elements.
3577	APInt UndefSel, ZeroSel;
3578	if (SimplifyDemandedVectorElts(Op: Sel, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefSel, KnownZero&: ZeroSel, TLO,
3579	Depth: Depth + `1`))
3580	return true;
3581
3582	// See if we can simplify either vselect operand.
3583	APInt DemandedLHS(DemandedElts);
3584	APInt DemandedRHS(DemandedElts);
3585	APInt UndefLHS, ZeroLHS;
3586	APInt UndefRHS, ZeroRHS;
3587	if (SimplifyDemandedVectorElts(Op: LHS, OriginalDemandedElts: DemandedLHS, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3588	Depth: Depth + `1`))
3589	return true;
3590	if (SimplifyDemandedVectorElts(Op: RHS, OriginalDemandedElts: DemandedRHS, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3591	Depth: Depth + `1`))
3592	return true;
3593
3594	KnownUndef = UndefLHS & UndefRHS;
3595	KnownZero = ZeroLHS & ZeroRHS;
3596
3597	// If we know that the selected element is always zero, we don't need the
3598	// select value element.
3599	APInt DemandedSel = DemandedElts & ~KnownZero;
3600	if (DemandedSel != DemandedElts)
3601	if (SimplifyDemandedVectorElts(Op: Sel, OriginalDemandedElts: DemandedSel, KnownUndef&: UndefSel, KnownZero&: ZeroSel, TLO,
3602	Depth: Depth + `1`))
3603	return true;
3604
3605	break;
3606	}
3607	case ISD::VECTOR_SHUFFLE: {
3608	SDValue LHS = Op.getOperand(i: `0`);
3609	SDValue RHS = Op.getOperand(i: `1`);
3610	ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Val&: Op)->getMask();
3611
3612	// Collect demanded elements from shuffle operands.
3613	APInt DemandedLHS(NumElts, `0`);
3614	APInt DemandedRHS(NumElts, `0`);
3615	for (unsigned i = `0`; i != NumElts; ++i) {
3616	int M = ShuffleMask [i];
3617	if (M < `0` \|\| !DemandedElts [i])
3618	continue;
3619	assert(`0` <= M && M < (int)(`2` * NumElts) && "Shuffle index out of range");
3620	if (M < (int)NumElts)
3621	DemandedLHS.setBit(M);
3622	else
3623	DemandedRHS.setBit(M - NumElts);
3624	}
3625
3626	// If either side isn't demanded, replace it by UNDEF. We handle this
3627	// explicitly here to also simplify in case of multiple uses (on the
3628	// contrary to the SimplifyDemandedVectorElts calls below).
3629	bool FoldLHS = !DemandedLHS && !LHS.isUndef();
3630	bool FoldRHS = !DemandedRHS && !RHS.isUndef();
3631	if (FoldLHS \|\| FoldRHS) {
3632	LHS = FoldLHS ? TLO.DAG.getUNDEF(VT: LHS.getValueType()) : LHS;
3633	RHS = FoldRHS ? TLO.DAG.getUNDEF(VT: RHS.getValueType()) : RHS;
3634	SDValue NewOp =
3635	TLO.DAG.getVectorShuffle(VT, dl: SDLoc (Op), N1: LHS, N2: RHS, Mask: ShuffleMask);
3636	return TLO.CombineTo(O: Op, N: NewOp);
3637	}
3638
3639	// See if we can simplify either shuffle operand.
3640	APInt UndefLHS, ZeroLHS;
3641	APInt UndefRHS, ZeroRHS;
3642	if (SimplifyDemandedVectorElts(Op: LHS, OriginalDemandedElts: DemandedLHS, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3643	Depth: Depth + `1`))
3644	return true;
3645	if (SimplifyDemandedVectorElts(Op: RHS, OriginalDemandedElts: DemandedRHS, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3646	Depth: Depth + `1`))
3647	return true;
3648
3649	// Simplify mask using undef elements from LHS/RHS.
3650	bool Updated = false;
3651	bool IdentityLHS = true, IdentityRHS = true;
3652	SmallVector<int, `32`> NewMask(ShuffleMask);
3653	for (unsigned i = `0`; i != NumElts; ++i) {
3654	int &M = NewMask [i];
3655	if (M < `0`)
3656	continue;
3657	if (!DemandedElts [i] \|\| (M < (int)NumElts && UndefLHS [M]) \|\|
3658	(M >= (int)NumElts && UndefRHS [M - NumElts])) {
3659	Updated = true;
3660	M = -`1`;
3661	}
3662	IdentityLHS &= (M < `0`) \|\| (M == (int)i);
3663	IdentityRHS &= (M < `0`) \|\| ((M - NumElts) == i);
3664	}
3665
3666	// Update legal shuffle masks based on demanded elements if it won't reduce
3667	// to Identity which can cause premature removal of the shuffle mask.
3668	if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3669	SDValue LegalShuffle =
3670	buildLegalVectorShuffle(VT, DL, N0: LHS, N1: RHS, Mask: NewMask, DAG&: TLO.DAG);
3671	if (LegalShuffle)
3672	return TLO.CombineTo(O: Op, N: LegalShuffle);
3673	}
3674
3675	// Propagate undef/zero elements from LHS/RHS.
3676	for (unsigned i = `0`; i != NumElts; ++i) {
3677	int M = ShuffleMask [i];
3678	if (M < `0`) {
3679	KnownUndef.setBit(i);
3680	} else if (M < (int)NumElts) {
3681	if (UndefLHS [M])
3682	KnownUndef.setBit(i);
3683	if (ZeroLHS [M])
3684	KnownZero.setBit(i);
3685	} else {
3686	if (UndefRHS [M - NumElts])
3687	KnownUndef.setBit(i);
3688	if (ZeroRHS [M - NumElts])
3689	KnownZero.setBit(i);
3690	}
3691	}
3692	break;
3693	}
3694	case ISD::ANY_EXTEND_VECTOR_INREG:
3695	case ISD::SIGN_EXTEND_VECTOR_INREG:
3696	case ISD::ZERO_EXTEND_VECTOR_INREG: {
3697	APInt SrcUndef, SrcZero;
3698	SDValue Src = Op.getOperand(i: `0`);
3699	unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3700	APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts);
3701	if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3702	Depth: Depth + `1`))
3703	return true;
3704	KnownZero = SrcZero.zextOrTrunc(width: NumElts);
3705	KnownUndef = SrcUndef.zextOrTrunc(width: NumElts);
3706
3707	if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3708	Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3709	DemandedSrcElts == `1`) {
3710	// aext - if we just need the bottom element then we can bitcast.
3711	return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
3712	}
3713
3714	if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3715	// zext(undef) upper bits are guaranteed to be zero.
3716	if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3717	return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: `0`, DL: SDLoc (Op), VT));
3718	KnownUndef.clearAllBits();
3719
3720	// zext - if we just need the bottom element then we can mask:
3721	// zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3722	if (IsLE && DemandedSrcElts == `1` && Src.getOpcode() == ISD::AND &&
3723	Op ->isOnlyUserOf(N: Src.getNode()) &&
3724	Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3725	SDLoc DL(Op);
3726	EVT SrcVT = Src.getValueType();
3727	EVT SrcSVT = SrcVT.getScalarType();
3728
3729	// If we're after type legalization and SrcSVT is not legal, use the
3730	// promoted type for creating constants to avoid creating nodes with
3731	// illegal types.
3732	if (AfterLegalizeTypes)
3733	SrcSVT = getLegalTypeToTransformTo(Context&: *TLO.DAG.getContext(), VT: SrcSVT);
3734
3735	SmallVector<SDValue> MaskElts;
3736	MaskElts.push_back(Elt: TLO.DAG.getAllOnesConstant(DL, VT: SrcSVT));
3737	MaskElts.append(NumInputs: NumSrcElts - `1`, Elt: TLO.DAG.getConstant(Val: `0`, DL, VT: SrcSVT));
3738	SDValue Mask = TLO.DAG.getBuildVector(VT: SrcVT, DL, Ops: MaskElts);
3739	if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3740	Opcode: ISD::AND, DL, VT: SrcVT, Ops: {Src.getOperand(i: `1`), Mask})) {
3741	Fold = TLO.DAG.getNode(Opcode: ISD::AND, DL, VT: SrcVT, N1: Src.getOperand(i: `0`), N2: Fold);
3742	return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Fold));
3743	}
3744	}
3745	}
3746	break;
3747	}
3748
3749	// TODO: There are more binop opcodes that could be handled here - MIN,
3750	// MAX, saturated math, etc.
3751	case ISD::ADD: {
3752	SDValue Op0 = Op.getOperand(i: `0`);
3753	SDValue Op1 = Op.getOperand(i: `1`);
3754	if (Op0 == Op1 && Op ->isOnlyUserOf(N: Op0.getNode())) {
3755	APInt UndefLHS, ZeroLHS;
3756	if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3757	Depth: Depth + `1`, /AssumeSingleUse/ true))
3758	return true;
3759	}
3760	[[fallthrough]];
3761	}
3762	case ISD::AVGCEILS:
3763	case ISD::AVGCEILU:
3764	case ISD::AVGFLOORS:
3765	case ISD::AVGFLOORU:
3766	case ISD::OR:
3767	case ISD::XOR:
3768	case ISD::SUB:
3769	case ISD::FADD:
3770	case ISD::FSUB:
3771	case ISD::FMUL:
3772	case ISD::FDIV:
3773	case ISD::FREM: {
3774	SDValue Op0 = Op.getOperand(i: `0`);
3775	SDValue Op1 = Op.getOperand(i: `1`);
3776
3777	APInt UndefRHS, ZeroRHS;
3778	if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3779	Depth: Depth + `1`))
3780	return true;
3781	APInt UndefLHS, ZeroLHS;
3782	if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3783	Depth: Depth + `1`))
3784	return true;
3785
3786	KnownZero = ZeroLHS & ZeroRHS;
3787	KnownUndef = getKnownUndefForVectorBinop(BO: Op, DAG&: TLO.DAG, UndefOp0: UndefLHS, UndefOp1: UndefRHS);
3788
3789	// Attempt to avoid multi-use ops if we don't need anything from them.
3790	// TODO - use KnownUndef to relax the demandedelts?
3791	if (!DemandedElts.isAllOnes())
3792	if (SimplifyDemandedVectorEltsBinOp (Op0, Op1))
3793	return true;
3794	break;
3795	}
3796	case ISD::SHL:
3797	case ISD::SRL:
3798	case ISD::SRA:
3799	case ISD::ROTL:
3800	case ISD::ROTR: {
3801	SDValue Op0 = Op.getOperand(i: `0`);
3802	SDValue Op1 = Op.getOperand(i: `1`);
3803
3804	APInt UndefRHS, ZeroRHS;
3805	if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3806	Depth: Depth + `1`))
3807	return true;
3808	APInt UndefLHS, ZeroLHS;
3809	if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3810	Depth: Depth + `1`))
3811	return true;
3812
3813	KnownZero = ZeroLHS;
3814	KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3815
3816	// Attempt to avoid multi-use ops if we don't need anything from them.
3817	// TODO - use KnownUndef to relax the demandedelts?
3818	if (!DemandedElts.isAllOnes())
3819	if (SimplifyDemandedVectorEltsBinOp (Op0, Op1))
3820	return true;
3821	break;
3822	}
3823	case ISD::MUL:
3824	case ISD::MULHU:
3825	case ISD::MULHS:
3826	case ISD::AND: {
3827	SDValue Op0 = Op.getOperand(i: `0`);
3828	SDValue Op1 = Op.getOperand(i: `1`);
3829
3830	APInt SrcUndef, SrcZero;
3831	if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3832	Depth: Depth + `1`))
3833	return true;
3834	// FIXME: If we know that a demanded element was zero in Op1 we don't need
3835	// to demand it in Op0 - its guaranteed to be zero. There is however a
3836	// restriction, as we must not make any of the originally demanded elements
3837	// more poisonous. We could reduce amount of elements demanded, but then we
3838	// also need a to inform SimplifyDemandedVectorElts that some elements must
3839	// not be made more poisonous.
3840	if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef, KnownZero,
3841	TLO, Depth: Depth + `1`))
3842	return true;
3843
3844	KnownUndef &= DemandedElts;
3845	KnownZero &= DemandedElts;
3846
3847	// If every element pair has a zero/undef/poison then just fold to zero.
3848	// fold (and x, undef/poison) -> 0 / (and x, 0) -> 0
3849	// fold (mul x, undef/poison) -> 0 / (mul x, 0) -> 0
3850	if (DemandedElts.isSubsetOf(RHS: SrcZero \| KnownZero \| SrcUndef \| KnownUndef))
3851	return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: `0`, DL: SDLoc (Op), VT));
3852
3853	// If either side has a zero element, then the result element is zero, even
3854	// if the other is an UNDEF.
3855	// TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3856	// and then handle 'and' nodes with the rest of the binop opcodes.
3857	KnownZero \|= SrcZero;
3858	KnownUndef &= SrcUndef;
3859	KnownUndef &= ~KnownZero;
3860
3861	// Attempt to avoid multi-use ops if we don't need anything from them.
3862	if (!DemandedElts.isAllOnes())
3863	if (SimplifyDemandedVectorEltsBinOp (Op0, Op1))
3864	return true;
3865	break;
3866	}
3867	case ISD::TRUNCATE:
3868	case ISD::SIGN_EXTEND:
3869	case ISD::ZERO_EXTEND:
3870	if (SimplifyDemandedVectorElts(Op: Op.getOperand(i: `0`), OriginalDemandedElts: DemandedElts, KnownUndef,
3871	KnownZero, TLO, Depth: Depth + `1`))
3872	return true;
3873
3874	if (!DemandedElts.isAllOnes())
3875	if (SDValue NewOp = SimplifyMultipleUseDemandedVectorElts(
3876	Op: Op.getOperand(i: `0`), DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`))
3877	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode, DL: SDLoc (Op), VT, Operand: NewOp));
3878
3879	if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3880	// zext(undef) upper bits are guaranteed to be zero.
3881	if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3882	return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: `0`, DL: SDLoc (Op), VT));
3883	KnownUndef.clearAllBits();
3884	}
3885	break;
3886	case ISD::SINT_TO_FP:
3887	case ISD::UINT_TO_FP:
3888	case ISD::FP_TO_SINT:
3889	case ISD::FP_TO_UINT:
3890	if (SimplifyDemandedVectorElts(Op: Op.getOperand(i: `0`), OriginalDemandedElts: DemandedElts, KnownUndef,
3891	KnownZero, TLO, Depth: Depth + `1`))
3892	return true;
3893	// Don't fall through to generic undef -> undef handling.
3894	return false;
3895	default: {
3896	if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3897	if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3898	KnownZero, TLO, Depth))
3899	return true;
3900	} else {
3901	KnownBits Known;
3902	APInt DemandedBits = APInt::getAllOnes(numBits: EltSizeInBits);
3903	if (SimplifyDemandedBits(Op, OriginalDemandedBits: DemandedBits, OriginalDemandedElts, Known,
3904	TLO, Depth, AssumeSingleUse))
3905	return true;
3906	}
3907	break;
3908	}
3909	}
3910	assert((KnownUndef & KnownZero) == `0` && "Elements flagged as undef AND zero");
3911
3912	// Constant fold all undef cases.
3913	// TODO: Handle zero cases as well.
3914	if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3915	return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3916
3917	return false;
3918	}
3919
3920	/// Determine which of the bits specified in Mask are known to be either zero or
3921	/// one and return them in the Known.
3922	void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3923	KnownBits &Known,
3924	const APInt &DemandedElts,
3925	const SelectionDAG &DAG,
3926	unsigned Depth) const {
3927	assert((Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
3928	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
3929	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
3930	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3931	"Should use MaskedValueIsZero if you don't know whether Op"
3932	" is a target node!");
3933	Known.resetAll();
3934	}
3935
3936	void TargetLowering::computeKnownBitsForTargetInstr(
3937	GISelValueTracking &Analysis, Register R, KnownBits &Known,
3938	const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3939	unsigned Depth) const {
3940	Known.resetAll();
3941	}
3942
3943	void TargetLowering::computeKnownFPClassForTargetInstr(
3944	GISelValueTracking &Analysis, Register R, KnownFPClass &Known,
3945	const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3946	unsigned Depth) const {
3947	Known.resetAll();
3948	}
3949
3950	void TargetLowering::computeKnownBitsForFrameIndex(
3951	const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3952	// The low bits are known zero if the pointer is aligned.
3953	Known.Zero.setLowBits(Log2(A: MF.getFrameInfo().getObjectAlign(ObjectIdx: FrameIdx)));
3954	}
3955
3956	Align TargetLowering::computeKnownAlignForTargetInstr(
3957	GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI,
3958	unsigned Depth) const {
3959	return Align (`1`);
3960	}
3961
3962	/// This method can be implemented by targets that want to expose additional
3963	/// information about sign bits to the DAG Combiner.
3964	unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3965	const APInt &,
3966	const SelectionDAG &,
3967	unsigned Depth) const {
3968	assert((Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
3969	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
3970	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
3971	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3972	"Should use ComputeNumSignBits if you don't know whether Op"
3973	" is a target node!");
3974	return `1`;
3975	}
3976
3977	unsigned TargetLowering::computeNumSignBitsForTargetInstr(
3978	GISelValueTracking &Analysis, Register R, const APInt &DemandedElts,
3979	const MachineRegisterInfo &MRI, unsigned Depth) const {
3980	return `1`;
3981	}
3982
3983	bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3984	SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3985	TargetLoweringOpt &TLO, unsigned Depth) const {
3986	assert((Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
3987	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
3988	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
3989	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3990	"Should use SimplifyDemandedVectorElts if you don't know whether Op"
3991	" is a target node!");
3992	return false;
3993	}
3994
3995	bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3996	SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3997	KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3998	assert((Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
3999	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
4000	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
4001	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4002	"Should use SimplifyDemandedBits if you don't know whether Op"
4003	" is a target node!");
4004	computeKnownBitsForTargetNode(Op, Known, DemandedElts, DAG: TLO.DAG, Depth);
4005	return false;
4006	}
4007
4008	SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
4009	SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
4010	SelectionDAG &DAG, unsigned Depth) const {
4011	assert(
4012	(Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
4013	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
4014	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
4015	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4016	"Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
4017	" is a target node!");
4018	return SDValue ();
4019	}
4020
4021	SDValue
4022	TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
4023	SDValue N1, MutableArrayRef<int> Mask,
4024	SelectionDAG &DAG) const {
4025	bool LegalMask = isShuffleMaskLegal(Mask, VT);
4026	if (!LegalMask) {
4027	std::swap(a&: N0, b&: N1);
4028	ShuffleVectorSDNode::commuteMask(Mask);
4029	LegalMask = isShuffleMaskLegal(Mask, VT);
4030	}
4031
4032	if (!LegalMask)
4033	return SDValue ();
4034
4035	return DAG.getVectorShuffle(VT, dl: DL, N1: N0, N2: N1, Mask);
4036	}
4037
4038	const Constant TargetLowering::getTargetConstantFromLoad(LoadSDNode) const {
4039	return nullptr;
4040	}
4041
4042	bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
4043	SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4044	bool PoisonOnly, unsigned Depth) const {
4045	assert(
4046	(Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
4047	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
4048	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
4049	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4050	"Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
4051	" is a target node!");
4052
4053	// If Op can't create undef/poison and none of its operands are undef/poison
4054	// then Op is never undef/poison.
4055	return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
4056	/ConsiderFlags/ true, Depth) &&
4057	all_of(Range: Op ->ops(), P: [&](SDValue V) {
4058	return DAG.isGuaranteedNotToBeUndefOrPoison(Op: V, PoisonOnly,
4059	Depth: Depth + `1`);
4060	});
4061	}
4062
4063	bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
4064	SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4065	bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
4066	assert((Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
4067	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
4068	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
4069	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4070	"Should use canCreateUndefOrPoison if you don't know whether Op"
4071	" is a target node!");
4072	// Be conservative and return true.
4073	return true;
4074	}
4075
4076	bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
4077	const APInt &DemandedElts,
4078	const SelectionDAG &DAG,
4079	bool SNaN,
4080	unsigned Depth) const {
4081	assert((Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
4082	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
4083	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
4084	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4085	"Should use isKnownNeverNaN if you don't know whether Op"
4086	" is a target node!");
4087	return false;
4088	}
4089
4090	bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
4091	const APInt &DemandedElts,
4092	APInt &UndefElts,
4093	const SelectionDAG &DAG,
4094	unsigned Depth) const {
4095	assert((Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
4096	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
4097	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
4098	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4099	"Should use isSplatValue if you don't know whether Op"
4100	" is a target node!");
4101	return false;
4102	}
4103
4104	// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
4105	// work with truncating build vectors and vectors with elements of less than
4106	// 8 bits.
4107	bool TargetLowering::isConstTrueVal(SDValue N) const {
4108	if (!N)
4109	return false;
4110
4111	unsigned EltWidth;
4112	APInt CVal;
4113	if (ConstantSDNode CN = isConstOrConstSplat(N, /AllowUndefs=/*false,
4114	/AllowTruncation=/true)) {
4115	CVal = CN->getAPIntValue();
4116	EltWidth = N.getValueType().getScalarSizeInBits();
4117	} else
4118	return false;
4119
4120	// If this is a truncating splat, truncate the splat value.
4121	// Otherwise, we may fail to match the expected values below.
4122	if (EltWidth < CVal.getBitWidth())
4123	CVal = CVal.trunc(width: EltWidth);
4124
4125	switch (getBooleanContents(Type: N.getValueType())) {
4126	case UndefinedBooleanContent:
4127	return CVal [`0`];
4128	case ZeroOrOneBooleanContent:
4129	return CVal.isOne();
4130	case ZeroOrNegativeOneBooleanContent:
4131	return CVal.isAllOnes();
4132	}
4133
4134	llvm_unreachable("Invalid boolean contents");
4135	}
4136
4137	bool TargetLowering::isConstFalseVal(SDValue N) const {
4138	if (!N)
4139	return false;
4140
4141	const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: N);
4142	if (!CN) {
4143	const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Val&: N);
4144	if (!BV)
4145	return false;
4146
4147	// Only interested in constant splats, we don't care about undef
4148	// elements in identifying boolean constants and getConstantSplatNode
4149	// returns NULL if all ops are undef;
4150	CN = BV->getConstantSplatNode();
4151	if (!CN)
4152	return false;
4153	}
4154
4155	if (getBooleanContents(Type: N ->getValueType(ResNo: `0`)) == UndefinedBooleanContent)
4156	return !CN->getAPIntValue()[`0`];
4157
4158	return CN->isZero();
4159	}
4160
4161	bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
4162	bool SExt) const {
4163	if (VT == MVT::i1)
4164	return N->isOne();
4165
4166	TargetLowering::BooleanContent Cnt = getBooleanContents(Type: VT);
4167	switch (Cnt) {
4168	case TargetLowering::ZeroOrOneBooleanContent:
4169	// An extended value of 1 is always true, unless its original type is i1,
4170	// in which case it will be sign extended to -1.
4171	return (N->isOne() && !SExt) \|\| (SExt && (N->getValueType(ResNo: `0`) != MVT::i1));
4172	case TargetLowering::UndefinedBooleanContent:
4173	case TargetLowering::ZeroOrNegativeOneBooleanContent:
4174	return N->isAllOnes() && SExt;
4175	}
4176	llvm_unreachable("Unexpected enumeration.");
4177	}
4178
4179	/// This helper function of SimplifySetCC tries to optimize the comparison when
4180	/// either operand of the SetCC node is a bitwise-and instruction.
4181	SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
4182	ISD::CondCode Cond, const SDLoc &DL,
4183	DAGCombinerInfo &DCI) const {
4184	if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
4185	std::swap(a&: N0, b&: N1);
4186
4187	SelectionDAG &DAG = DCI.DAG;
4188	EVT OpVT = N0.getValueType();
4189	if (N0.getOpcode() != ISD::AND \|\| !OpVT.isInteger() \|\|
4190	(Cond != ISD::SETEQ && Cond != ISD::SETNE))
4191	return SDValue ();
4192
4193	// (X & Y) != 0 --> zextOrTrunc(X & Y)
4194	// iff everything but LSB is known zero:
4195	if (Cond == ISD::SETNE && isNullConstant(V: N1) &&
4196	(getBooleanContents(Type: OpVT) == TargetLowering::UndefinedBooleanContent \|\|
4197	getBooleanContents(Type: OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
4198	unsigned NumEltBits = OpVT.getScalarSizeInBits();
4199	APInt UpperBits = APInt::getHighBitsSet(numBits: NumEltBits, hiBitsSet: NumEltBits - `1`);
4200	if (DAG.MaskedValueIsZero(Op: N0, Mask: UpperBits))
4201	return DAG.getBoolExtOrTrunc(Op: N0, SL: DL, VT, OpVT);
4202	}
4203
4204	// Try to eliminate a power-of-2 mask constant by converting to a signbit
4205	// test in a narrow type that we can truncate to with no cost. Examples:
4206	// (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
4207	// (i32 X & 32768) != 0 --> (trunc X to i16) < 0
4208	// TODO: This conservatively checks for type legality on the source and
4209	// destination types. That may inhibit optimizations, but it also
4210	// allows setcc->shift transforms that may be more beneficial.
4211	auto *AndC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: `1`));
4212	if (AndC && isNullConstant(V: N1) && AndC->getAPIntValue().isPowerOf2() &&
4213	isTypeLegal(VT: OpVT) && N0.hasOneUse()) {
4214	EVT NarrowVT = EVT::getIntegerVT(Context&: *DAG.getContext(),
4215	BitWidth: AndC->getAPIntValue().getActiveBits());
4216	if (isTruncateFree(FromVT: OpVT, ToVT: NarrowVT) && isTypeLegal(VT: NarrowVT)) {
4217	SDValue Trunc = DAG.getZExtOrTrunc(Op: N0.getOperand(i: `0`), DL, VT: NarrowVT);
4218	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT: NarrowVT);
4219	return DAG.getSetCC(DL, VT, LHS: Trunc, RHS: Zero,
4220	Cond: Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
4221	}
4222	}
4223
4224	// Match these patterns in any of their permutations:
4225	// (X & Y) == Y
4226	// (X & Y) != Y
4227	SDValue X, Y;
4228	if (N0.getOperand(i: `0`) == N1) {
4229	X = N0.getOperand(i: `1`);
4230	Y = N0.getOperand(i: `0`);
4231	} else if (N0.getOperand(i: `1`) == N1) {
4232	X = N0.getOperand(i: `0`);
4233	Y = N0.getOperand(i: `1`);
4234	} else {
4235	return SDValue ();
4236	}
4237
4238	// TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4239	// `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
4240	// its liable to create and infinite loop.
4241	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT: OpVT);
4242	if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
4243	DAG.isKnownToBeAPowerOfTwo(Val: Y)) {
4244	// Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4245	// Note that where Y is variable and is known to have at most one bit set
4246	// (for example, if it is Z & 1) we cannot do this; the expressions are not
4247	// equivalent when Y == 0.
4248	assert(OpVT.isInteger());
4249	Cond = ISD::getSetCCInverse(Operation: Cond, Type: OpVT);
4250	if (DCI.isBeforeLegalizeOps() \|\|
4251	isCondCodeLegal(CC: Cond, VT: N0.getSimpleValueType()))
4252	return DAG.getSetCC(DL, VT, LHS: N0, RHS: Zero, Cond);
4253	} else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
4254	// If the target supports an 'and-not' or 'and-complement' logic operation,
4255	// try to use that to make a comparison operation more efficient.
4256	// But don't do this transform if the mask is a single bit because there are
4257	// more efficient ways to deal with that case (for example, 'bt' on x86 or
4258	// 'rlwinm' on PPC).
4259
4260	// Bail out if the compare operand that we want to turn into a zero is
4261	// already a zero (otherwise, infinite loop).
4262	if (isNullConstant(V: Y))
4263	return SDValue ();
4264
4265	// Transform this into: ~X & Y == 0.
4266	SDValue NotX = DAG.getNOT(DL: SDLoc (X), Val: X, VT: OpVT);
4267	SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL: SDLoc (N0), VT: OpVT, N1: NotX, N2: Y);
4268	return DAG.getSetCC(DL, VT, LHS: NewAnd, RHS: Zero, Cond);
4269	}
4270
4271	return SDValue ();
4272	}
4273
4274	/// This helper function of SimplifySetCC tries to optimize the comparison when
4275	/// either operand of the SetCC node is a bitwise-or instruction.
4276	/// For now, this just transforms (X \| Y) ==/!= Y into X & ~Y ==/!= 0.
4277	SDValue TargetLowering::foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1,
4278	ISD::CondCode Cond, const SDLoc &DL,
4279	DAGCombinerInfo &DCI) const {
4280	if (N1.getOpcode() == ISD::OR && N0.getOpcode() != ISD::OR)
4281	std::swap(a&: N0, b&: N1);
4282
4283	SelectionDAG &DAG = DCI.DAG;
4284	EVT OpVT = N0.getValueType();
4285	if (!N0.hasOneUse() \|\| !OpVT.isInteger() \|\|
4286	(Cond != ISD::SETEQ && Cond != ISD::SETNE))
4287	return SDValue ();
4288
4289	// (X \| Y) == Y
4290	// (X \| Y) != Y
4291	SDValue X;
4292	if (sd_match(N: N0, P: m_Or(L: m_Value(N&: X), R: m_Specific(N: N1))) && hasAndNotCompare(Y: X)) {
4293	// If the target supports an 'and-not' or 'and-complement' logic operation,
4294	// try to use that to make a comparison operation more efficient.
4295
4296	// Bail out if the compare operand that we want to turn into a zero is
4297	// already a zero (otherwise, infinite loop).
4298	if (isNullConstant(V: N1))
4299	return SDValue ();
4300
4301	// Transform this into: X & ~Y ==/!= 0.
4302	SDValue NotY = DAG.getNOT(DL: SDLoc (N1), Val: N1, VT: OpVT);
4303	SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL: SDLoc (N0), VT: OpVT, N1: X, N2: NotY);
4304	return DAG.getSetCC(DL, VT, LHS: NewAnd, RHS: DAG.getConstant(Val: `0`, DL, VT: OpVT), Cond);
4305	}
4306
4307	return SDValue ();
4308	}
4309
4310	/// There are multiple IR patterns that could be checking whether certain
4311	/// truncation of a signed number would be lossy or not. The pattern which is
4312	/// best at IR level, may not lower optimally. Thus, we want to unfold it.
4313	/// We are looking for the following pattern: (KeptBits is a constant)
4314	/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4315	/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4316	/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
4317	/// We will unfold it into the natural trunc+sext pattern:
4318	/// ((%x << C) a>> C) dstcond %x
4319	/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
4320	SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4321	EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4322	const SDLoc &DL) const {
4323	// We must be comparing with a constant.
4324	ConstantSDNode *C1;
4325	if (!(C1 = dyn_cast<ConstantSDNode>(Val&: N1)))
4326	return SDValue ();
4327
4328	// N0 should be: add %x, (1 << (KeptBits-1))
4329	if (N0 ->getOpcode() != ISD::ADD)
4330	return SDValue ();
4331
4332	// And we must be 'add'ing a constant.
4333	ConstantSDNode *C01;
4334	if (!(C01 = dyn_cast<ConstantSDNode>(Val: N0 ->getOperand(Num: `1`))))
4335	return SDValue ();
4336
4337	SDValue X = N0 ->getOperand(Num: `0`);
4338	EVT XVT = X.getValueType();
4339
4340	// Validate constants ...
4341
4342	APInt I1 = C1->getAPIntValue();
4343
4344	ISD::CondCode NewCond;
4345	if (Cond == ISD::CondCode::SETULT) {
4346	NewCond = ISD::CondCode::SETEQ;
4347	} else if (Cond == ISD::CondCode::SETULE) {
4348	NewCond = ISD::CondCode::SETEQ;
4349	// But need to 'canonicalize' the constant.
4350	I1 += `1`;
4351	} else if (Cond == ISD::CondCode::SETUGT) {
4352	NewCond = ISD::CondCode::SETNE;
4353	// But need to 'canonicalize' the constant.
4354	I1 += `1`;
4355	} else if (Cond == ISD::CondCode::SETUGE) {
4356	NewCond = ISD::CondCode::SETNE;
4357	} else
4358	return SDValue ();
4359
4360	APInt I01 = C01->getAPIntValue();
4361
4362	auto checkConstants = [&I1, &I01]() -> bool {
4363	// Both of them must be power-of-two, and the constant from setcc is bigger.
4364	return I1.ugt(RHS: I01) && I1.isPowerOf2() && I01.isPowerOf2();
4365	};
4366
4367	if (checkConstants ()) {
4368	// Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4369	} else {
4370	// What if we invert constants? (and the target predicate)
4371	I1.negate();
4372	I01.negate();
4373	assert(XVT.isInteger());
4374	NewCond = getSetCCInverse(Operation: NewCond, Type: XVT);
4375	if (!checkConstants ())
4376	return SDValue ();
4377	// Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4378	}
4379
4380	// They are power-of-two, so which bit is set?
4381	const unsigned KeptBits = I1.logBase2();
4382	const unsigned KeptBitsMinusOne = I01.logBase2();
4383
4384	// Magic!
4385	if (KeptBits != (KeptBitsMinusOne + `1`))
4386	return SDValue ();
4387	assert(KeptBits > `0` && KeptBits < XVT.getSizeInBits() && "unreachable");
4388
4389	// We don't want to do this in every single case.
4390	SelectionDAG &DAG = DCI.DAG;
4391	if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
4392	return SDValue ();
4393
4394	// Unfold into: sext_inreg(%x) cond %x
4395	// Where 'cond' will be either 'eq' or 'ne'.
4396	SDValue SExtInReg = DAG.getNode(
4397	Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: XVT, N1: X,
4398	N2: DAG.getValueType(EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: KeptBits)));
4399	return DAG.getSetCC(DL, VT: SCCVT, LHS: SExtInReg, RHS: X, Cond: NewCond);
4400	}
4401
4402	// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
4403	SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4404	EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4405	DAGCombinerInfo &DCI, const SDLoc &DL) const {
4406	assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() &&
4407	"Should be a comparison with 0.");
4408	assert((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
4409	"Valid only for [in]equality comparisons.");
4410
4411	unsigned NewShiftOpcode;
4412	SDValue X, C, Y;
4413
4414	SelectionDAG &DAG = DCI.DAG;
4415
4416	// Look for '(C l>>/<< Y)'.
4417	auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
4418	// The shift should be one-use.
4419	if (!V.hasOneUse())
4420	return false;
4421	unsigned OldShiftOpcode = V.getOpcode();
4422	switch (OldShiftOpcode) {
4423	case ISD::SHL:
4424	NewShiftOpcode = ISD::SRL;
4425	break;
4426	case ISD::SRL:
4427	NewShiftOpcode = ISD::SHL;
4428	break;
4429	default:
4430	return false; // must be a logical shift.
4431	}
4432	// We should be shifting a constant.
4433	// FIXME: best to use isConstantOrConstantVector().
4434	C = V.getOperand(i: `0`);
4435	ConstantSDNode *CC =
4436	isConstOrConstSplat(N: C, /AllowUndefs=/true, /AllowTruncation=/true);
4437	if (!CC)
4438	return false;
4439	Y = V.getOperand(i: `1`);
4440
4441	ConstantSDNode *XC =
4442	isConstOrConstSplat(N: X, /AllowUndefs=/true, /AllowTruncation=/true);
4443	return shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
4444	X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4445	};
4446
4447	// LHS of comparison should be an one-use 'and'.
4448	if (N0.getOpcode() != ISD::AND \|\| !N0.hasOneUse())
4449	return SDValue ();
4450
4451	X = N0.getOperand(i: `0`);
4452	SDValue Mask = N0.getOperand(i: `1`);
4453
4454	// 'and' is commutative!
4455	if (!Match (Mask)) {
4456	std::swap(a&: X, b&: Mask);
4457	if (!Match (Mask))
4458	return SDValue ();
4459	}
4460
4461	EVT VT = X.getValueType();
4462
4463	// Produce:
4464	// ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4465	SDValue T0 = DAG.getNode(Opcode: NewShiftOpcode, DL, VT, N1: X, N2: Y);
4466	SDValue T1 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: T0, N2: C);
4467	SDValue T2 = DAG.getSetCC(DL, VT: SCCVT, LHS: T1, RHS: N1C, Cond);
4468	return T2;
4469	}
4470
4471	/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4472	/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4473	/// handle the commuted versions of these patterns.
4474	SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4475	ISD::CondCode Cond, const SDLoc &DL,
4476	DAGCombinerInfo &DCI) const {
4477	unsigned BOpcode = N0.getOpcode();
4478	assert((BOpcode == ISD::ADD \|\| BOpcode == ISD::SUB \|\| BOpcode == ISD::XOR) &&
4479	"Unexpected binop");
4480	assert((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) && "Unexpected condcode");
4481
4482	// (X + Y) == X --> Y == 0
4483	// (X - Y) == X --> Y == 0
4484	// (X ^ Y) == X --> Y == 0
4485	SelectionDAG &DAG = DCI.DAG;
4486	EVT OpVT = N0.getValueType();
4487	SDValue X = N0.getOperand(i: `0`);
4488	SDValue Y = N0.getOperand(i: `1`);
4489	if (X == N1)
4490	return DAG.getSetCC(DL, VT, LHS: Y, RHS: DAG.getConstant(Val: `0`, DL, VT: OpVT), Cond);
4491
4492	if (Y != N1)
4493	return SDValue ();
4494
4495	// (X + Y) == Y --> X == 0
4496	// (X ^ Y) == Y --> X == 0
4497	if (BOpcode == ISD::ADD \|\| BOpcode == ISD::XOR)
4498	return DAG.getSetCC(DL, VT, LHS: X, RHS: DAG.getConstant(Val: `0`, DL, VT: OpVT), Cond);
4499
4500	// The shift would not be valid if the operands are boolean (i1).
4501	if (!N0.hasOneUse() \|\| OpVT.getScalarSizeInBits() == `1`)
4502	return SDValue ();
4503
4504	// (X - Y) == Y --> X == Y << 1
4505	SDValue One = DAG.getShiftAmountConstant(Val: `1`, VT: OpVT, DL);
4506	SDValue YShl1 = DAG.getNode(Opcode: ISD::SHL, DL, VT: N1.getValueType(), N1: Y, N2: One);
4507	if (!DCI.isCalledByLegalizer())
4508	DCI.AddToWorklist(N: YShl1.getNode());
4509	return DAG.getSetCC(DL, VT, LHS: X, RHS: YShl1, Cond);
4510	}
4511
4512	static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
4513	SDValue N0, const APInt &C1,
4514	ISD::CondCode Cond, const SDLoc &dl,
4515	SelectionDAG &DAG) {
4516	// Look through truncs that don't change the value of a ctpop.
4517	// FIXME: Add vector support? Need to be careful with setcc result type below.
4518	SDValue CTPOP = N0;
4519	if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4520	N0.getScalarValueSizeInBits() > Log2_32(Value: N0.getOperand(i: `0`).getScalarValueSizeInBits()))
4521	CTPOP = N0.getOperand(i: `0`);
4522
4523	if (CTPOP.getOpcode() != ISD::CTPOP \|\| !CTPOP.hasOneUse())
4524	return SDValue ();
4525
4526	EVT CTVT = CTPOP.getValueType();
4527	SDValue CTOp = CTPOP.getOperand(i: `0`);
4528
4529	// Expand a power-of-2-or-zero comparison based on ctpop:
4530	// (ctpop x) u< 2 -> (x & x-1) == 0
4531	// (ctpop x) u> 1 -> (x & x-1) != 0
4532	if (Cond == ISD::SETULT \|\| Cond == ISD::SETUGT) {
4533	// Keep the CTPOP if it is a cheap vector op.
4534	if (CTVT.isVector() && TLI.isCtpopFast(VT: CTVT))
4535	return SDValue ();
4536
4537	unsigned CostLimit = TLI.getCustomCtpopCost(VT: CTVT, Cond);
4538	if (C1.ugt(RHS: CostLimit + (Cond == ISD::SETULT)))
4539	return SDValue ();
4540	if (C1 == `0` && (Cond == ISD::SETULT))
4541	return SDValue (); // This is handled elsewhere.
4542
4543	unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4544
4545	SDValue NegOne = DAG.getAllOnesConstant(DL: dl, VT: CTVT);
4546	SDValue Result = CTOp;
4547	for (unsigned i = `0`; i < Passes; i++) {
4548	SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: CTVT, N1: Result, N2: NegOne);
4549	Result = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: CTVT, N1: Result, N2: Add);
4550	}
4551	ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
4552	return DAG.getSetCC(DL: dl, VT, LHS: Result, RHS: DAG.getConstant(Val: `0`, DL: dl, VT: CTVT), Cond: CC);
4553	}
4554
4555	// Expand a power-of-2 comparison based on ctpop
4556	if ((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) && C1 == `1`) {
4557	// Keep the CTPOP if it is cheap.
4558	if (TLI.isCtpopFast(VT: CTVT))
4559	return SDValue ();
4560
4561	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT: CTVT);
4562	SDValue NegOne = DAG.getAllOnesConstant(DL: dl, VT: CTVT);
4563	assert(CTVT.isInteger());
4564	SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: CTVT, N1: CTOp, N2: NegOne);
4565
4566	// Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4567	// check before emitting a potentially unnecessary op.
4568	if (DAG.isKnownNeverZero(Op: CTOp)) {
4569	// (ctpop x) == 1 --> (x & x-1) == 0
4570	// (ctpop x) != 1 --> (x & x-1) != 0
4571	SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: CTVT, N1: CTOp, N2: Add);
4572	SDValue RHS = DAG.getSetCC(DL: dl, VT, LHS: And, RHS: Zero, Cond);
4573	return RHS;
4574	}
4575
4576	// (ctpop x) == 1 --> (x ^ x-1) > x-1
4577	// (ctpop x) != 1 --> (x ^ x-1) <= x-1
4578	SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: CTVT, N1: CTOp, N2: Add);
4579	ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
4580	return DAG.getSetCC(DL: dl, VT, LHS: Xor, RHS: Add, Cond: CmpCond);
4581	}
4582
4583	return SDValue ();
4584	}
4585
4586	static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4587	ISD::CondCode Cond, const SDLoc &dl,
4588	SelectionDAG &DAG) {
4589	if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4590	return SDValue ();
4591
4592	auto C1 = isConstOrConstSplat(N: N1, /* AllowUndefs / true);
4593	if (!C1 \|\| !(C1->isZero() \|\| C1->isAllOnes()))
4594	return SDValue ();
4595
4596	auto getRotateSource = [](SDValue X) {
4597	if (X.getOpcode() == ISD::ROTL \|\| X.getOpcode() == ISD::ROTR)
4598	return X.getOperand(i: `0`);
4599	return SDValue ();
4600	};
4601
4602	// Peek through a rotated value compared against 0 or -1:
4603	// (rot X, Y) == 0/-1 --> X == 0/-1
4604	// (rot X, Y) != 0/-1 --> X != 0/-1
4605	if (SDValue R = getRotateSource (N0))
4606	return DAG.getSetCC(DL: dl, VT, LHS: R, RHS: N1, Cond);
4607
4608	// Peek through an 'or' of a rotated value compared against 0:
4609	// or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4610	// or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4611	//
4612	// TODO: Add the 'and' with -1 sibling.
4613	// TODO: Recurse through a series of 'or' ops to find the rotate.
4614	EVT OpVT = N0.getValueType();
4615	if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4616	if (SDValue R = getRotateSource (N0.getOperand(i: `0`))) {
4617	SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: R, N2: N0.getOperand(i: `1`));
4618	return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4619	}
4620	if (SDValue R = getRotateSource (N0.getOperand(i: `1`))) {
4621	SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: R, N2: N0.getOperand(i: `0`));
4622	return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4623	}
4624	}
4625
4626	return SDValue ();
4627	}
4628
4629	static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
4630	ISD::CondCode Cond, const SDLoc &dl,
4631	SelectionDAG &DAG) {
4632	// If we are testing for all-bits-clear, we might be able to do that with
4633	// less shifting since bit-order does not matter.
4634	if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4635	return SDValue ();
4636
4637	auto C1 = isConstOrConstSplat(N: N1, /* AllowUndefs / true);
4638	if (!C1 \|\| !C1->isZero())
4639	return SDValue ();
4640
4641	if (!N0.hasOneUse() \|\|
4642	(N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4643	return SDValue ();
4644
4645	unsigned BitWidth = N0.getScalarValueSizeInBits();
4646	auto *ShAmtC = isConstOrConstSplat(N: N0.getOperand(i: `2`));
4647	if (!ShAmtC)
4648	return SDValue ();
4649
4650	uint64_t ShAmt = ShAmtC->getAPIntValue().urem(RHS: BitWidth);
4651	if (ShAmt == `0`)
4652	return SDValue ();
4653
4654	// Canonicalize fshr as fshl to reduce pattern-matching.
4655	if (N0.getOpcode() == ISD::FSHR)
4656	ShAmt = BitWidth - ShAmt;
4657
4658	// Match an 'or' with a specific operand 'Other' in either commuted variant.
4659	SDValue X, Y;
4660	auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4661	if (Or.getOpcode() != ISD::OR \|\| !Or.hasOneUse())
4662	return false;
4663	if (Or.getOperand(i: `0`) == Other) {
4664	X = Or.getOperand(i: `0`);
4665	Y = Or.getOperand(i: `1`);
4666	return true;
4667	}
4668	if (Or.getOperand(i: `1`) == Other) {
4669	X = Or.getOperand(i: `1`);
4670	Y = Or.getOperand(i: `0`);
4671	return true;
4672	}
4673	return false;
4674	};
4675
4676	EVT OpVT = N0.getValueType();
4677	EVT ShAmtVT = N0.getOperand(i: `2`).getValueType();
4678	SDValue F0 = N0.getOperand(i: `0`);
4679	SDValue F1 = N0.getOperand(i: `1`);
4680	if (matchOr (F0, F1)) {
4681	// fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4682	SDValue NewShAmt = DAG.getConstant(Val: ShAmt, DL: dl, VT: ShAmtVT);
4683	SDValue Shift = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: OpVT, N1: Y, N2: NewShAmt);
4684	SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: Shift, N2: X);
4685	return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4686	}
4687	if (matchOr (F1, F0)) {
4688	// fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4689	SDValue NewShAmt = DAG.getConstant(Val: BitWidth - ShAmt, DL: dl, VT: ShAmtVT);
4690	SDValue Shift = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: OpVT, N1: Y, N2: NewShAmt);
4691	SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: Shift, N2: X);
4692	return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4693	}
4694
4695	return SDValue ();
4696	}
4697
4698	/// Try to simplify a setcc built with the specified operands and cc. If it is
4699	/// unable to simplify it, return a null SDValue.
4700	SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4701	ISD::CondCode Cond, bool foldBooleans,
4702	DAGCombinerInfo &DCI,
4703	const SDLoc &dl) const {
4704	SelectionDAG &DAG = DCI.DAG;
4705	const DataLayout &Layout = DAG.getDataLayout();
4706	EVT OpVT = N0.getValueType();
4707	AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4708
4709	// Constant fold or commute setcc.
4710	if (SDValue Fold = DAG.FoldSetCC(VT, N1: N0, N2: N1, Cond, dl))
4711	return Fold;
4712
4713	bool N0ConstOrSplat =
4714	isConstOrConstSplat(N: N0, /AllowUndefs/ false, /AllowTruncate/ AllowTruncation: true);
4715	bool N1ConstOrSplat =
4716	isConstOrConstSplat(N: N1, /AllowUndefs/ false, /AllowTruncate/ AllowTruncation: true);
4717
4718	// Canonicalize toward having the constant on the RHS.
4719	// TODO: Handle non-splat vector constants. All undef causes trouble.
4720	// FIXME: We can't yet fold constant scalable vector splats, so avoid an
4721	// infinite loop here when we encounter one.
4722	ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Operation: Cond);
4723	if (N0ConstOrSplat && !N1ConstOrSplat &&
4724	(DCI.isBeforeLegalizeOps() \|\|
4725	isCondCodeLegal(CC: SwappedCC, VT: N0.getSimpleValueType())))
4726	return DAG.getSetCC(DL: dl, VT, LHS: N1, RHS: N0, Cond: SwappedCC);
4727
4728	// If we have a subtract with the same 2 non-constant operands as this setcc
4729	// -- but in reverse order -- then try to commute the operands of this setcc
4730	// to match. A matching pair of setcc (cmp) and sub may be combined into 1
4731	// instruction on some targets.
4732	if (!N0ConstOrSplat && !N1ConstOrSplat &&
4733	(DCI.isBeforeLegalizeOps() \|\|
4734	isCondCodeLegal(CC: SwappedCC, VT: N0.getSimpleValueType())) &&
4735	DAG.doesNodeExist(Opcode: ISD::SUB, VTList: DAG.getVTList(VT: OpVT), Ops: {N1, N0}) &&
4736	!DAG.doesNodeExist(Opcode: ISD::SUB, VTList: DAG.getVTList(VT: OpVT), Ops: {N0, N1}))
4737	return DAG.getSetCC(DL: dl, VT, LHS: N1, RHS: N0, Cond: SwappedCC);
4738
4739	if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4740	return V;
4741
4742	if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4743	return V;
4744
4745	if (auto *N1C = isConstOrConstSplat(N: N1)) {
4746	const APInt &C1 = N1C->getAPIntValue();
4747
4748	// Optimize some CTPOP cases.
4749	if (SDValue V = simplifySetCCWithCTPOP(TLI: *this, VT, N0, C1, Cond, dl, DAG))
4750	return V;
4751
4752	// For equality to 0 of a no-wrap multiply, decompose and test each op:
4753	// X Y == 0 --> (X == 0) \|\| (Y == 0)*
4754	// X Y != 0 --> (X != 0) && (Y != 0)*
4755	// TODO: This bails out if minsize is set, but if the target doesn't have a
4756	// single instruction multiply for this type, it would likely be
4757	// smaller to decompose.
4758	if (C1.isZero() && (Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
4759	N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4760	(N0 ->getFlags().hasNoUnsignedWrap() \|\|
4761	N0 ->getFlags().hasNoSignedWrap()) &&
4762	!Attr.hasFnAttr(Kind: Attribute::MinSize)) {
4763	SDValue IsXZero = DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`), RHS: N1, Cond);
4764	SDValue IsYZero = DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `1`), RHS: N1, Cond);
4765	unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4766	return DAG.getNode(Opcode: LogicOp, DL: dl, VT, N1: IsXZero, N2: IsYZero);
4767	}
4768
4769	// If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4770	// equality comparison, then we're just comparing whether X itself is
4771	// zero.
4772	if (N0.getOpcode() == ISD::SRL && (C1.isZero() \|\| C1.isOne()) &&
4773	N0.getOperand(i: `0`).getOpcode() == ISD::CTLZ &&
4774	llvm::has_single_bit<uint32_t>(Value: N0.getScalarValueSizeInBits())) {
4775	if (ConstantSDNode *ShAmt = isConstOrConstSplat(N: N0.getOperand(i: `1`))) {
4776	if ((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
4777	ShAmt->getAPIntValue() == Log2_32(Value: N0.getScalarValueSizeInBits())) {
4778	if ((C1 == `0`) == (Cond == ISD::SETEQ)) {
4779	// (srl (ctlz x), 5) == 0 -> X != 0
4780	// (srl (ctlz x), 5) != 1 -> X != 0
4781	Cond = ISD::SETNE;
4782	} else {
4783	// (srl (ctlz x), 5) != 0 -> X == 0
4784	// (srl (ctlz x), 5) == 1 -> X == 0
4785	Cond = ISD::SETEQ;
4786	}
4787	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT: N0.getValueType());
4788	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`).getOperand(i: `0`), RHS: Zero,
4789	Cond);
4790	}
4791	}
4792	}
4793	}
4794
4795	// setcc X, 0, setlt --> X (when X is all sign bits)
4796	// setcc X, 0, setne --> X (when X is all sign bits)
4797	//
4798	// When we know that X has 0 or -1 in each element (or scalar), this
4799	// comparison will produce X. This is only true when boolean contents are
4800	// represented via 0s and -1s.
4801	if (VT == OpVT &&
4802	// Check that the result of setcc is 0 and -1.
4803	getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent &&
4804	// Match only for checks X < 0 and X != 0
4805	(Cond == ISD::SETLT \|\| Cond == ISD::SETNE) && isNullOrNullSplat(V: N1) &&
4806	// The identity holds iff we know all sign bits for all lanes.
4807	DAG.ComputeNumSignBits(Op: N0) == N0.getScalarValueSizeInBits())
4808	return N0;
4809
4810	// FIXME: Support vectors.
4811	if (auto *N1C = dyn_cast<ConstantSDNode>(Val: N1.getNode())) {
4812	const APInt &C1 = N1C->getAPIntValue();
4813
4814	// (zext x) == C --> x == (trunc C)
4815	// (sext x) == C --> x == (trunc C)
4816	if ((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
4817	DCI.isBeforeLegalize() && N0 ->hasOneUse()) {
4818	unsigned MinBits = N0.getValueSizeInBits();
4819	SDValue PreExt;
4820	bool Signed = false;
4821	if (N0 ->getOpcode() == ISD::ZERO_EXTEND) {
4822	// ZExt
4823	MinBits = N0 ->getOperand(Num: `0`).getValueSizeInBits();
4824	PreExt = N0 ->getOperand(Num: `0`);
4825	} else if (N0 ->getOpcode() == ISD::AND) {
4826	// DAGCombine turns costly ZExts into ANDs
4827	if (auto *C = dyn_cast<ConstantSDNode>(Val: N0 ->getOperand(Num: `1`)))
4828	if ((C->getAPIntValue()+`1`).isPowerOf2()) {
4829	MinBits = C->getAPIntValue().countr_one();
4830	PreExt = N0 ->getOperand(Num: `0`);
4831	}
4832	} else if (N0 ->getOpcode() == ISD::SIGN_EXTEND) {
4833	// SExt
4834	MinBits = N0 ->getOperand(Num: `0`).getValueSizeInBits();
4835	PreExt = N0 ->getOperand(Num: `0`);
4836	Signed = true;
4837	} else if (auto *LN0 = dyn_cast<LoadSDNode>(Val&: N0)) {
4838	// ZEXTLOAD / SEXTLOAD
4839	if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4840	MinBits = LN0->getMemoryVT().getSizeInBits();
4841	PreExt = N0;
4842	} else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4843	Signed = true;
4844	MinBits = LN0->getMemoryVT().getSizeInBits();
4845	PreExt = N0;
4846	}
4847	}
4848
4849	// Figure out how many bits we need to preserve this constant.
4850	unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4851
4852	// Make sure we're not losing bits from the constant.
4853	if (MinBits > `0` &&
4854	MinBits < C1.getBitWidth() &&
4855	MinBits >= ReqdBits) {
4856	EVT MinVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: MinBits);
4857	if (isTypeDesirableForOp(ISD::SETCC, VT: MinVT)) {
4858	// Will get folded away.
4859	SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MinVT, Operand: PreExt);
4860	if (MinBits == `1` && C1 == `1`)
4861	// Invert the condition.
4862	return DAG.getSetCC(DL: dl, VT, LHS: Trunc, RHS: DAG.getConstant(Val: `0`, DL: dl, VT: MVT::i1),
4863	Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4864	SDValue C = DAG.getConstant(Val: C1.trunc(width: MinBits), DL: dl, VT: MinVT);
4865	return DAG.getSetCC(DL: dl, VT, LHS: Trunc, RHS: C, Cond);
4866	}
4867
4868	// If truncating the setcc operands is not desirable, we can still
4869	// simplify the expression in some cases:
4870	// setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4871	// setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4872	// setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4873	// setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4874	// setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4875	// setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4876	SDValue TopSetCC = N0 ->getOperand(Num: `0`);
4877	unsigned N0Opc = N0 ->getOpcode();
4878	bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4879	if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4880	TopSetCC.getOpcode() == ISD::SETCC &&
4881	(N0Opc == ISD::ZERO_EXTEND \|\| N0Opc == ISD::SIGN_EXTEND) &&
4882	(isConstFalseVal(N: N1) \|\|
4883	isExtendedTrueVal(N: N1C, VT: N0 ->getValueType(ResNo: `0`), SExt))) {
4884
4885	bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) \|\|
4886	(!N1C->isZero() && Cond == ISD::SETNE);
4887
4888	if (!Inverse)
4889	return TopSetCC;
4890
4891	ISD::CondCode InvCond = ISD::getSetCCInverse(
4892	Operation: cast<CondCodeSDNode>(Val: TopSetCC.getOperand(i: `2`))->get(),
4893	Type: TopSetCC.getOperand(i: `0`).getValueType());
4894	return DAG.getSetCC(DL: dl, VT, LHS: TopSetCC.getOperand(i: `0`),
4895	RHS: TopSetCC.getOperand(i: `1`),
4896	Cond: InvCond);
4897	}
4898	}
4899	}
4900
4901	// If the LHS is '(and load, const)', the RHS is 0, the test is for
4902	// equality or unsigned, and all 1 bits of the const are in the same
4903	// partial word, see if we can shorten the load.
4904	if (DCI.isBeforeLegalize() &&
4905	!ISD::isSignedIntSetCC(Code: Cond) &&
4906	N0.getOpcode() == ISD::AND && C1 == `0` &&
4907	N0.getNode()->hasOneUse() &&
4908	isa<LoadSDNode>(Val: N0.getOperand(i: `0`)) &&
4909	N0.getOperand(i: `0`).getNode()->hasOneUse() &&
4910	isa<ConstantSDNode>(Val: N0.getOperand(i: `1`))) {
4911	auto *Lod = cast<LoadSDNode>(Val: N0.getOperand(i: `0`));
4912	APInt bestMask;
4913	unsigned bestWidth = `0`, bestOffset = `0`;
4914	if (Lod->isSimple() && Lod->isUnindexed() &&
4915	(Lod->getMemoryVT().isByteSized() \|\|
4916	isPaddedAtMostSignificantBitsWhenStored(VT: Lod->getMemoryVT()))) {
4917	unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4918	unsigned origWidth = N0.getValueSizeInBits();
4919	unsigned maskWidth = origWidth;
4920	// We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4921	// 8 bits, but have to be careful...
4922	if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4923	origWidth = Lod->getMemoryVT().getSizeInBits();
4924	const APInt &Mask = N0.getConstantOperandAPInt(i: `1`);
4925	// Only consider power-of-2 widths (and at least one byte) as candiates
4926	// for the narrowed load.
4927	for (unsigned width = `8`; width < origWidth; width *= `2`) {
4928	EVT newVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: width);
4929	APInt newMask = APInt::getLowBitsSet(numBits: maskWidth, loBitsSet: width);
4930	// Avoid accessing any padding here for now (we could use memWidth
4931	// instead of origWidth here otherwise).
4932	unsigned maxOffset = origWidth - width;
4933	for (unsigned offset = `0`; offset <= maxOffset; offset += `8`) {
4934	if (Mask.isSubsetOf(RHS: newMask)) {
4935	unsigned ptrOffset =
4936	Layout.isLittleEndian() ? offset : memWidth - width - offset;
4937	unsigned IsFast = `0`;
4938	assert((ptrOffset % `8`) == `0` && "Non-Bytealigned pointer offset");
4939	Align NewAlign = commonAlignment(A: Lod->getAlign(), Offset: ptrOffset / `8`);
4940	if (shouldReduceLoadWidth(Load: Lod, ExtTy: ISD::NON_EXTLOAD, NewVT: newVT,
4941	ByteOffset: ptrOffset / `8`) &&
4942	allowsMemoryAccess(
4943	Context&: *DAG.getContext(), DL: Layout, VT: newVT, AddrSpace: Lod->getAddressSpace(),
4944	Alignment: NewAlign, Flags: Lod->getMemOperand()->getFlags(), Fast: &IsFast) &&
4945	IsFast) {
4946	bestOffset = ptrOffset / `8`;
4947	bestMask = Mask.lshr(shiftAmt: offset);
4948	bestWidth = width;
4949	break;
4950	}
4951	}
4952	newMask <<= `8`;
4953	}
4954	if (bestWidth)
4955	break;
4956	}
4957	}
4958	if (bestWidth) {
4959	EVT newVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: bestWidth);
4960	SDValue Ptr = Lod->getBasePtr();
4961	if (bestOffset != `0`)
4962	Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: bestOffset));
4963	SDValue NewLoad =
4964	DAG.getLoad(VT: newVT, dl, Chain: Lod->getChain(), Ptr,
4965	PtrInfo: Lod->getPointerInfo().getWithOffset(O: bestOffset),
4966	Alignment: Lod->getBaseAlign());
4967	SDValue And =
4968	DAG.getNode(Opcode: ISD::AND, DL: dl, VT: newVT, N1: NewLoad,
4969	N2: DAG.getConstant(Val: bestMask.trunc(width: bestWidth), DL: dl, VT: newVT));
4970	return DAG.getSetCC(DL: dl, VT, LHS: And, RHS: DAG.getConstant(Val: `0LL`, DL: dl, VT: newVT), Cond);
4971	}
4972	}
4973
4974	// If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4975	if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4976	unsigned InSize = N0.getOperand(i: `0`).getValueSizeInBits();
4977
4978	// If the comparison constant has bits in the upper part, the
4979	// zero-extended value could never match.
4980	if (C1.intersects(RHS: APInt::getHighBitsSet(numBits: C1.getBitWidth(),
4981	hiBitsSet: C1.getBitWidth() - InSize))) {
4982	switch (Cond) {
4983	case ISD::SETUGT:
4984	case ISD::SETUGE:
4985	case ISD::SETEQ:
4986	return DAG.getConstant(Val: `0`, DL: dl, VT);
4987	case ISD::SETULT:
4988	case ISD::SETULE:
4989	case ISD::SETNE:
4990	return DAG.getConstant(Val: `1`, DL: dl, VT);
4991	case ISD::SETGT:
4992	case ISD::SETGE:
4993	// True if the sign bit of C1 is set.
4994	return DAG.getConstant(Val: C1.isNegative(), DL: dl, VT);
4995	case ISD::SETLT:
4996	case ISD::SETLE:
4997	// True if the sign bit of C1 isn't set.
4998	return DAG.getConstant(Val: C1.isNonNegative(), DL: dl, VT);
4999	default:
5000	break;
5001	}
5002	}
5003
5004	// Otherwise, we can perform the comparison with the low bits.
5005	switch (Cond) {
5006	case ISD::SETEQ:
5007	case ISD::SETNE:
5008	case ISD::SETUGT:
5009	case ISD::SETUGE:
5010	case ISD::SETULT:
5011	case ISD::SETULE: {
5012	EVT newVT = N0.getOperand(i: `0`).getValueType();
5013	// FIXME: Should use isNarrowingProfitable.
5014	if (DCI.isBeforeLegalizeOps() \|\|
5015	(isOperationLegal(Op: ISD::SETCC, VT: newVT) &&
5016	isCondCodeLegal(CC: Cond, VT: newVT.getSimpleVT()) &&
5017	isTypeDesirableForOp(ISD::SETCC, VT: newVT))) {
5018	EVT NewSetCCVT = getSetCCResultType(DL: Layout, Context&: *DAG.getContext(), VT: newVT);
5019	SDValue NewConst = DAG.getConstant(Val: C1.trunc(width: InSize), DL: dl, VT: newVT);
5020
5021	SDValue NewSetCC = DAG.getSetCC(DL: dl, VT: NewSetCCVT, LHS: N0.getOperand(i: `0`),
5022	RHS: NewConst, Cond);
5023	return DAG.getBoolExtOrTrunc(Op: NewSetCC, SL: dl, VT, OpVT: N0.getValueType());
5024	}
5025	break;
5026	}
5027	default:
5028	break; // todo, be more careful with signed comparisons
5029	}
5030	} else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
5031	(Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
5032	!isSExtCheaperThanZExt(FromTy: cast<VTSDNode>(Val: N0.getOperand(i: `1`))->getVT(),
5033	ToTy: OpVT)) {
5034	EVT ExtSrcTy = cast<VTSDNode>(Val: N0.getOperand(i: `1`))->getVT();
5035	unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
5036	EVT ExtDstTy = N0.getValueType();
5037	unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
5038
5039	// If the constant doesn't fit into the number of bits for the source of
5040	// the sign extension, it is impossible for both sides to be equal.
5041	if (C1.getSignificantBits() > ExtSrcTyBits)
5042	return DAG.getBoolConstant(V: Cond == ISD::SETNE, DL: dl, VT, OpVT);
5043
5044	assert(ExtDstTy == N0.getOperand(`0`).getValueType() &&
5045	ExtDstTy != ExtSrcTy && "Unexpected types!");
5046	APInt Imm = APInt::getLowBitsSet(numBits: ExtDstTyBits, loBitsSet: ExtSrcTyBits);
5047	SDValue ZextOp = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ExtDstTy, N1: N0.getOperand(i: `0`),
5048	N2: DAG.getConstant(Val: Imm, DL: dl, VT: ExtDstTy));
5049	if (!DCI.isCalledByLegalizer())
5050	DCI.AddToWorklist(N: ZextOp.getNode());
5051	// Otherwise, make this a use of a zext.
5052	return DAG.getSetCC(DL: dl, VT, LHS: ZextOp,
5053	RHS: DAG.getConstant(Val: C1 & Imm, DL: dl, VT: ExtDstTy), Cond);
5054	} else if ((N1C->isZero() \|\| N1C->isOne()) &&
5055	(Cond == ISD::SETEQ \|\| Cond == ISD::SETNE)) {
5056	// SETCC (X), [0\|1], [EQ\|NE] -> X if X is known 0/1. i1 types are
5057	// excluded as they are handled below whilst checking for foldBooleans.
5058	if ((N0.getOpcode() == ISD::SETCC \|\| VT.getScalarType() != MVT::i1) &&
5059	isTypeLegal(VT) && VT.bitsLE(VT: N0.getValueType()) &&
5060	(N0.getValueType() == MVT::i1 \|\|
5061	getBooleanContents(Type: N0.getValueType()) == ZeroOrOneBooleanContent) &&
5062	DAG.MaskedValueIsZero(
5063	Op: N0, Mask: APInt::getBitsSetFrom(numBits: N0.getValueSizeInBits(), loBit: `1`))) {
5064	bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
5065	if (TrueWhenTrue)
5066	return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: N0);
5067	// Invert the condition.
5068	if (N0.getOpcode() == ISD::SETCC) {
5069	ISD::CondCode CC = cast<CondCodeSDNode>(Val: N0.getOperand(i: `2`))->get();
5070	CC = ISD::getSetCCInverse(Operation: CC, Type: N0.getOperand(i: `0`).getValueType());
5071	if (DCI.isBeforeLegalizeOps() \|\|
5072	isCondCodeLegal(CC, VT: N0.getOperand(i: `0`).getSimpleValueType()))
5073	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`), RHS: N0.getOperand(i: `1`), Cond: CC);
5074	}
5075	}
5076
5077	if ((N0.getOpcode() == ISD::XOR \|\|
5078	(N0.getOpcode() == ISD::AND &&
5079	N0.getOperand(i: `0`).getOpcode() == ISD::XOR &&
5080	N0.getOperand(i: `1`) == N0.getOperand(i: `0`).getOperand(i: `1`))) &&
5081	isOneConstant(V: N0.getOperand(i: `1`))) {
5082	// If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
5083	// can only do this if the top bits are known zero.
5084	unsigned BitWidth = N0.getValueSizeInBits();
5085	if (DAG.MaskedValueIsZero(Op: N0,
5086	Mask: APInt::getHighBitsSet(numBits: BitWidth,
5087	hiBitsSet: BitWidth-`1`))) {
5088	// Okay, get the un-inverted input value.
5089	SDValue Val;
5090	if (N0.getOpcode() == ISD::XOR) {
5091	Val = N0.getOperand(i: `0`);
5092	} else {
5093	assert(N0.getOpcode() == ISD::AND &&
5094	N0.getOperand(`0`).getOpcode() == ISD::XOR);
5095	// ((X^1)&1)^1 -> X & 1
5096	Val = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: N0.getValueType(),
5097	N1: N0.getOperand(i: `0`).getOperand(i: `0`),
5098	N2: N0.getOperand(i: `1`));
5099	}
5100
5101	return DAG.getSetCC(DL: dl, VT, LHS: Val, RHS: N1,
5102	Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5103	}
5104	} else if (N1C->isOne()) {
5105	SDValue Op0 = N0;
5106	if (Op0.getOpcode() == ISD::TRUNCATE)
5107	Op0 = Op0.getOperand(i: `0`);
5108
5109	if ((Op0.getOpcode() == ISD::XOR) &&
5110	Op0.getOperand(i: `0`).getOpcode() == ISD::SETCC &&
5111	Op0.getOperand(i: `1`).getOpcode() == ISD::SETCC) {
5112	SDValue XorLHS = Op0.getOperand(i: `0`);
5113	SDValue XorRHS = Op0.getOperand(i: `1`);
5114	// Ensure that the input setccs return an i1 type or 0/1 value.
5115	if (Op0.getValueType() == MVT::i1 \|\|
5116	(getBooleanContents(Type: XorLHS.getOperand(i: `0`).getValueType()) ==
5117	ZeroOrOneBooleanContent &&
5118	getBooleanContents(Type: XorRHS.getOperand(i: `0`).getValueType()) ==
5119	ZeroOrOneBooleanContent)) {
5120	// (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
5121	Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
5122	return DAG.getSetCC(DL: dl, VT, LHS: XorLHS, RHS: XorRHS, Cond);
5123	}
5124	}
5125	if (Op0.getOpcode() == ISD::AND && isOneConstant(V: Op0.getOperand(i: `1`))) {
5126	// If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
5127	if (Op0.getValueType().bitsGT(VT))
5128	Op0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
5129	N1: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Op0.getOperand(i: `0`)),
5130	N2: DAG.getConstant(Val: `1`, DL: dl, VT));
5131	else if (Op0.getValueType().bitsLT(VT))
5132	Op0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
5133	N1: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: Op0.getOperand(i: `0`)),
5134	N2: DAG.getConstant(Val: `1`, DL: dl, VT));
5135
5136	return DAG.getSetCC(DL: dl, VT, LHS: Op0,
5137	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: Op0.getValueType()),
5138	Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5139	}
5140	if (Op0.getOpcode() == ISD::AssertZext &&
5141	cast<VTSDNode>(Val: Op0.getOperand(i: `1`))->getVT() == MVT::i1)
5142	return DAG.getSetCC(DL: dl, VT, LHS: Op0,
5143	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: Op0.getValueType()),
5144	Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5145	}
5146	}
5147
5148	// Given:
5149	// icmp eq/ne (urem %x, %y), 0
5150	// Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
5151	// icmp eq/ne %x, 0
5152	if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
5153	(Cond == ISD::SETEQ \|\| Cond == ISD::SETNE)) {
5154	KnownBits XKnown = DAG.computeKnownBits(Op: N0.getOperand(i: `0`));
5155	KnownBits YKnown = DAG.computeKnownBits(Op: N0.getOperand(i: `1`));
5156	if (XKnown.countMaxPopulation() == `1` && YKnown.countMinPopulation() >= `2`)
5157	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`), RHS: N1, Cond);
5158	}
5159
5160	// Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
5161	// and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
5162	if ((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
5163	N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(Val: N0.getOperand(i: `1`)) &&
5164	N0.getConstantOperandAPInt(i: `1`) == OpVT.getScalarSizeInBits() - `1` &&
5165	N1C->isAllOnes()) {
5166	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`),
5167	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: OpVT),
5168	Cond: Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
5169	}
5170
5171	// fold (setcc (trunc x) c) -> (setcc x c)
5172	if (N0.getOpcode() == ISD::TRUNCATE &&
5173	((N0 ->getFlags().hasNoUnsignedWrap() && !ISD::isSignedIntSetCC(Code: Cond)) \|\|
5174	(N0 ->getFlags().hasNoSignedWrap() &&
5175	!ISD::isUnsignedIntSetCC(Code: Cond))) &&
5176	isTypeDesirableForOp(ISD::SETCC, VT: N0.getOperand(i: `0`).getValueType())) {
5177	EVT NewVT = N0.getOperand(i: `0`).getValueType();
5178	SDValue NewConst = DAG.getConstant(
5179	Val: (N0 ->getFlags().hasNoSignedWrap() && !ISD::isUnsignedIntSetCC(Code: Cond))
5180	? C1.sext(width: NewVT.getSizeInBits())
5181	: C1.zext(width: NewVT.getSizeInBits()),
5182	DL: dl, VT: NewVT);
5183	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`), RHS: NewConst, Cond);
5184	}
5185
5186	if (SDValue V =
5187	optimizeSetCCOfSignedTruncationCheck(SCCVT: VT, N0, N1, Cond, DCI, DL: dl))
5188	return V;
5189	}
5190
5191	// These simplifications apply to splat vectors as well.
5192	// TODO: Handle more splat vector cases.
5193	if (auto *N1C = isConstOrConstSplat(N: N1)) {
5194	const APInt &C1 = N1C->getAPIntValue();
5195
5196	APInt MinVal, MaxVal;
5197	unsigned OperandBitSize = N1C->getValueType(ResNo: `0`).getScalarSizeInBits();
5198	if (ISD::isSignedIntSetCC(Code: Cond)) {
5199	MinVal = APInt::getSignedMinValue(numBits: OperandBitSize);
5200	MaxVal = APInt::getSignedMaxValue(numBits: OperandBitSize);
5201	} else {
5202	MinVal = APInt::getMinValue(numBits: OperandBitSize);
5203	MaxVal = APInt::getMaxValue(numBits: OperandBitSize);
5204	}
5205
5206	// Canonicalize GE/LE comparisons to use GT/LT comparisons.
5207	if (Cond == ISD::SETGE \|\| Cond == ISD::SETUGE) {
5208	// X >= MIN --> true
5209	if (C1 == MinVal)
5210	return DAG.getBoolConstant(V: true, DL: dl, VT, OpVT);
5211
5212	if (!VT.isVector()) { // TODO: Support this for vectors.
5213	// X >= C0 --> X > (C0 - 1)
5214	APInt C = C1 - `1`;
5215	ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
5216	if ((DCI.isBeforeLegalizeOps() \|\|
5217	isCondCodeLegal(CC: NewCC, VT: OpVT.getSimpleVT())) &&
5218	(!N1C->isOpaque() \|\| (C.getBitWidth() <= `64` &&
5219	isLegalICmpImmediate(C.getSExtValue())))) {
5220	return DAG.getSetCC(DL: dl, VT, LHS: N0,
5221	RHS: DAG.getConstant(Val: C, DL: dl, VT: N1.getValueType()),
5222	Cond: NewCC);
5223	}
5224	}
5225	}
5226
5227	if (Cond == ISD::SETLE \|\| Cond == ISD::SETULE) {
5228	// X <= MAX --> true
5229	if (C1 == MaxVal)
5230	return DAG.getBoolConstant(V: true, DL: dl, VT, OpVT);
5231
5232	// X <= C0 --> X < (C0 + 1)
5233	if (!VT.isVector()) { // TODO: Support this for vectors.
5234	APInt C = C1 + `1`;
5235	ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
5236	if ((DCI.isBeforeLegalizeOps() \|\|
5237	isCondCodeLegal(CC: NewCC, VT: OpVT.getSimpleVT())) &&
5238	(!N1C->isOpaque() \|\| (C.getBitWidth() <= `64` &&
5239	isLegalICmpImmediate(C.getSExtValue())))) {
5240	return DAG.getSetCC(DL: dl, VT, LHS: N0,
5241	RHS: DAG.getConstant(Val: C, DL: dl, VT: N1.getValueType()),
5242	Cond: NewCC);
5243	}
5244	}
5245	}
5246
5247	if (Cond == ISD::SETLT \|\| Cond == ISD::SETULT) {
5248	if (C1 == MinVal)
5249	return DAG.getBoolConstant(V: false, DL: dl, VT, OpVT); // X < MIN --> false
5250
5251	// TODO: Support this for vectors after legalize ops.
5252	if (!VT.isVector() \|\| DCI.isBeforeLegalizeOps()) {
5253	// Canonicalize setlt X, Max --> setne X, Max
5254	if (C1 == MaxVal)
5255	return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: ISD::SETNE);
5256
5257	// If we have setult X, 1, turn it into seteq X, 0
5258	if (C1 == MinVal +`1`)
5259	return DAG.getSetCC(DL: dl, VT, LHS: N0,
5260	RHS: DAG.getConstant(Val: MinVal, DL: dl, VT: N0.getValueType()),
5261	Cond: ISD::SETEQ);
5262	}
5263	}
5264
5265	if (Cond == ISD::SETGT \|\| Cond == ISD::SETUGT) {
5266	if (C1 == MaxVal)
5267	return DAG.getBoolConstant(V: false, DL: dl, VT, OpVT); // X > MAX --> false
5268
5269	// TODO: Support this for vectors after legalize ops.
5270	if (!VT.isVector() \|\| DCI.isBeforeLegalizeOps()) {
5271	// Canonicalize setgt X, Min --> setne X, Min
5272	if (C1 == MinVal)
5273	return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: ISD::SETNE);
5274
5275	// If we have setugt X, Max-1, turn it into seteq X, Max
5276	if (C1 == MaxVal -`1`)
5277	return DAG.getSetCC(DL: dl, VT, LHS: N0,
5278	RHS: DAG.getConstant(Val: MaxVal, DL: dl, VT: N0.getValueType()),
5279	Cond: ISD::SETEQ);
5280	}
5281	}
5282
5283	if (Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) {
5284	// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5285	if (C1.isZero())
5286	if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5287	SCCVT: VT, N0, N1C: N1, Cond, DCI, DL: dl))
5288	return CC;
5289
5290	// For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5291	// For example, when high 32-bits of i64 X are known clear:
5292	// all bits clear: (X \| (Y<<32)) == 0 --> (X \| Y) == 0
5293	// all bits set: (X \| (Y<<32)) == -1 --> (X & Y) == -1
5294	bool CmpZero = N1C->isZero();
5295	bool CmpNegOne = N1C->isAllOnes();
5296	if ((CmpZero \|\| CmpNegOne) && N0.hasOneUse()) {
5297	// Match or(lo,shl(hi,bw/2)) pattern.
5298	auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5299	unsigned EltBits = V.getScalarValueSizeInBits();
5300	if (V.getOpcode() != ISD::OR \|\| (EltBits % `2`) != `0`)
5301	return false;
5302	SDValue LHS = V.getOperand(i: `0`);
5303	SDValue RHS = V.getOperand(i: `1`);
5304	APInt HiBits = APInt::getHighBitsSet(numBits: EltBits, hiBitsSet: EltBits / `2`);
5305	// Unshifted element must have zero upperbits.
5306	if (RHS.getOpcode() == ISD::SHL &&
5307	isa<ConstantSDNode>(Val: RHS.getOperand(i: `1`)) &&
5308	RHS.getConstantOperandAPInt(i: `1`) == (EltBits / `2`) &&
5309	DAG.MaskedValueIsZero(Op: LHS, Mask: HiBits)) {
5310	Lo = LHS;
5311	Hi = RHS.getOperand(i: `0`);
5312	return true;
5313	}
5314	if (LHS.getOpcode() == ISD::SHL &&
5315	isa<ConstantSDNode>(Val: LHS.getOperand(i: `1`)) &&
5316	LHS.getConstantOperandAPInt(i: `1`) == (EltBits / `2`) &&
5317	DAG.MaskedValueIsZero(Op: RHS, Mask: HiBits)) {
5318	Lo = RHS;
5319	Hi = LHS.getOperand(i: `0`);
5320	return true;
5321	}
5322	return false;
5323	};
5324
5325	auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5326	unsigned EltBits = N0.getScalarValueSizeInBits();
5327	unsigned HalfBits = EltBits / `2`;
5328	APInt HiBits = APInt::getHighBitsSet(numBits: EltBits, hiBitsSet: HalfBits);
5329	SDValue LoBits = DAG.getConstant(Val: ~HiBits, DL: dl, VT: OpVT);
5330	SDValue HiMask = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1: Hi, N2: LoBits);
5331	SDValue NewN0 =
5332	DAG.getNode(Opcode: CmpZero ? ISD::OR : ISD::AND, DL: dl, VT: OpVT, N1: Lo, N2: HiMask);
5333	SDValue NewN1 = CmpZero ? DAG.getConstant(Val: `0`, DL: dl, VT: OpVT) : LoBits;
5334	return DAG.getSetCC(DL: dl, VT, LHS: NewN0, RHS: NewN1, Cond);
5335	};
5336
5337	SDValue Lo, Hi;
5338	if (IsConcat (N0, Lo, Hi))
5339	return MergeConcat (Lo, Hi);
5340
5341	if (N0.getOpcode() == ISD::AND \|\| N0.getOpcode() == ISD::OR) {
5342	SDValue Lo0, Lo1, Hi0, Hi1;
5343	if (IsConcat (N0.getOperand(i: `0`), Lo0, Hi0) &&
5344	IsConcat (N0.getOperand(i: `1`), Lo1, Hi1)) {
5345	return MergeConcat (DAG.getNode(Opcode: N0.getOpcode(), DL: dl, VT: OpVT, N1: Lo0, N2: Lo1),
5346	DAG.getNode(Opcode: N0.getOpcode(), DL: dl, VT: OpVT, N1: Hi0, N2: Hi1));
5347	}
5348	}
5349	}
5350	}
5351
5352	// If we have "setcc X, C0", check to see if we can shrink the immediate
5353	// by changing cc.
5354	// TODO: Support this for vectors after legalize ops.
5355	if (!VT.isVector() \|\| DCI.isBeforeLegalizeOps()) {
5356	// SETUGT X, SINTMAX -> SETLT X, 0
5357	// SETUGE X, SINTMIN -> SETLT X, 0
5358	if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) \|\|
5359	(Cond == ISD::SETUGE && C1.isMinSignedValue()))
5360	return DAG.getSetCC(DL: dl, VT, LHS: N0,
5361	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: N1.getValueType()),
5362	Cond: ISD::SETLT);
5363
5364	// SETULT X, SINTMIN -> SETGT X, -1
5365	// SETULE X, SINTMAX -> SETGT X, -1
5366	if ((Cond == ISD::SETULT && C1.isMinSignedValue()) \|\|
5367	(Cond == ISD::SETULE && C1.isMaxSignedValue()))
5368	return DAG.getSetCC(DL: dl, VT, LHS: N0,
5369	RHS: DAG.getAllOnesConstant(DL: dl, VT: N1.getValueType()),
5370	Cond: ISD::SETGT);
5371	}
5372	}
5373
5374	// Back to non-vector simplifications.
5375	// TODO: Can we do these for vector splats?
5376	if (auto *N1C = dyn_cast<ConstantSDNode>(Val: N1.getNode())) {
5377	const APInt &C1 = N1C->getAPIntValue();
5378	EVT ShValTy = N0.getValueType();
5379
5380	// Fold bit comparisons when we can. This will result in an
5381	// incorrect value when boolean false is negative one, unless
5382	// the bitsize is 1 in which case the false value is the same
5383	// in practice regardless of the representation.
5384	if ((VT.getSizeInBits() == `1` \|\|
5385	getBooleanContents(Type: N0.getValueType()) == ZeroOrOneBooleanContent) &&
5386	(Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
5387	(VT == ShValTy \|\| (isTypeLegal(VT) && VT.bitsLE(VT: ShValTy))) &&
5388	N0.getOpcode() == ISD::AND) {
5389	if (auto *AndRHS = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: `1`))) {
5390	if (Cond == ISD::SETNE && C1 == `0`) {// (X & 8) != 0 --> (X & 8) >> 3
5391	// Perform the xform if the AND RHS is a single bit.
5392	unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5393	if (AndRHS->getAPIntValue().isPowerOf2() &&
5394	!shouldAvoidTransformToShift(VT: ShValTy, Amount: ShCt)) {
5395	return DAG.getNode(
5396	Opcode: ISD::TRUNCATE, DL: dl, VT,
5397	Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5398	N2: DAG.getShiftAmountConstant(Val: ShCt, VT: ShValTy, DL: dl)));
5399	}
5400	} else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5401	// (X & 8) == 8 --> (X & 8) >> 3
5402	// Perform the xform if C1 is a single bit.
5403	unsigned ShCt = C1.logBase2();
5404	if (C1.isPowerOf2() && !shouldAvoidTransformToShift(VT: ShValTy, Amount: ShCt)) {
5405	return DAG.getNode(
5406	Opcode: ISD::TRUNCATE, DL: dl, VT,
5407	Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5408	N2: DAG.getShiftAmountConstant(Val: ShCt, VT: ShValTy, DL: dl)));
5409	}
5410	}
5411	}
5412	}
5413
5414	if (C1.getSignificantBits() <= `64` &&
5415	!isLegalICmpImmediate(C1.getSExtValue())) {
5416	// (X & -256) == 256 -> (X >> 8) == 1
5417	if ((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
5418	N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5419	if (auto *AndRHS = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: `1`))) {
5420	const APInt &AndRHSC = AndRHS->getAPIntValue();
5421	if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(RHS: AndRHSC)) {
5422	unsigned ShiftBits = AndRHSC.countr_zero();
5423	if (!shouldAvoidTransformToShift(VT: ShValTy, Amount: ShiftBits)) {
5424	// If using an unsigned shift doesn't yield a legal compare
5425	// immediate, try using sra instead.
5426	APInt NewC = C1.lshr(shiftAmt: ShiftBits);
5427	if (NewC.getSignificantBits() <= `64` &&
5428	!isLegalICmpImmediate(NewC.getSExtValue())) {
5429	APInt SignedC = C1.ashr(ShiftAmt: ShiftBits);
5430	if (SignedC.getSignificantBits() <= `64` &&
5431	isLegalICmpImmediate(SignedC.getSExtValue())) {
5432	SDValue Shift = DAG.getNode(
5433	Opcode: ISD::SRA, DL: dl, VT: ShValTy, N1: N0.getOperand(i: `0`),
5434	N2: DAG.getShiftAmountConstant(Val: ShiftBits, VT: ShValTy, DL: dl));
5435	SDValue CmpRHS = DAG.getConstant(Val: SignedC, DL: dl, VT: ShValTy);
5436	return DAG.getSetCC(DL: dl, VT, LHS: Shift, RHS: CmpRHS, Cond);
5437	}
5438	}
5439	SDValue Shift = DAG.getNode(
5440	Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0.getOperand(i: `0`),
5441	N2: DAG.getShiftAmountConstant(Val: ShiftBits, VT: ShValTy, DL: dl));
5442	SDValue CmpRHS = DAG.getConstant(Val: NewC, DL: dl, VT: ShValTy);
5443	return DAG.getSetCC(DL: dl, VT, LHS: Shift, RHS: CmpRHS, Cond);
5444	}
5445	}
5446	}
5447	} else if (Cond == ISD::SETULT \|\| Cond == ISD::SETUGE \|\|
5448	Cond == ISD::SETULE \|\| Cond == ISD::SETUGT) {
5449	bool AdjOne = (Cond == ISD::SETULE \|\| Cond == ISD::SETUGT);
5450	// X < 0x100000000 -> (X >> 32) < 1
5451	// X >= 0x100000000 -> (X >> 32) >= 1
5452	// X <= 0x0ffffffff -> (X >> 32) < 1
5453	// X > 0x0ffffffff -> (X >> 32) >= 1
5454	unsigned ShiftBits;
5455	APInt NewC = C1;
5456	ISD::CondCode NewCond = Cond;
5457	if (AdjOne) {
5458	ShiftBits = C1.countr_one();
5459	NewC = NewC + `1`;
5460	NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5461	} else {
5462	ShiftBits = C1.countr_zero();
5463	}
5464	NewC.lshrInPlace(ShiftAmt: ShiftBits);
5465	if (ShiftBits && NewC.getSignificantBits() <= `64` &&
5466	isLegalICmpImmediate(NewC.getSExtValue()) &&
5467	!shouldAvoidTransformToShift(VT: ShValTy, Amount: ShiftBits)) {
5468	SDValue Shift =
5469	DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5470	N2: DAG.getShiftAmountConstant(Val: ShiftBits, VT: ShValTy, DL: dl));
5471	SDValue CmpRHS = DAG.getConstant(Val: NewC, DL: dl, VT: ShValTy);
5472	return DAG.getSetCC(DL: dl, VT, LHS: Shift, RHS: CmpRHS, Cond: NewCond);
5473	}
5474	}
5475	}
5476	}
5477
5478	if (!isa<ConstantFPSDNode>(Val: N0) && isa<ConstantFPSDNode>(Val: N1)) {
5479	auto *CFP = cast<ConstantFPSDNode>(Val&: N1);
5480	assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5481
5482	// Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5483	// constant if knowing that the operand is non-nan is enough. We prefer to
5484	// have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5485	// materialize 0.0.
5486	if (Cond == ISD::SETO \|\| Cond == ISD::SETUO)
5487	return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N0, Cond);
5488
5489	// setcc (fneg x), C -> setcc swap(pred) x, -C
5490	if (N0.getOpcode() == ISD::FNEG) {
5491	ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Operation: Cond);
5492	if (DCI.isBeforeLegalizeOps() \|\|
5493	isCondCodeLegal(CC: SwapCond, VT: N0.getSimpleValueType())) {
5494	SDValue NegN1 = DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT: N0.getValueType(), Operand: N1);
5495	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`), RHS: NegN1, Cond: SwapCond);
5496	}
5497	}
5498
5499	// setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5500	if (isOperationLegalOrCustom(Op: ISD::IS_FPCLASS, VT: N0.getValueType()) &&
5501	!isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(ResNo: `0`))) {
5502	bool IsFabs = N0.getOpcode() == ISD::FABS;
5503	SDValue Op = IsFabs ? N0.getOperand(i: `0`) : N0;
5504	if ((Cond == ISD::SETOEQ \|\| Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5505	FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5506	: (IsFabs ? fcInf : fcPosInf);
5507	if (Cond == ISD::SETUEQ)
5508	Flag \|= fcNan;
5509	return DAG.getNode(Opcode: ISD::IS_FPCLASS, DL: dl, VT, N1: Op,
5510	N2: DAG.getTargetConstant(Val: Flag, DL: dl, VT: MVT::i32));
5511	}
5512	}
5513
5514	// If the condition is not legal, see if we can find an equivalent one
5515	// which is legal.
5516	if (!isCondCodeLegal(CC: Cond, VT: N0.getSimpleValueType())) {
5517	// If the comparison was an awkward floating-point == or != and one of
5518	// the comparison operands is infinity or negative infinity, convert the
5519	// condition to a less-awkward <= or >=.
5520	if (CFP->getValueAPF().isInfinity()) {
5521	bool IsNegInf = CFP->getValueAPF().isNegative();
5522	ISD::CondCode NewCond = ISD::SETCC_INVALID;
5523	switch (Cond) {
5524	case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5525	case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5526	case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5527	case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5528	default: break;
5529	}
5530	if (NewCond != ISD::SETCC_INVALID &&
5531	isCondCodeLegal(CC: NewCond, VT: N0.getSimpleValueType()))
5532	return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: NewCond);
5533	}
5534	}
5535	}
5536
5537	if (N0 == N1) {
5538	// The sext(setcc()) => setcc() optimization relies on the appropriate
5539	// constant being emitted.
5540	assert(!N0.getValueType().isInteger() &&
5541	"Integer types should be handled by FoldSetCC");
5542
5543	bool EqTrue = ISD::isTrueWhenEqual(Cond);
5544	unsigned UOF = ISD::getUnorderedFlavor(Cond);
5545	if (UOF == `2`) // FP operators that are undefined on NaNs.
5546	return DAG.getBoolConstant(V: EqTrue, DL: dl, VT, OpVT);
5547	if (UOF == unsigned(EqTrue))
5548	return DAG.getBoolConstant(V: EqTrue, DL: dl, VT, OpVT);
5549	// Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5550	// if it is not already.
5551	ISD::CondCode NewCond = UOF == `0` ? ISD::SETO : ISD::SETUO;
5552	if (NewCond != Cond &&
5553	(DCI.isBeforeLegalizeOps() \|\|
5554	isCondCodeLegal(CC: NewCond, VT: N0.getSimpleValueType())))
5555	return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: NewCond);
5556	}
5557
5558	// ~X > ~Y --> Y > X
5559	// ~X < ~Y --> Y < X
5560	// ~X < C --> X > ~C
5561	// ~X > C --> X < ~C
5562	if ((isSignedIntSetCC(Code: Cond) \|\| isUnsignedIntSetCC(Code: Cond)) &&
5563	N0.getValueType().isInteger()) {
5564	if (isBitwiseNot(V: N0)) {
5565	if (isBitwiseNot(V: N1))
5566	return DAG.getSetCC(DL: dl, VT, LHS: N1.getOperand(i: `0`), RHS: N0.getOperand(i: `0`), Cond);
5567
5568	if (DAG.isConstantIntBuildVectorOrConstantInt(N: N1) &&
5569	!DAG.isConstantIntBuildVectorOrConstantInt(N: N0.getOperand(i: `0`))) {
5570	SDValue Not = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5571	return DAG.getSetCC(DL: dl, VT, LHS: Not, RHS: N0.getOperand(i: `0`), Cond);
5572	}
5573	}
5574	}
5575
5576	if ((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
5577	N0.getValueType().isInteger()) {
5578	if (N0.getOpcode() == ISD::ADD \|\| N0.getOpcode() == ISD::SUB \|\|
5579	N0.getOpcode() == ISD::XOR) {
5580	// Simplify (X+Y) == (X+Z) --> Y == Z
5581	if (N0.getOpcode() == N1.getOpcode()) {
5582	if (N0.getOperand(i: `0`) == N1.getOperand(i: `0`))
5583	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `1`), RHS: N1.getOperand(i: `1`), Cond);
5584	if (N0.getOperand(i: `1`) == N1.getOperand(i: `1`))
5585	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`), RHS: N1.getOperand(i: `0`), Cond);
5586	if (isCommutativeBinOp(Opcode: N0.getOpcode())) {
5587	// If X op Y == Y op X, try other combinations.
5588	if (N0.getOperand(i: `0`) == N1.getOperand(i: `1`))
5589	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `1`), RHS: N1.getOperand(i: `0`),
5590	Cond);
5591	if (N0.getOperand(i: `1`) == N1.getOperand(i: `0`))
5592	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`), RHS: N1.getOperand(i: `1`),
5593	Cond);
5594	}
5595	}
5596
5597	// If RHS is a legal immediate value for a compare instruction, we need
5598	// to be careful about increasing register pressure needlessly.
5599	bool LegalRHSImm = false;
5600
5601	if (auto *RHSC = dyn_cast<ConstantSDNode>(Val&: N1)) {
5602	if (auto *LHSR = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: `1`))) {
5603	// Turn (X+C1) == C2 --> X == C2-C1
5604	if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5605	return DAG.getSetCC(
5606	DL: dl, VT, LHS: N0.getOperand(i: `0`),
5607	RHS: DAG.getConstant(Val: RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5608	DL: dl, VT: N0.getValueType()),
5609	Cond);
5610
5611	// Turn (X^C1) == C2 --> X == C1^C2
5612	if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5613	return DAG.getSetCC(
5614	DL: dl, VT, LHS: N0.getOperand(i: `0`),
5615	RHS: DAG.getConstant(Val: LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5616	DL: dl, VT: N0.getValueType()),
5617	Cond);
5618	}
5619
5620	// Turn (C1-X) == C2 --> X == C1-C2
5621	if (auto *SUBC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: `0`)))
5622	if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5623	return DAG.getSetCC(
5624	DL: dl, VT, LHS: N0.getOperand(i: `1`),
5625	RHS: DAG.getConstant(Val: SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5626	DL: dl, VT: N0.getValueType()),
5627	Cond);
5628
5629	// Could RHSC fold directly into a compare?
5630	if (RHSC->getValueType(ResNo: `0`).getSizeInBits() <= `64`)
5631	LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5632	}
5633
5634	// (X+Y) == X --> Y == 0 and similar folds.
5635	// Don't do this if X is an immediate that can fold into a cmp
5636	// instruction and X+Y has other uses. It could be an induction variable
5637	// chain, and the transform would increase register pressure.
5638	if (!LegalRHSImm \|\| N0.hasOneUse())
5639	if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, DL: dl, DCI))
5640	return V;
5641	}
5642
5643	if (N1.getOpcode() == ISD::ADD \|\| N1.getOpcode() == ISD::SUB \|\|
5644	N1.getOpcode() == ISD::XOR)
5645	if (SDValue V = foldSetCCWithBinOp(VT, N0: N1, N1: N0, Cond, DL: dl, DCI))
5646	return V;
5647
5648	if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, DL: dl, DCI))
5649	return V;
5650
5651	if (SDValue V = foldSetCCWithOr(VT, N0, N1, Cond, DL: dl, DCI))
5652	return V;
5653	}
5654
5655	// Fold remainder of division by a constant.
5656	if ((N0.getOpcode() == ISD::UREM \|\| N0.getOpcode() == ISD::SREM) &&
5657	N0.hasOneUse() && (Cond == ISD::SETEQ \|\| Cond == ISD::SETNE)) {
5658	// When division is cheap or optimizing for minimum size,
5659	// fall through to DIVREM creation by skipping this fold.
5660	if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Kind: Attribute::MinSize)) {
5661	if (N0.getOpcode() == ISD::UREM) {
5662	if (SDValue Folded = buildUREMEqFold(SETCCVT: VT, REMNode: N0, CompTargetNode: N1, Cond, DCI, DL: dl))
5663	return Folded;
5664	} else if (N0.getOpcode() == ISD::SREM) {
5665	if (SDValue Folded = buildSREMEqFold(SETCCVT: VT, REMNode: N0, CompTargetNode: N1, Cond, DCI, DL: dl))
5666	return Folded;
5667	}
5668	}
5669	}
5670
5671	// Fold away ALL boolean setcc's.
5672	if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5673	SDValue Temp;
5674	switch (Cond) {
5675	default: llvm_unreachable("Unknown integer setcc!");
5676	case ISD::SETEQ: // X == Y -> ~(X^Y)
5677	Temp = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OpVT, N1: N0, N2: N1);
5678	N0 = DAG.getNOT(DL: dl, Val: Temp, VT: OpVT);
5679	if (!DCI.isCalledByLegalizer())
5680	DCI.AddToWorklist(N: Temp.getNode());
5681	break;
5682	case ISD::SETNE: // X != Y --> (X^Y)
5683	N0 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OpVT, N1: N0, N2: N1);
5684	break;
5685	case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5686	case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5687	Temp = DAG.getNOT(DL: dl, Val: N0, VT: OpVT);
5688	N0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1, N2: Temp);
5689	if (!DCI.isCalledByLegalizer())
5690	DCI.AddToWorklist(N: Temp.getNode());
5691	break;
5692	case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5693	case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5694	Temp = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5695	N0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1: N0, N2: Temp);
5696	if (!DCI.isCalledByLegalizer())
5697	DCI.AddToWorklist(N: Temp.getNode());
5698	break;
5699	case ISD::SETULE: // X <=u Y --> X == 0 \| Y == 1 --> ~X \| Y
5700	case ISD::SETGE: // X >=s Y --> X == 0 \| Y == 1 --> ~X \| Y
5701	Temp = DAG.getNOT(DL: dl, Val: N0, VT: OpVT);
5702	N0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1, N2: Temp);
5703	if (!DCI.isCalledByLegalizer())
5704	DCI.AddToWorklist(N: Temp.getNode());
5705	break;
5706	case ISD::SETUGE: // X >=u Y --> X == 1 \| Y == 0 --> ~Y \| X
5707	case ISD::SETLE: // X <=s Y --> X == 1 \| Y == 0 --> ~Y \| X
5708	Temp = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5709	N0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: N0, N2: Temp);
5710	break;
5711	}
5712	if (VT.getScalarType() != MVT::i1) {
5713	if (!DCI.isCalledByLegalizer())
5714	DCI.AddToWorklist(N: N0.getNode());
5715	// FIXME: If running after legalize, we probably can't do this.
5716	ISD::NodeType ExtendCode = getExtendForContent(Content: getBooleanContents(Type: OpVT));
5717	N0 = DAG.getNode(Opcode: ExtendCode, DL: dl, VT, Operand: N0);
5718	}
5719	return N0;
5720	}
5721
5722	// Fold (setcc (trunc x) (trunc y)) -> (setcc x y)
5723	if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE &&
5724	N0.getOperand(i: `0`).getValueType() == N1.getOperand(i: `0`).getValueType() &&
5725	((!ISD::isSignedIntSetCC(Code: Cond) && N0 ->getFlags().hasNoUnsignedWrap() &&
5726	N1 ->getFlags().hasNoUnsignedWrap()) \|\|
5727	(!ISD::isUnsignedIntSetCC(Code: Cond) && N0 ->getFlags().hasNoSignedWrap() &&
5728	N1 ->getFlags().hasNoSignedWrap())) &&
5729	isTypeDesirableForOp(ISD::SETCC, VT: N0.getOperand(i: `0`).getValueType())) {
5730	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`), RHS: N1.getOperand(i: `0`), Cond);
5731	}
5732
5733	// Fold (setcc (sub nsw a, b), zero, s??) -> (setcc a, b, s??)
5734	// TODO: Remove that .isVector() check
5735	if (VT.isVector() && isZeroOrZeroSplat(N: N1) && N0.getOpcode() == ISD::SUB &&
5736	N0 ->getFlags().hasNoSignedWrap() && ISD::isSignedIntSetCC(Code: Cond)) {
5737	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`), RHS: N0.getOperand(i: `1`), Cond);
5738	}
5739
5740	// Could not fold it.
5741	return SDValue ();
5742	}
5743
5744	/// Returns true (and the GlobalValue and the offset) if the node is a
5745	/// GlobalAddress + offset.
5746	bool TargetLowering::isGAPlusOffset(SDNode WN, const* GlobalValue *&GA,
5747	int64_t &Offset) const {
5748
5749	SDNode *N = unwrapAddress(N: SDValue (WN, `0`)).getNode();
5750
5751	if (auto *GASD = dyn_cast<GlobalAddressSDNode>(Val: N)) {
5752	GA = GASD->getGlobal();
5753	Offset += GASD->getOffset();
5754	return true;
5755	}
5756
5757	if (N->isAnyAdd()) {
5758	SDValue N1 = N->getOperand(Num: `0`);
5759	SDValue N2 = N->getOperand(Num: `1`);
5760	if (isGAPlusOffset(WN: N1.getNode(), GA, Offset)) {
5761	if (auto *V = dyn_cast<ConstantSDNode>(Val&: N2)) {
5762	Offset += V->getSExtValue();
5763	return true;
5764	}
5765	} else if (isGAPlusOffset(WN: N2.getNode(), GA, Offset)) {
5766	if (auto *V = dyn_cast<ConstantSDNode>(Val&: N1)) {
5767	Offset += V->getSExtValue();
5768	return true;
5769	}
5770	}
5771	}
5772
5773	return false;
5774	}
5775
5776	SDValue TargetLowering::PerformDAGCombine(SDNode *N,
5777	DAGCombinerInfo &DCI) const {
5778	// Default implementation: no optimization.
5779	return SDValue ();
5780	}
5781
5782	//===----------------------------------------------------------------------===//
5783	// Inline Assembler Implementation Methods
5784	//===----------------------------------------------------------------------===//
5785
5786	TargetLowering::ConstraintType
5787	TargetLowering::getConstraintType(StringRef Constraint) const {
5788	unsigned S = Constraint.size();
5789
5790	if (S == `1`) {
5791	switch (Constraint [`0`]) {
5792	default: break;
5793	case `'r'`:
5794	return C_RegisterClass;
5795	case `'m'`: // memory
5796	case `'o'`: // offsetable
5797	case `'V'`: // not offsetable
5798	return C_Memory;
5799	case `'p'`: // Address.
5800	return C_Address;
5801	case `'n'`: // Simple Integer
5802	case `'E'`: // Floating Point Constant
5803	case `'F'`: // Floating Point Constant
5804	return C_Immediate;
5805	case `'i'`: // Simple Integer or Relocatable Constant
5806	case `'s'`: // Relocatable Constant
5807	case `'X'`: // Allow ANY value.
5808	case `'I'`: // Target registers.
5809	case `'J'`:
5810	case `'K'`:
5811	case `'L'`:
5812	case `'M'`:
5813	case `'N'`:
5814	case `'O'`:
5815	case `'P'`:
5816	case `'<'`:
5817	case `'>'`:
5818	return C_Other;
5819	}
5820	}
5821
5822	if (S > `1` && Constraint [`0`] == `'{'` && Constraint [S - `1`] == `'}'`) {
5823	if (S == `8` && Constraint.substr(Start: `1`, N: `6`) == "memory") // "{memory}"
5824	return C_Memory;
5825	return C_Register;
5826	}
5827	return C_Unknown;
5828	}
5829
5830	/// Try to replace an X constraint, which matches anything, with another that
5831	/// has more specific requirements based on the type of the corresponding
5832	/// operand.
5833	const char TargetLowering::LowerXConstraint(EVT ConstraintVT) const* {
5834	if (ConstraintVT.isInteger())
5835	return "r";
5836	if (ConstraintVT.isFloatingPoint())
5837	return "f"; // works for many targets
5838	return nullptr;
5839	}
5840
5841	SDValue TargetLowering::LowerAsmOutputForConstraint(
5842	SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5843	const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5844	return SDValue ();
5845	}
5846
5847	/// Lower the specified operand into the Ops vector.
5848	/// If it is invalid, don't add anything to Ops.
5849	void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
5850	StringRef Constraint,
5851	std::vector<SDValue> &Ops,
5852	SelectionDAG &DAG) const {
5853
5854	if (Constraint.size() > `1`)
5855	return;
5856
5857	char ConstraintLetter = Constraint [`0`];
5858	switch (ConstraintLetter) {
5859	default: break;
5860	case `'X'`: // Allows any operand
5861	case `'i'`: // Simple Integer or Relocatable Constant
5862	case `'n'`: // Simple Integer
5863	case `'s'`: { // Relocatable Constant
5864
5865	ConstantSDNode *C;
5866	uint64_t Offset = `0`;
5867
5868	// Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5869	// etc., since getelementpointer is variadic. We can't use
5870	// SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5871	// while in this case the GA may be furthest from the root node which is
5872	// likely an ISD::ADD.
5873	while (true) {
5874	if ((C = dyn_cast<ConstantSDNode>(Val&: Op)) && ConstraintLetter != `'s'`) {
5875	// gcc prints these as sign extended. Sign extend value to 64 bits
5876	// now; without this it would get ZExt'd later in
5877	// ScheduleDAGSDNodes::EmitNode, which is very generic.
5878	bool IsBool = C->getConstantIntValue()->getBitWidth() == `1`;
5879	BooleanContent BCont = getBooleanContents(Type: MVT::i64);
5880	ISD::NodeType ExtOpc =
5881	IsBool ? getExtendForContent(Content: BCont) : ISD::SIGN_EXTEND;
5882	int64_t ExtVal =
5883	ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5884	Ops.push_back(
5885	x: DAG.getTargetConstant(Val: Offset + ExtVal, DL: SDLoc (C), VT: MVT::i64));
5886	return;
5887	}
5888	if (ConstraintLetter != `'n'`) {
5889	if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Val&: Op)) {
5890	Ops.push_back(x: DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL: SDLoc (Op),
5891	VT: GA->getValueType(ResNo: `0`),
5892	offset: Offset + GA->getOffset()));
5893	return;
5894	}
5895	if (const auto *BA = dyn_cast<BlockAddressSDNode>(Val&: Op)) {
5896	Ops.push_back(x: DAG.getTargetBlockAddress(
5897	BA: BA->getBlockAddress(), VT: BA->getValueType(ResNo: `0`),
5898	Offset: Offset + BA->getOffset(), TargetFlags: BA->getTargetFlags()));
5899	return;
5900	}
5901	if (isa<BasicBlockSDNode>(Val: Op)) {
5902	Ops.push_back(x: Op);
5903	return;
5904	}
5905	}
5906	const unsigned OpCode = Op.getOpcode();
5907	if (OpCode == ISD::ADD \|\| OpCode == ISD::SUB) {
5908	if ((C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `0`))))
5909	Op = Op.getOperand(i: `1`);
5910	// Subtraction is not commutative.
5911	else if (OpCode == ISD::ADD &&
5912	(C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `1`))))
5913	Op = Op.getOperand(i: `0`);
5914	else
5915	return;
5916	Offset += (OpCode == ISD::ADD ? `1` : -`1`) * C->getSExtValue();
5917	continue;
5918	}
5919	return;
5920	}
5921	break;
5922	}
5923	}
5924	}
5925
5926	void TargetLowering::CollectTargetIntrinsicOperands(
5927	const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
5928	}
5929
5930	std::pair<unsigned, const TargetRegisterClass *>
5931	TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
5932	StringRef Constraint,
5933	MVT VT) const {
5934	if (!Constraint.starts_with(Prefix: "{"))
5935	return std::make_pair(x: `0u`, y: static_cast<TargetRegisterClass >(nullptr*));
5936	assert(*(Constraint.end() - `1`) == `'}'` && "Not a brace enclosed constraint?");
5937
5938	// Remove the braces from around the name.
5939	StringRef RegName(Constraint.data() + `1`, Constraint.size() - `2`);
5940
5941	std::pair<unsigned, const TargetRegisterClass *> R =
5942	std::make_pair(x: `0u`, y: static_cast<const TargetRegisterClass >(nullptr*));
5943
5944	// Figure out which register class contains this reg.
5945	for (const TargetRegisterClass *RC : RI->regclasses()) {
5946	// If none of the value types for this register class are valid, we
5947	// can't use it. For example, 64-bit reg classes on 32-bit targets.
5948	if (!isLegalRC(TRI: RI, RC: RC))
5949	continue;
5950
5951	for (const MCPhysReg &PR : *RC) {
5952	if (RegName.equals_insensitive(RHS: RI->getRegAsmName(Reg: PR))) {
5953	std::pair<unsigned, const TargetRegisterClass *> S =
5954	std::make_pair(x: PR, y&: RC);
5955
5956	// If this register class has the requested value type, return it,
5957	// otherwise keep searching and return the first class found
5958	// if no other is found which explicitly has the requested type.
5959	if (RI->isTypeLegalForClass(RC: *RC, T: VT))
5960	return S;
5961	if (!R.second)
5962	R = S;
5963	}
5964	}
5965	}
5966
5967	return R;
5968	}
5969
5970	//===----------------------------------------------------------------------===//
5971	// Constraint Selection.
5972
5973	/// Return true of this is an input operand that is a matching constraint like
5974	/// "4".
5975	bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
5976	assert(!ConstraintCode.empty() && "No known constraint!");
5977	return isdigit(static_cast<unsigned char>(ConstraintCode [`0`]));
5978	}
5979
5980	/// If this is an input matching constraint, this method returns the output
5981	/// operand it matches.
5982	unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
5983	assert(!ConstraintCode.empty() && "No known constraint!");
5984	return atoi(nptr: ConstraintCode.c_str());
5985	}
5986
5987	/// Split up the constraint string from the inline assembly value into the
5988	/// specific constraints and their prefixes, and also tie in the associated
5989	/// operand values.
5990	/// If this returns an empty vector, and if the constraint string itself
5991	/// isn't empty, there was an error parsing.
5992	TargetLowering::AsmOperandInfoVector
5993	TargetLowering::ParseConstraints(const DataLayout &DL,
5994	const TargetRegisterInfo *TRI,
5995	const CallBase &Call) const {
5996	/// Information about all of the constraints.
5997	AsmOperandInfoVector ConstraintOperands;
5998	const InlineAsm *IA = cast<InlineAsm>(Val: Call.getCalledOperand());
5999	unsigned maCount = `0`; // Largest number of multiple alternative constraints.
6000
6001	// Do a prepass over the constraints, canonicalizing them, and building up the
6002	// ConstraintOperands list.
6003	unsigned ArgNo = `0`; // ArgNo - The argument of the CallInst.
6004	unsigned ResNo = `0`; // ResNo - The result number of the next output.
6005	unsigned LabelNo = `0`; // LabelNo - CallBr indirect dest number.
6006
6007	for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
6008	ConstraintOperands.emplace_back(args: std::move(CI));
6009	AsmOperandInfo &OpInfo = ConstraintOperands.back();
6010
6011	// Update multiple alternative constraint count.
6012	if (OpInfo.multipleAlternatives.size() > maCount)
6013	maCount = OpInfo.multipleAlternatives.size();
6014
6015	OpInfo.ConstraintVT = MVT::Other;
6016
6017	// Compute the value type for each operand.
6018	switch (OpInfo.Type) {
6019	case InlineAsm::isOutput: {
6020	// Indirect outputs just consume an argument.
6021	if (OpInfo.isIndirect) {
6022	OpInfo.CallOperandVal = Call.getArgOperand(i: ArgNo);
6023	break;
6024	}
6025
6026	// The return value of the call is this value. As such, there is no
6027	// corresponding argument.
6028	assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
6029	EVT VT;
6030	if (auto *STy = dyn_cast<StructType>(Val: Call.getType())) {
6031	VT = getAsmOperandValueType(DL, Ty: STy->getElementType(N: ResNo));
6032	} else {
6033	assert(ResNo == `0` && "Asm only has one result!");
6034	VT = getAsmOperandValueType(DL, Ty: Call.getType());
6035	}
6036	OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
6037	++ResNo;
6038	break;
6039	}
6040	case InlineAsm::isInput:
6041	OpInfo.CallOperandVal = Call.getArgOperand(i: ArgNo);
6042	break;
6043	case InlineAsm::isLabel:
6044	OpInfo.CallOperandVal = cast<CallBrInst>(Val: &Call)->getIndirectDest(i: LabelNo);
6045	++LabelNo;
6046	continue;
6047	case InlineAsm::isClobber:
6048	// Nothing to do.
6049	break;
6050	}
6051
6052	if (OpInfo.CallOperandVal) {
6053	llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
6054	if (OpInfo.isIndirect) {
6055	OpTy = Call.getParamElementType(ArgNo);
6056	assert(OpTy && "Indirect operand must have elementtype attribute");
6057	}
6058
6059	// Look for vector wrapped in a struct. e.g. { <16 x i8> }.
6060	if (StructType *STy = dyn_cast<StructType>(Val: OpTy))
6061	if (STy->getNumElements() == `1`)
6062	OpTy = STy->getElementType(N: `0`);
6063
6064	// If OpTy is not a single value, it may be a struct/union that we
6065	// can tile with integers.
6066	if (!OpTy->isSingleValueType() && OpTy->isSized()) {
6067	unsigned BitSize = DL.getTypeSizeInBits(Ty: OpTy);
6068	switch (BitSize) {
6069	default: break;
6070	case `1`:
6071	case `8`:
6072	case `16`:
6073	case `32`:
6074	case `64`:
6075	case `128`:
6076	OpTy = IntegerType::get(C&: OpTy->getContext(), NumBits: BitSize);
6077	break;
6078	}
6079	}
6080
6081	EVT VT = getAsmOperandValueType(DL, Ty: OpTy, AllowUnknown: true);
6082	OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
6083	ArgNo++;
6084	}
6085	}
6086
6087	// If we have multiple alternative constraints, select the best alternative.
6088	if (!ConstraintOperands.empty()) {
6089	if (maCount) {
6090	unsigned bestMAIndex = `0`;
6091	int bestWeight = -`1`;
6092	// weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
6093	int weight = -`1`;
6094	unsigned maIndex;
6095	// Compute the sums of the weights for each alternative, keeping track
6096	// of the best (highest weight) one so far.
6097	for (maIndex = `0`; maIndex < maCount; ++maIndex) {
6098	int weightSum = `0`;
6099	for (unsigned cIndex = `0`, eIndex = ConstraintOperands.size();
6100	cIndex != eIndex; ++cIndex) {
6101	AsmOperandInfo &OpInfo = ConstraintOperands [cIndex];
6102	if (OpInfo.Type == InlineAsm::isClobber)
6103	continue;
6104
6105	// If this is an output operand with a matching input operand,
6106	// look up the matching input. If their types mismatch, e.g. one
6107	// is an integer, the other is floating point, or their sizes are
6108	// different, flag it as an maCantMatch.
6109	if (OpInfo.hasMatchingInput()) {
6110	AsmOperandInfo &Input = ConstraintOperands [OpInfo.MatchingInput];
6111	if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6112	if ((OpInfo.ConstraintVT.isInteger() !=
6113	Input.ConstraintVT.isInteger()) \|\|
6114	(OpInfo.ConstraintVT.getSizeInBits() !=
6115	Input.ConstraintVT.getSizeInBits())) {
6116	weightSum = -`1`; // Can't match.
6117	break;
6118	}
6119	}
6120	}
6121	weight = getMultipleConstraintMatchWeight(info&: OpInfo, maIndex);
6122	if (weight == -`1`) {
6123	weightSum = -`1`;
6124	break;
6125	}
6126	weightSum += weight;
6127	}
6128	// Update best.
6129	if (weightSum > bestWeight) {
6130	bestWeight = weightSum;
6131	bestMAIndex = maIndex;
6132	}
6133	}
6134
6135	// Now select chosen alternative in each constraint.
6136	for (AsmOperandInfo &cInfo : ConstraintOperands)
6137	if (cInfo.Type != InlineAsm::isClobber)
6138	cInfo.selectAlternative(index: bestMAIndex);
6139	}
6140	}
6141
6142	// Check and hook up tied operands, choose constraint code to use.
6143	for (unsigned cIndex = `0`, eIndex = ConstraintOperands.size();
6144	cIndex != eIndex; ++cIndex) {
6145	AsmOperandInfo &OpInfo = ConstraintOperands [cIndex];
6146
6147	// If this is an output operand with a matching input operand, look up the
6148	// matching input. If their types mismatch, e.g. one is an integer, the
6149	// other is floating point, or their sizes are different, flag it as an
6150	// error.
6151	if (OpInfo.hasMatchingInput()) {
6152	AsmOperandInfo &Input = ConstraintOperands [OpInfo.MatchingInput];
6153
6154	if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6155	std::pair<unsigned, const TargetRegisterClass *> MatchRC =
6156	getRegForInlineAsmConstraint(RI: TRI, Constraint: OpInfo.ConstraintCode,
6157	VT: OpInfo.ConstraintVT);
6158	std::pair<unsigned, const TargetRegisterClass *> InputRC =
6159	getRegForInlineAsmConstraint(RI: TRI, Constraint: Input.ConstraintCode,
6160	VT: Input.ConstraintVT);
6161	const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() \|\|
6162	OpInfo.ConstraintVT.isFloatingPoint();
6163	const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() \|\|
6164	Input.ConstraintVT.isFloatingPoint();
6165	if ((OutOpIsIntOrFP != InOpIsIntOrFP) \|\|
6166	(MatchRC.second != InputRC.second)) {
6167	report_fatal_error(reason: "Unsupported asm: input constraint"
6168	" with a matching output constraint of"
6169	" incompatible type!");
6170	}
6171	}
6172	}
6173	}
6174
6175	return ConstraintOperands;
6176	}
6177
6178	/// Return a number indicating our preference for chosing a type of constraint
6179	/// over another, for the purpose of sorting them. Immediates are almost always
6180	/// preferrable (when they can be emitted). A higher return value means a
6181	/// stronger preference for one constraint type relative to another.
6182	/// FIXME: We should prefer registers over memory but doing so may lead to
6183	/// unrecoverable register exhaustion later.
6184	/// https://github.com/llvm/llvm-project/issues/20571
6185	static unsigned getConstraintPiority(TargetLowering::ConstraintType CT) {
6186	switch (CT) {
6187	case TargetLowering::C_Immediate:
6188	case TargetLowering::C_Other:
6189	return `4`;
6190	case TargetLowering::C_Memory:
6191	case TargetLowering::C_Address:
6192	return `3`;
6193	case TargetLowering::C_RegisterClass:
6194	return `2`;
6195	case TargetLowering::C_Register:
6196	return `1`;
6197	case TargetLowering::C_Unknown:
6198	return `0`;
6199	}
6200	llvm_unreachable("Invalid constraint type");
6201	}
6202
6203	/// Examine constraint type and operand type and determine a weight value.
6204	/// This object must already have been set up with the operand type
6205	/// and the current alternative constraint selected.
6206	TargetLowering::ConstraintWeight
6207	TargetLowering::getMultipleConstraintMatchWeight(
6208	AsmOperandInfo &info, int maIndex) const {
6209	InlineAsm::ConstraintCodeVector *rCodes;
6210	if (maIndex >= (int)info.multipleAlternatives.size())
6211	rCodes = &info.Codes;
6212	else
6213	rCodes = &info.multipleAlternatives [maIndex].Codes;
6214	ConstraintWeight BestWeight = CW_Invalid;
6215
6216	// Loop over the options, keeping track of the most general one.
6217	for (const std::string &rCode : *rCodes) {
6218	ConstraintWeight weight =
6219	getSingleConstraintMatchWeight(info, constraint: rCode.c_str());
6220	if (weight > BestWeight)
6221	BestWeight = weight;
6222	}
6223
6224	return BestWeight;
6225	}
6226
6227	/// Examine constraint type and operand type and determine a weight value.
6228	/// This object must already have been set up with the operand type
6229	/// and the current alternative constraint selected.
6230	TargetLowering::ConstraintWeight
6231	TargetLowering::getSingleConstraintMatchWeight(
6232	AsmOperandInfo &info, const char constraint) const* {
6233	ConstraintWeight weight = CW_Invalid;
6234	Value *CallOperandVal = info.CallOperandVal;
6235	// If we don't have a value, we can't do a match,
6236	// but allow it at the lowest weight.
6237	if (!CallOperandVal)
6238	return CW_Default;
6239	// Look at the constraint type.
6240	switch (*constraint) {
6241	case `'i'`: // immediate integer.
6242	case `'n'`: // immediate integer with a known value.
6243	if (isa<ConstantInt>(Val: CallOperandVal))
6244	weight = CW_Constant;
6245	break;
6246	case `'s'`: // non-explicit intregal immediate.
6247	if (isa<GlobalValue>(Val: CallOperandVal))
6248	weight = CW_Constant;
6249	break;
6250	case `'E'`: // immediate float if host format.
6251	case `'F'`: // immediate float.
6252	if (isa<ConstantFP>(Val: CallOperandVal))
6253	weight = CW_Constant;
6254	break;
6255	case `'<'`: // memory operand with autodecrement.
6256	case `'>'`: // memory operand with autoincrement.
6257	case `'m'`: // memory operand.
6258	case `'o'`: // offsettable memory operand
6259	case `'V'`: // non-offsettable memory operand
6260	weight = CW_Memory;
6261	break;
6262	case `'r'`: // general register.
6263	case `'g'`: // general register, memory operand or immediate integer.
6264	// note: Clang converts "g" to "imr".
6265	if (CallOperandVal->getType()->isIntegerTy())
6266	weight = CW_Register;
6267	break;
6268	case `'X'`: // any operand.
6269	default:
6270	weight = CW_Default;
6271	break;
6272	}
6273	return weight;
6274	}
6275
6276	/// If there are multiple different constraints that we could pick for this
6277	/// operand (e.g. "imr") try to pick the 'best' one.
6278	/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
6279	/// into seven classes:
6280	/// Register -> one specific register
6281	/// RegisterClass -> a group of regs
6282	/// Memory -> memory
6283	/// Address -> a symbolic memory reference
6284	/// Immediate -> immediate values
6285	/// Other -> magic values (such as "Flag Output Operands")
6286	/// Unknown -> something we don't recognize yet and can't handle
6287	/// Ideally, we would pick the most specific constraint possible: if we have
6288	/// something that fits into a register, we would pick it. The problem here
6289	/// is that if we have something that could either be in a register or in
6290	/// memory that use of the register could cause selection of other
6291	/// operands to fail: they might only succeed if we pick memory. Because of
6292	/// this the heuristic we use is:
6293	///
6294	/// 1) If there is an 'other' constraint, and if the operand is valid for
6295	/// that constraint, use it. This makes us take advantage of 'i'
6296	/// constraints when available.
6297	/// 2) Otherwise, pick the most general constraint present. This prefers
6298	/// 'm' over 'r', for example.
6299	///
6300	TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
6301	TargetLowering::AsmOperandInfo &OpInfo) const {
6302	ConstraintGroup Ret;
6303
6304	Ret.reserve(N: OpInfo.Codes.size());
6305	for (StringRef Code : OpInfo.Codes) {
6306	TargetLowering::ConstraintType CType = getConstraintType(Constraint: Code);
6307
6308	// Indirect 'other' or 'immediate' constraints are not allowed.
6309	if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory \|\|
6310	CType == TargetLowering::C_Register \|\|
6311	CType == TargetLowering::C_RegisterClass))
6312	continue;
6313
6314	// Things with matching constraints can only be registers, per gcc
6315	// documentation. This mainly affects "g" constraints.
6316	if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6317	continue;
6318
6319	Ret.emplace_back(Args&: Code, Args&: CType);
6320	}
6321
6322	llvm::stable_sort(Range&: Ret, C: [](ConstraintPair a, ConstraintPair b) {
6323	return getConstraintPiority(CT: a.second) > getConstraintPiority(CT: b.second);
6324	});
6325
6326	return Ret;
6327	}
6328
6329	/// If we have an immediate, see if we can lower it. Return true if we can,
6330	/// false otherwise.
6331	static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
6332	SDValue Op, SelectionDAG *DAG,
6333	const TargetLowering &TLI) {
6334
6335	assert((P.second == TargetLowering::C_Other \|\|
6336	P.second == TargetLowering::C_Immediate) &&
6337	"need immediate or other");
6338
6339	if (!Op.getNode())
6340	return false;
6341
6342	std::vector<SDValue> ResultOps;
6343	TLI.LowerAsmOperandForConstraint(Op, Constraint: P.first, Ops&: ResultOps, DAG&: *DAG);
6344	return !ResultOps.empty();
6345	}
6346
6347	/// Determines the constraint code and constraint type to use for the specific
6348	/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
6349	void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
6350	SDValue Op,
6351	SelectionDAG DAG) const* {
6352	assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
6353
6354	// Single-letter constraints ('r') are very common.
6355	if (OpInfo.Codes.size() == `1`) {
6356	OpInfo.ConstraintCode = OpInfo.Codes [`0`];
6357	OpInfo.ConstraintType = getConstraintType(Constraint: OpInfo.ConstraintCode);
6358	} else {
6359	ConstraintGroup G = getConstraintPreferences(OpInfo);
6360	if (G.empty())
6361	return;
6362
6363	unsigned BestIdx = `0`;
6364	for (const unsigned E = G.size();
6365	BestIdx < E && (G [BestIdx].second == TargetLowering::C_Other \|\|
6366	G [BestIdx].second == TargetLowering::C_Immediate);
6367	++BestIdx) {
6368	if (lowerImmediateIfPossible(P&: G [BestIdx], Op, DAG, TLI: *this))
6369	break;
6370	// If we're out of constraints, just pick the first one.
6371	if (BestIdx + `1` == E) {
6372	BestIdx = `0`;
6373	break;
6374	}
6375	}
6376
6377	OpInfo.ConstraintCode = G [BestIdx].first;
6378	OpInfo.ConstraintType = G [BestIdx].second;
6379	}
6380
6381	// 'X' matches anything.
6382	if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6383	// Constants are handled elsewhere. For Functions, the type here is the
6384	// type of the result, which is not what we want to look at; leave them
6385	// alone.
6386	Value *v = OpInfo.CallOperandVal;
6387	if (isa<ConstantInt>(Val: v) \|\| isa<Function>(Val: v)) {
6388	return;
6389	}
6390
6391	if (isa<BasicBlock>(Val: v) \|\| isa<BlockAddress>(Val: v)) {
6392	OpInfo.ConstraintCode = "i";
6393	return;
6394	}
6395
6396	// Otherwise, try to resolve it to something we know about by looking at
6397	// the actual operand type.
6398	if (const char *Repl = LowerXConstraint(ConstraintVT: OpInfo.ConstraintVT)) {
6399	OpInfo.ConstraintCode = Repl;
6400	OpInfo.ConstraintType = getConstraintType(Constraint: OpInfo.ConstraintCode);
6401	}
6402	}
6403	}
6404
6405	/// Given an exact SDIV by a constant, create a multiplication
6406	/// with the multiplicative inverse of the constant.
6407	/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6408	static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
6409	const SDLoc &dl, SelectionDAG &DAG,
6410	SmallVectorImpl<SDNode *> &Created) {
6411	SDValue Op0 = N->getOperand(Num: `0`);
6412	SDValue Op1 = N->getOperand(Num: `1`);
6413	EVT VT = N->getValueType(ResNo: `0`);
6414	EVT ShVT = TLI.getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6415	EVT ShSVT = ShVT.getScalarType();
6416
6417	bool UseSRA = false;
6418	SmallVector<SDValue, `16`> Shifts, Factors;
6419
6420	auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6421	if (C->isZero())
6422	return false;
6423
6424	EVT CT = C->getValueType(ResNo: `0`);
6425	APInt Divisor = C->getAPIntValue();
6426	unsigned Shift = Divisor.countr_zero();
6427	if (Shift) {
6428	Divisor.ashrInPlace(ShiftAmt: Shift);
6429	UseSRA = true;
6430	}
6431	APInt Factor = Divisor.multiplicativeInverse();
6432	Shifts.push_back(Elt: DAG.getConstant(Val: Shift, DL: dl, VT: ShSVT));
6433	Factors.push_back(Elt: DAG.getConstant(Val: Factor, DL: dl, VT: CT));
6434	return true;
6435	};
6436
6437	// Collect all magic values from the build vector.
6438	if (!ISD::matchUnaryPredicate(Op: Op1, Match: BuildSDIVPattern, /AllowUndefs=/false,
6439	/AllowTruncation=/true))
6440	return SDValue ();
6441
6442	SDValue Shift, Factor;
6443	if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6444	Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
6445	Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
6446	} else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6447	assert(Shifts.size() == `1` && Factors.size() == `1` &&
6448	"Expected matchUnaryPredicate to return one element for scalable "
6449	"vectors");
6450	Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts [`0`]);
6451	Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors [`0`]);
6452	} else {
6453	assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6454	Shift = Shifts [`0`];
6455	Factor = Factors [`0`];
6456	}
6457
6458	SDValue Res = Op0;
6459	if (UseSRA) {
6460	Res = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Res, N2: Shift, Flags: SDNodeFlags::Exact);
6461	Created.push_back(Elt: Res.getNode());
6462	}
6463
6464	return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Res, N2: Factor);
6465	}
6466
6467	/// Given an exact UDIV by a constant, create a multiplication
6468	/// with the multiplicative inverse of the constant.
6469	/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6470	static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
6471	const SDLoc &dl, SelectionDAG &DAG,
6472	SmallVectorImpl<SDNode *> &Created) {
6473	EVT VT = N->getValueType(ResNo: `0`);
6474	EVT ShVT = TLI.getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6475	EVT ShSVT = ShVT.getScalarType();
6476
6477	bool UseSRL = false;
6478	SmallVector<SDValue, `16`> Shifts, Factors;
6479
6480	auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6481	if (C->isZero())
6482	return false;
6483
6484	EVT CT = C->getValueType(ResNo: `0`);
6485	APInt Divisor = C->getAPIntValue();
6486	unsigned Shift = Divisor.countr_zero();
6487	if (Shift) {
6488	Divisor.lshrInPlace(ShiftAmt: Shift);
6489	UseSRL = true;
6490	}
6491	// Calculate the multiplicative inverse modulo BW.
6492	APInt Factor = Divisor.multiplicativeInverse();
6493	Shifts.push_back(Elt: DAG.getConstant(Val: Shift, DL: dl, VT: ShSVT));
6494	Factors.push_back(Elt: DAG.getConstant(Val: Factor, DL: dl, VT: CT));
6495	return true;
6496	};
6497
6498	SDValue Op1 = N->getOperand(Num: `1`);
6499
6500	// Collect all magic values from the build vector.
6501	if (!ISD::matchUnaryPredicate(Op: Op1, Match: BuildUDIVPattern, /AllowUndefs=/false,
6502	/AllowTruncation=/true))
6503	return SDValue ();
6504
6505	SDValue Shift, Factor;
6506	if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6507	Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
6508	Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
6509	} else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6510	assert(Shifts.size() == `1` && Factors.size() == `1` &&
6511	"Expected matchUnaryPredicate to return one element for scalable "
6512	"vectors");
6513	Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts [`0`]);
6514	Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors [`0`]);
6515	} else {
6516	assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6517	Shift = Shifts [`0`];
6518	Factor = Factors [`0`];
6519	}
6520
6521	SDValue Res = N->getOperand(Num: `0`);
6522	if (UseSRL) {
6523	Res = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Res, N2: Shift, Flags: SDNodeFlags::Exact);
6524	Created.push_back(Elt: Res.getNode());
6525	}
6526
6527	return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Res, N2: Factor);
6528	}
6529
6530	SDValue TargetLowering::BuildSDIVPow2(SDNode N, const* APInt &Divisor,
6531	SelectionDAG &DAG,
6532	SmallVectorImpl<SDNode > &Created) const* {
6533	AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6534	if (isIntDivCheap(VT: N->getValueType(ResNo: `0`), Attr))
6535	return SDValue (N, `0`); // Lower SDIV as SDIV
6536	return SDValue ();
6537	}
6538
6539	SDValue
6540	TargetLowering::BuildSREMPow2(SDNode N, const* APInt &Divisor,
6541	SelectionDAG &DAG,
6542	SmallVectorImpl<SDNode > &Created) const* {
6543	AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6544	if (isIntDivCheap(VT: N->getValueType(ResNo: `0`), Attr))
6545	return SDValue (N, `0`); // Lower SREM as SREM
6546	return SDValue ();
6547	}
6548
6549	/// Build sdiv by power-of-2 with conditional move instructions
6550	/// Ref: "Hacker's Delight" by Henry Warren 10-1
6551	/// If conditional move/branch is preferred, we lower sdiv x, +/-2k into:
6552	/// bgez x, label
6553	/// add x, x, 2k-1
6554	/// label:
6555	/// sra res, x, k
6556	/// neg res, res (when the divisor is negative)
6557	SDValue TargetLowering::buildSDIVPow2WithCMov(
6558	SDNode N, const* APInt &Divisor, SelectionDAG &DAG,
6559	SmallVectorImpl<SDNode > &Created) const* {
6560	unsigned Lg2 = Divisor.countr_zero();
6561	EVT VT = N->getValueType(ResNo: `0`);
6562
6563	SDLoc DL(N);
6564	SDValue N0 = N->getOperand(Num: `0`);
6565	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT);
6566	APInt Lg2Mask = APInt::getLowBitsSet(numBits: VT.getSizeInBits(), loBitsSet: Lg2);
6567	SDValue Pow2MinusOne = DAG.getConstant(Val: Lg2Mask, DL, VT);
6568
6569	// If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6570	EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
6571	SDValue Cmp = DAG.getSetCC(DL, VT: CCVT, LHS: N0, RHS: Zero, Cond: ISD::SETLT);
6572	SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0, N2: Pow2MinusOne);
6573	SDValue CMov = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: Cmp, N2: Add, N3: N0);
6574
6575	Created.push_back(Elt: Cmp.getNode());
6576	Created.push_back(Elt: Add.getNode());
6577	Created.push_back(Elt: CMov.getNode());
6578
6579	// Divide by pow2.
6580	SDValue SRA = DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: CMov,
6581	N2: DAG.getShiftAmountConstant(Val: Lg2, VT, DL));
6582
6583	// If we're dividing by a positive value, we're done. Otherwise, we must
6584	// negate the result.
6585	if (Divisor.isNonNegative())
6586	return SRA;
6587
6588	Created.push_back(Elt: SRA.getNode());
6589	return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Zero, N2: SRA);
6590	}
6591
6592	/// Given an ISD::SDIV node expressing a divide by constant,
6593	/// return a DAG expression to select that will generate the same value by
6594	/// multiplying by a magic number.
6595	/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6596	SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
6597	bool IsAfterLegalization,
6598	bool IsAfterLegalTypes,
6599	SmallVectorImpl<SDNode > &Created) const* {
6600	SDLoc dl(N);
6601	EVT VT = N->getValueType(ResNo: `0`);
6602	EVT SVT = VT.getScalarType();
6603	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6604	EVT ShSVT = ShVT.getScalarType();
6605	unsigned EltBits = VT.getScalarSizeInBits();
6606	EVT MulVT;
6607
6608	// Check to see if we can do this.
6609	// FIXME: We should be more aggressive here.
6610	if (!isTypeLegal(VT)) {
6611	// Limit this to simple scalars for now.
6612	if (VT.isVector() \|\| !VT.isSimple())
6613	return SDValue ();
6614
6615	// If this type will be promoted to a large enough type with a legal
6616	// multiply operation, we can go ahead and do this transform.
6617	if (getTypeAction(VT: VT.getSimpleVT()) != TypePromoteInteger)
6618	return SDValue ();
6619
6620	MulVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT);
6621	if (MulVT.getSizeInBits() < (`2` * EltBits) \|\|
6622	!isOperationLegal(Op: ISD::MUL, VT: MulVT))
6623	return SDValue ();
6624	}
6625
6626	// If the sdiv has an 'exact' bit we can use a simpler lowering.
6627	if (N->getFlags().hasExact())
6628	return BuildExactSDIV(TLI: *this, N, dl, DAG, Created);
6629
6630	SmallVector<SDValue, `16`> MagicFactors, Factors, Shifts, ShiftMasks;
6631
6632	auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6633	if (C->isZero())
6634	return false;
6635	// Truncate the divisor to the target scalar type in case it was promoted
6636	// during type legalization.
6637	APInt Divisor = C->getAPIntValue().trunc(width: EltBits);
6638	SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(D: Divisor);
6639	int NumeratorFactor = `0`;
6640	int ShiftMask = -`1`;
6641
6642	if (Divisor.isOne() \|\| Divisor.isAllOnes()) {
6643	// If d is +1/-1, we just multiply the numerator by +1/-1.
6644	NumeratorFactor = Divisor.getSExtValue();
6645	magics.Magic = `0`;
6646	magics.ShiftAmount = `0`;
6647	ShiftMask = `0`;
6648	} else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6649	// If d > 0 and m < 0, add the numerator.
6650	NumeratorFactor = `1`;
6651	} else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6652	// If d < 0 and m > 0, subtract the numerator.
6653	NumeratorFactor = -`1`;
6654	}
6655
6656	MagicFactors.push_back(Elt: DAG.getConstant(Val: magics.Magic, DL: dl, VT: SVT));
6657	Factors.push_back(Elt: DAG.getSignedConstant(Val: NumeratorFactor, DL: dl, VT: SVT));
6658	Shifts.push_back(Elt: DAG.getConstant(Val: magics.ShiftAmount, DL: dl, VT: ShSVT));
6659	ShiftMasks.push_back(Elt: DAG.getSignedConstant(Val: ShiftMask, DL: dl, VT: SVT));
6660	return true;
6661	};
6662
6663	SDValue N0 = N->getOperand(Num: `0`);
6664	SDValue N1 = N->getOperand(Num: `1`);
6665
6666	// Collect the shifts / magic values from each element.
6667	if (!ISD::matchUnaryPredicate(Op: N1, Match: BuildSDIVPattern, /AllowUndefs=/false,
6668	/AllowTruncation=/true))
6669	return SDValue ();
6670
6671	SDValue MagicFactor, Factor, Shift, ShiftMask;
6672	if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6673	MagicFactor = DAG.getBuildVector(VT, DL: dl, Ops: MagicFactors);
6674	Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
6675	Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
6676	ShiftMask = DAG.getBuildVector(VT, DL: dl, Ops: ShiftMasks);
6677	} else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6678	assert(MagicFactors.size() == `1` && Factors.size() == `1` &&
6679	Shifts.size() == `1` && ShiftMasks.size() == `1` &&
6680	"Expected matchUnaryPredicate to return one element for scalable "
6681	"vectors");
6682	MagicFactor = DAG.getSplatVector(VT, DL: dl, Op: MagicFactors [`0`]);
6683	Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors [`0`]);
6684	Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts [`0`]);
6685	ShiftMask = DAG.getSplatVector(VT, DL: dl, Op: ShiftMasks [`0`]);
6686	} else {
6687	assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6688	MagicFactor = MagicFactors [`0`];
6689	Factor = Factors [`0`];
6690	Shift = Shifts [`0`];
6691	ShiftMask = ShiftMasks [`0`];
6692	}
6693
6694	// Multiply the numerator (operand 0) by the magic value.
6695	// FIXME: We should support doing a MUL in a wider type.
6696	auto GetMULHS = [&](SDValue X, SDValue Y) {
6697	// If the type isn't legal, use a wider mul of the type calculated
6698	// earlier.
6699	if (!isTypeLegal(VT)) {
6700	X = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MulVT, Operand: X);
6701	Y = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MulVT, Operand: Y);
6702	Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MulVT, N1: X, N2: Y);
6703	Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MulVT, N1: Y,
6704	N2: DAG.getShiftAmountConstant(Val: EltBits, VT: MulVT, DL: dl));
6705	return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6706	}
6707
6708	if (isOperationLegalOrCustom(Op: ISD::MULHS, VT, LegalOnly: IsAfterLegalization))
6709	return DAG.getNode(Opcode: ISD::MULHS, DL: dl, VT, N1: X, N2: Y);
6710	if (isOperationLegalOrCustom(Op: ISD::SMUL_LOHI, VT, LegalOnly: IsAfterLegalization)) {
6711	SDValue LoHi =
6712	DAG.getNode(Opcode: ISD::SMUL_LOHI, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: X, N2: Y);
6713	return SDValue (LoHi.getNode(), `1`);
6714	}
6715	// If type twice as wide legal, widen and use a mul plus a shift.
6716	unsigned Size = VT.getScalarSizeInBits();
6717	EVT WideVT = VT.changeElementType(
6718	Context&: DAG.getContext(), EltVT: EVT::getIntegerVT(Context&: DAG.getContext(), BitWidth: Size * `2`));
6719	// Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
6720	// custom lowered. This is very expensive so avoid it at all costs for
6721	// constant divisors.
6722	if ((!IsAfterLegalTypes && isOperationExpand(Op: ISD::SDIV, VT) &&
6723	isOperationCustom(Op: ISD::SDIVREM, VT: VT.getScalarType())) \|\|
6724	isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT)) {
6725	X = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: WideVT, Operand: X);
6726	Y = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: WideVT, Operand: Y);
6727	Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: X, N2: Y);
6728	Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideVT, N1: Y,
6729	N2: DAG.getShiftAmountConstant(Val: EltBits, VT: WideVT, DL: dl));
6730	return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6731	}
6732	return SDValue ();
6733	};
6734
6735	SDValue Q = GetMULHS (N0, MagicFactor);
6736	if (!Q)
6737	return SDValue ();
6738
6739	Created.push_back(Elt: Q.getNode());
6740
6741	// (Optionally) Add/subtract the numerator using Factor.
6742	Factor = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: N0, N2: Factor);
6743	Created.push_back(Elt: Factor.getNode());
6744	Q = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Q, N2: Factor);
6745	Created.push_back(Elt: Q.getNode());
6746
6747	// Shift right algebraic by shift value.
6748	Q = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Q, N2: Shift);
6749	Created.push_back(Elt: Q.getNode());
6750
6751	// Extract the sign bit, mask it and add it to the quotient.
6752	SDValue SignShift = DAG.getConstant(Val: EltBits - `1`, DL: dl, VT: ShVT);
6753	SDValue T = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: SignShift);
6754	Created.push_back(Elt: T.getNode());
6755	T = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: T, N2: ShiftMask);
6756	Created.push_back(Elt: T.getNode());
6757	return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Q, N2: T);
6758	}
6759
6760	/// Given an ISD::UDIV node expressing a divide by constant,
6761	/// return a DAG expression to select that will generate the same value by
6762	/// multiplying by a magic number.
6763	/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6764	SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
6765	bool IsAfterLegalization,
6766	bool IsAfterLegalTypes,
6767	SmallVectorImpl<SDNode > &Created) const* {
6768	SDLoc dl(N);
6769	EVT VT = N->getValueType(ResNo: `0`);
6770	EVT SVT = VT.getScalarType();
6771	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6772	EVT ShSVT = ShVT.getScalarType();
6773	unsigned EltBits = VT.getScalarSizeInBits();
6774	EVT MulVT;
6775
6776	// Check to see if we can do this.
6777	// FIXME: We should be more aggressive here.
6778	if (!isTypeLegal(VT)) {
6779	// Limit this to simple scalars for now.
6780	if (VT.isVector() \|\| !VT.isSimple())
6781	return SDValue ();
6782
6783	// If this type will be promoted to a large enough type with a legal
6784	// multiply operation, we can go ahead and do this transform.
6785	if (getTypeAction(VT: VT.getSimpleVT()) != TypePromoteInteger)
6786	return SDValue ();
6787
6788	MulVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT);
6789	if (MulVT.getSizeInBits() < (`2` * EltBits) \|\|
6790	!isOperationLegal(Op: ISD::MUL, VT: MulVT))
6791	return SDValue ();
6792	}
6793
6794	// If the udiv has an 'exact' bit we can use a simpler lowering.
6795	if (N->getFlags().hasExact())
6796	return BuildExactUDIV(TLI: *this, N, dl, DAG, Created);
6797
6798	SDValue N0 = N->getOperand(Num: `0`);
6799	SDValue N1 = N->getOperand(Num: `1`);
6800
6801	// Try to use leading zeros of the dividend to reduce the multiplier and
6802	// avoid expensive fixups.
6803	unsigned KnownLeadingZeros = DAG.computeKnownBits(Op: N0).countMinLeadingZeros();
6804
6805	// If we're after type legalization and SVT is not legal, use the
6806	// promoted type for creating constants to avoid creating nodes with
6807	// illegal types.
6808	if (IsAfterLegalTypes && VT.isVector()) {
6809	SVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT: SVT);
6810	if (SVT.bitsLT(VT: VT.getScalarType()))
6811	return SDValue ();
6812	ShSVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT: ShSVT);
6813	if (ShSVT.bitsLT(VT: ShVT.getScalarType()))
6814	return SDValue ();
6815	}
6816	const unsigned SVTBits = SVT.getSizeInBits();
6817
6818	bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6819	const bool HasWideVT64MULHU =
6820	isOperationLegalOrCustom(Op: ISD::MULHU, VT: MVT::i64, LegalOnly: IsAfterLegalization);
6821	const bool HasWideVT64UMUL_LOHI =
6822	isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT: MVT::i64, LegalOnly: IsAfterLegalization);
6823	bool UseWiden = false;
6824	SmallVector<SDValue, `16`> PreShifts, PostShifts, MagicFactors, NPQFactors;
6825
6826	auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6827	if (C->isZero())
6828	return false;
6829	// Truncate the divisor to the target scalar type in case it was promoted
6830	// during type legalization.
6831	APInt Divisor = C->getAPIntValue().trunc(width: EltBits);
6832
6833	SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6834
6835	// Magic algorithm doesn't work for division by 1. We need to emit a select
6836	// at the end.
6837	if (Divisor.isOne()) {
6838	PreShift = PostShift = DAG.getUNDEF(VT: ShSVT);
6839	MagicFactor = NPQFactor = DAG.getUNDEF(VT: SVT);
6840	} else {
6841	const bool AllowWiden = (EltBits == `32` && !VT.isVector() &&
6842	(HasWideVT64MULHU \|\| HasWideVT64UMUL_LOHI));
6843	UnsignedDivisionByConstantInfo magics =
6844	UnsignedDivisionByConstantInfo::get(
6845	D: Divisor, LeadingZeros: std::min(a: KnownLeadingZeros, b: Divisor.countl_zero()),
6846	/AllowEvenDivisorOptimization=/true,
6847	/AllowWidenOptimization=/AllowWiden);
6848
6849	if (magics.Widen) {
6850	UseWiden = true;
6851	MagicFactor = DAG.getConstant(Val: magics.Magic, DL: dl, VT: MVT::i64);
6852	} else {
6853	MagicFactor = DAG.getConstant(Val: magics.Magic.zext(width: SVTBits), DL: dl, VT: SVT);
6854	}
6855
6856	assert(magics.PreShift < Divisor.getBitWidth() &&
6857	"We shouldn't generate an undefined shift!");
6858	assert(magics.PostShift < Divisor.getBitWidth() &&
6859	"We shouldn't generate an undefined shift!");
6860	assert((!magics.IsAdd \|\| magics.PreShift == `0`) &&
6861	"Unexpected pre-shift");
6862	PreShift = DAG.getConstant(Val: magics.PreShift, DL: dl, VT: ShSVT);
6863	PostShift = DAG.getConstant(Val: magics.PostShift, DL: dl, VT: ShSVT);
6864	NPQFactor = DAG.getConstant(
6865	Val: magics.IsAdd ? APInt::getOneBitSet(numBits: SVTBits, BitNo: EltBits - `1`)
6866	: APInt::getZero(numBits: SVTBits),
6867	DL: dl, VT: SVT);
6868	UseNPQ \|= magics.IsAdd;
6869	UsePreShift \|= magics.PreShift != `0`;
6870	UsePostShift \|= magics.PostShift != `0`;
6871	}
6872
6873	PreShifts.push_back(Elt: PreShift);
6874	MagicFactors.push_back(Elt: MagicFactor);
6875	NPQFactors.push_back(Elt: NPQFactor);
6876	PostShifts.push_back(Elt: PostShift);
6877	return true;
6878	};
6879
6880	// Collect the shifts/magic values from each element.
6881	if (!ISD::matchUnaryPredicate(Op: N1, Match: BuildUDIVPattern, /AllowUndefs=/false,
6882	/AllowTruncation=/true))
6883	return SDValue ();
6884
6885	SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6886	if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6887	PreShift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: PreShifts);
6888	MagicFactor = DAG.getBuildVector(VT, DL: dl, Ops: MagicFactors);
6889	NPQFactor = DAG.getBuildVector(VT, DL: dl, Ops: NPQFactors);
6890	PostShift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: PostShifts);
6891	} else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6892	assert(PreShifts.size() == `1` && MagicFactors.size() == `1` &&
6893	NPQFactors.size() == `1` && PostShifts.size() == `1` &&
6894	"Expected matchUnaryPredicate to return one for scalable vectors");
6895	PreShift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: PreShifts [`0`]);
6896	MagicFactor = DAG.getSplatVector(VT, DL: dl, Op: MagicFactors [`0`]);
6897	NPQFactor = DAG.getSplatVector(VT, DL: dl, Op: NPQFactors [`0`]);
6898	PostShift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: PostShifts [`0`]);
6899	} else {
6900	assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6901	PreShift = PreShifts [`0`];
6902	MagicFactor = MagicFactors [`0`];
6903	PostShift = PostShifts [`0`];
6904	}
6905
6906	if (UseWiden) {
    // Compute: (i64(x) * MagicFactor) >> 64
6908	SDValue X64 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MVT::i64, Operand: N0);
6909
6910	// Perform 64x64 -> 128 multiplication and extract high 64 bits
6911	SDValue High;
6912	if (HasWideVT64MULHU) {
6913	High = DAG.getNode(Opcode: ISD::MULHU, DL: dl, VT: MVT::i64, N1: X64, N2: MagicFactor);
6914	} else {
6915	SDValue LoHi =
6916	DAG.getNode(Opcode: ISD::UMUL_LOHI, DL: dl, VTList: DAG.getVTList(VT1: MVT::i64, VT2: MVT::i64),
6917	N1: X64, N2: MagicFactor);
6918	High = SDValue (LoHi.getNode(), `1`);
6919	}
6920
6921	Created.push_back(Elt: High.getNode());
6922	return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: High);
6923	}
6924
6925	SDValue Q = N0;
6926	if (UsePreShift) {
6927	Q = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: PreShift);
6928	Created.push_back(Elt: Q.getNode());
6929	}
6930
6931	// FIXME: We should support doing a MUL in a wider type.
6932	auto GetMULHU = [&](SDValue X, SDValue Y) {
6933	// If the type isn't legal, use a wider mul of the type calculated
6934	// earlier.
6935	if (!isTypeLegal(VT)) {
6936	X = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MulVT, Operand: X);
6937	Y = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MulVT, Operand: Y);
6938	Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MulVT, N1: X, N2: Y);
6939	Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MulVT, N1: Y,
6940	N2: DAG.getShiftAmountConstant(Val: EltBits, VT: MulVT, DL: dl));
6941	return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6942	}
6943
6944	if (isOperationLegalOrCustom(Op: ISD::MULHU, VT, LegalOnly: IsAfterLegalization))
6945	return DAG.getNode(Opcode: ISD::MULHU, DL: dl, VT, N1: X, N2: Y);
6946	if (isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT, LegalOnly: IsAfterLegalization)) {
6947	SDValue LoHi =
6948	DAG.getNode(Opcode: ISD::UMUL_LOHI, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: X, N2: Y);
6949	return SDValue (LoHi.getNode(), `1`);
6950	}
    // If the type twice as wide is legal, widen and use a mul plus a shift.
6952	unsigned Size = VT.getScalarSizeInBits();
6953	EVT WideVT = VT.changeElementType(
6954	Context&: DAG.getContext(), EltVT: EVT::getIntegerVT(Context&: DAG.getContext(), BitWidth: Size * `2`));
6955	// Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
6956	// custom lowered. This is very expensive so avoid it at all costs for
6957	// constant divisors.
6958	if ((!IsAfterLegalTypes && isOperationExpand(Op: ISD::UDIV, VT) &&
6959	isOperationCustom(Op: ISD::UDIVREM, VT: VT.getScalarType())) \|\|
6960	isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT)) {
6961	X = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: WideVT, Operand: X);
6962	Y = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: WideVT, Operand: Y);
6963	Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: X, N2: Y);
6964	Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideVT, N1: Y,
6965	N2: DAG.getShiftAmountConstant(Val: EltBits, VT: WideVT, DL: dl));
6966	return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6967	}
6968	return SDValue (); // No mulhu or equivalent
6969	};
6970
6971	// Multiply the numerator (operand 0) by the magic value.
6972	Q = GetMULHU (Q, MagicFactor);
6973	if (!Q)
6974	return SDValue ();
6975
6976	Created.push_back(Elt: Q.getNode());
6977
6978	if (UseNPQ) {
6979	SDValue NPQ = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: N0, N2: Q);
6980	Created.push_back(Elt: NPQ.getNode());
6981
6982	// For vectors we might have a mix of non-NPQ/NPQ paths, so use
6983	// MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
6984	if (VT.isVector())
6985	NPQ = GetMULHU (NPQ, NPQFactor);
6986	else
6987	NPQ = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: NPQ, N2: DAG.getConstant(Val: `1`, DL: dl, VT: ShVT));
6988
6989	Created.push_back(Elt: NPQ.getNode());
6990
6991	Q = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: NPQ, N2: Q);
6992	Created.push_back(Elt: Q.getNode());
6993	}
6994
6995	if (UsePostShift) {
6996	Q = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: PostShift);
6997	Created.push_back(Elt: Q.getNode());
6998	}
6999
7000	EVT SetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
7001
7002	SDValue One = DAG.getConstant(Val: `1`, DL: dl, VT);
7003	SDValue IsOne = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: N1, RHS: One, Cond: ISD::SETEQ);
7004	return DAG.getSelect(DL: dl, VT, Cond: IsOne, LHS: N0, RHS: Q);
7005	}
7006
7007	/// If all values in Values that don't* match the predicate are same 'splat'*
7008	/// value, then replace all values with that splat value.
7009	/// Else, if AlternativeReplacement was provided, then replace all values that
7010	/// do match predicate with AlternativeReplacement value.
7011	static void
7012	turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
7013	std::function<bool(SDValue)> Predicate,
7014	SDValue AlternativeReplacement = SDValue ()) {
7015	SDValue Replacement;
7016	// Is there a value for which the Predicate does NOT* match? What is it?*
7017	auto SplatValue = llvm::find_if_not(Range&: Values, P: Predicate);
7018	if (SplatValue != Values.end()) {
7019	// Does Values consist only of SplatValue's and values matching Predicate?
7020	if (llvm::all_of(Range&: Values, P: [Predicate, SplatValue](SDValue Value) {
7021	return Value == *SplatValue \|\| Predicate (Value);
7022	})) // Then we shall replace values matching predicate with SplatValue.
7023	Replacement = *SplatValue;
7024	}
7025	if (!Replacement) {
7026	// Oops, we did not find the "baseline" splat value.
7027	if (!AlternativeReplacement)
7028	return; // Nothing to do.
7029	// Let's replace with provided value then.
7030	Replacement = AlternativeReplacement;
7031	}
7032	std::replace_if(first: Values.begin(), last: Values.end(), pred: Predicate, new_value: Replacement);
7033	}
7034
7035	/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
7036	/// where the divisor is constant and the comparison target is zero,
7037	/// return a DAG expression that will generate the same comparison result
7038	/// using only multiplications, additions and shifts/rotations.
7039	/// Ref: "Hacker's Delight" 10-17.
7040	SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
7041	SDValue CompTargetNode,
7042	ISD::CondCode Cond,
7043	DAGCombinerInfo &DCI,
7044	const SDLoc &DL) const {
7045	SmallVector<SDNode *, `5`> Built;
7046	if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7047	DCI, DL, Created&: Built)) {
7048	for (SDNode *N : Built)
7049	DCI.AddToWorklist(N);
7050	return Folded;
7051	}
7052
7053	return SDValue ();
7054	}
7055
7056	SDValue
7057	TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
7058	SDValue CompTargetNode, ISD::CondCode Cond,
7059	DAGCombinerInfo &DCI, const SDLoc &DL,
7060	SmallVectorImpl<SDNode > &Created) const* {
7061	// fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
7062	// - D must be constant, with D = D0 2^K where D0 is odd*
7063	// - P is the multiplicative inverse of D0 modulo 2^W
7064	// - Q = floor(((2^W) - 1) / D)
7065	// where W is the width of the common type of N and D.
7066	assert((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
7067	"Only applicable for (in)equality comparisons.");
7068
7069	SelectionDAG &DAG = DCI.DAG;
7070
7071	EVT VT = REMNode.getValueType();
7072	EVT SVT = VT.getScalarType();
7073	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
7074	EVT ShSVT = ShVT.getScalarType();
7075
7076	// If MUL is unavailable, we cannot proceed in any case.
7077	if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::MUL, VT))
7078	return SDValue ();
7079
7080	bool ComparingWithAllZeros = true;
7081	bool AllComparisonsWithNonZerosAreTautological = true;
7082	bool HadTautologicalLanes = false;
7083	bool AllLanesAreTautological = true;
7084	bool HadEvenDivisor = false;
7085	bool AllDivisorsArePowerOfTwo = true;
7086	bool HadTautologicalInvertedLanes = false;
7087	SmallVector<SDValue, `16`> PAmts, KAmts, QAmts;
7088
7089	auto BuildUREMPattern = [&](ConstantSDNode CDiv, ConstantSDNode CCmp) {
7090	// Division by 0 is UB. Leave it to be constant-folded elsewhere.
7091	if (CDiv->isZero())
7092	return false;
7093
7094	const APInt &D = CDiv->getAPIntValue();
7095	const APInt &Cmp = CCmp->getAPIntValue();
7096
7097	ComparingWithAllZeros &= Cmp.isZero();
7098
7099	// x u% C1` is always* less than C1. So given `x u% C1 == C2`,*
7100	// if C2 is not less than C1, the comparison is always false.
7101	// But we will only be able to produce the comparison that will give the
7102	// opposive tautological answer. So this lane would need to be fixed up.
7103	bool TautologicalInvertedLane = D.ule(RHS: Cmp);
7104	HadTautologicalInvertedLanes \|= TautologicalInvertedLane;
7105
7106	// If all lanes are tautological (either all divisors are ones, or divisor
7107	// is not greater than the constant we are comparing with),
7108	// we will prefer to avoid the fold.
7109	bool TautologicalLane = D.isOne() \|\| TautologicalInvertedLane;
7110	HadTautologicalLanes \|= TautologicalLane;
7111	AllLanesAreTautological &= TautologicalLane;
7112
7113	// If we are comparing with non-zero, we need'll need to subtract said
7114	// comparison value from the LHS. But there is no point in doing that if
7115	// every lane where we are comparing with non-zero is tautological..
7116	if (!Cmp.isZero())
7117	AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
7118
7119	// Decompose D into D0 2^K*
7120	unsigned K = D.countr_zero();
7121	assert((!D.isOne() \|\| (K == `0`)) && "For divisor '1' we won't rotate.");
7122	APInt D0 = D.lshr(shiftAmt: K);
7123
7124	// D is even if it has trailing zeros.
7125	HadEvenDivisor \|= (K != `0`);
7126	// D is a power-of-two if D0 is one.
7127	// If all divisors are power-of-two, we will prefer to avoid the fold.
7128	AllDivisorsArePowerOfTwo &= D0.isOne();
7129
7130	// P = inv(D0, 2^W)
7131	// 2^W requires W + 1 bits, so we have to extend and then truncate.
7132	unsigned W = D.getBitWidth();
7133	APInt P = D0.multiplicativeInverse();
7134	assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7135
7136	// Q = floor((2^W - 1) u/ D)
7137	// R = ((2^W - 1) u% D)
7138	APInt Q, R;
7139	APInt::udivrem(LHS: APInt::getAllOnes(numBits: W), RHS: D, Quotient&: Q, Remainder&: R);
7140
7141	// If we are comparing with zero, then that comparison constant is okay,
7142	// else it may need to be one less than that.
7143	if (Cmp.ugt(RHS: R))
7144	Q -= `1`;
7145
7146	assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
7147	"We are expecting that K is always less than all-ones for ShSVT");
7148
7149	// If the lane is tautological the result can be constant-folded.
7150	if (TautologicalLane) {
7151	// Set P and K amount to a bogus values so we can try to splat them.
7152	P = `0`;
7153	K = -`1`;
7154	// And ensure that comparison constant is tautological,
7155	// it will always compare true/false.
7156	Q = -`1`;
7157	}
7158
7159	PAmts.push_back(Elt: DAG.getConstant(Val: P, DL, VT: SVT));
7160	KAmts.push_back(
7161	Elt: DAG.getConstant(Val: APInt (ShSVT.getSizeInBits(), K, /isSigned=/false,
7162	/implicitTrunc=/true),
7163	DL, VT: ShSVT));
7164	QAmts.push_back(Elt: DAG.getConstant(Val: Q, DL, VT: SVT));
7165	return true;
7166	};
7167
7168	SDValue N = REMNode.getOperand(i: `0`);
7169	SDValue D = REMNode.getOperand(i: `1`);
7170
7171	// Collect the values from each element.
7172	if (!ISD::matchBinaryPredicate(LHS: D, RHS: CompTargetNode, Match: BuildUREMPattern))
7173	return SDValue ();
7174
7175	// If all lanes are tautological, the result can be constant-folded.
7176	if (AllLanesAreTautological)
7177	return SDValue ();
7178
7179	// If this is a urem by a powers-of-two, avoid the fold since it can be
7180	// best implemented as a bit test.
7181	if (AllDivisorsArePowerOfTwo)
7182	return SDValue ();
7183
7184	SDValue PVal, KVal, QVal;
7185	if (D.getOpcode() == ISD::BUILD_VECTOR) {
7186	if (HadTautologicalLanes) {
7187	// Try to turn PAmts into a splat, since we don't care about the values
7188	// that are currently '0'. If we can't, just keep '0'`s.
7189	turnVectorIntoSplatVector(Values: PAmts, Predicate: isNullConstant);
7190	// Try to turn KAmts into a splat, since we don't care about the values
7191	// that are currently '-1'. If we can't, change them to '0'`s.
7192	turnVectorIntoSplatVector(Values: KAmts, Predicate: isAllOnesConstant,
7193	AlternativeReplacement: DAG.getConstant(Val: `0`, DL, VT: ShSVT));
7194	}
7195
7196	PVal = DAG.getBuildVector(VT, DL, Ops: PAmts);
7197	KVal = DAG.getBuildVector(VT: ShVT, DL, Ops: KAmts);
7198	QVal = DAG.getBuildVector(VT, DL, Ops: QAmts);
7199	} else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7200	assert(PAmts.size() == `1` && KAmts.size() == `1` && QAmts.size() == `1` &&
7201	"Expected matchBinaryPredicate to return one element for "
7202	"SPLAT_VECTORs");
7203	PVal = DAG.getSplatVector(VT, DL, Op: PAmts [`0`]);
7204	KVal = DAG.getSplatVector(VT: ShVT, DL, Op: KAmts [`0`]);
7205	QVal = DAG.getSplatVector(VT, DL, Op: QAmts [`0`]);
7206	} else {
7207	PVal = PAmts [`0`];
7208	KVal = KAmts [`0`];
7209	QVal = QAmts [`0`];
7210	}
7211
7212	if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
7213	if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::SUB, VT))
7214	return SDValue (); // FIXME: Could/should use `ISD::ADD`?
7215	assert(CompTargetNode.getValueType() == N.getValueType() &&
7216	"Expecting that the types on LHS and RHS of comparisons match.");
7217	N = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: N, N2: CompTargetNode);
7218	}
7219
7220	// (mul N, P)
7221	SDValue Op0 = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N, N2: PVal);
7222	Created.push_back(Elt: Op0.getNode());
7223
7224	// Rotate right only if any divisor was even. We avoid rotates for all-odd
7225	// divisors as a performance improvement, since rotating by 0 is a no-op.
7226	if (HadEvenDivisor) {
7227	// We need ROTR to do this.
7228	if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ROTR, VT))
7229	return SDValue ();
7230	// UREM: (rotr (mul N, P), K)
7231	Op0 = DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: Op0, N2: KVal);
7232	Created.push_back(Elt: Op0.getNode());
7233	}
7234
7235	// UREM: (setule/setugt (rotr (mul N, P), K), Q)
7236	SDValue NewCC =
7237	DAG.getSetCC(DL, VT: SETCCVT, LHS: Op0, RHS: QVal,
7238	Cond: ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7239	if (!HadTautologicalInvertedLanes)
7240	return NewCC;
7241
7242	// If any lanes previously compared always-false, the NewCC will give
7243	// always-true result for them, so we need to fixup those lanes.
7244	// Or the other way around for inequality predicate.
7245	assert(VT.isVector() && "Can/should only get here for vectors.");
7246	Created.push_back(Elt: NewCC.getNode());
7247
7248	// x u% C1` is always* less than C1. So given `x u% C1 == C2`,*
7249	// if C2 is not less than C1, the comparison is always false.
7250	// But we have produced the comparison that will give the
7251	// opposive tautological answer. So these lanes would need to be fixed up.
7252	SDValue TautologicalInvertedChannels =
7253	DAG.getSetCC(DL, VT: SETCCVT, LHS: D, RHS: CompTargetNode, Cond: ISD::SETULE);
7254	Created.push_back(Elt: TautologicalInvertedChannels.getNode());
7255
7256	// NOTE: we avoid letting illegal types through even if we're before legalize
7257	// ops – legalization has a hard time producing good code for this.
7258	if (isOperationLegalOrCustom(Op: ISD::VSELECT, VT: SETCCVT)) {
7259	// If we have a vector select, let's replace the comparison results in the
7260	// affected lanes with the correct tautological result.
7261	SDValue Replacement = DAG.getBoolConstant(V: Cond == ISD::SETEQ ? false : true,
7262	DL, VT: SETCCVT, OpVT: SETCCVT);
7263	return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: SETCCVT, N1: TautologicalInvertedChannels,
7264	N2: Replacement, N3: NewCC);
7265	}
7266
7267	// Else, we can just invert the comparison result in the appropriate lanes.
7268	//
7269	// NOTE: see the note above VSELECT above.
7270	if (isOperationLegalOrCustom(Op: ISD::XOR, VT: SETCCVT))
7271	return DAG.getNode(Opcode: ISD::XOR, DL, VT: SETCCVT, N1: NewCC,
7272	N2: TautologicalInvertedChannels);
7273
7274	return SDValue (); // Don't know how to lower.
7275	}
7276
7277	/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
7278	/// where the divisor is constant and the comparison target is zero,
7279	/// return a DAG expression that will generate the same comparison result
7280	/// using only multiplications, additions and shifts/rotations.
7281	/// Ref: "Hacker's Delight" 10-17.
7282	SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
7283	SDValue CompTargetNode,
7284	ISD::CondCode Cond,
7285	DAGCombinerInfo &DCI,
7286	const SDLoc &DL) const {
7287	SmallVector<SDNode *, `7`> Built;
7288	if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7289	DCI, DL, Created&: Built)) {
7290	assert(Built.size() <= `7` && "Max size prediction failed.");
7291	for (SDNode *N : Built)
7292	DCI.AddToWorklist(N);
7293	return Folded;
7294	}
7295
7296	return SDValue ();
7297	}
7298
7299	SDValue
7300	TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
7301	SDValue CompTargetNode, ISD::CondCode Cond,
7302	DAGCombinerInfo &DCI, const SDLoc &DL,
7303	SmallVectorImpl<SDNode > &Created) const* {
7304	// Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
7305	// Fold:
7306	// (seteq/ne (srem N, D), 0)
7307	// To:
7308	// (setule/ugt (rotr (add (mul N, P), A), K), Q)
7309	//
7310	// - D must be constant, with D = D0 2^K where D0 is odd*
7311	// - P is the multiplicative inverse of D0 modulo 2^W
7312	// - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
7313	// - Q = floor((2 A) / (2^K))*
7314	// where W is the width of the common type of N and D.
7315	//
7316	// When D is a power of two (and thus D0 is 1), the normal
7317	// formula for A and Q don't apply, because the derivation
7318	// depends on D not dividing 2^(W-1), and thus theorem ZRS
7319	// does not apply. This specifically fails when N = INT_MIN.
7320	//
7321	// Instead, for power-of-two D, we use:
7322	// - A = 2^(W-1)
7323	// \|-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
7324	// - Q = 2^(W-K) - 1
7325	// \|-> Test that the top K bits are zero after rotation
7326	assert((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
7327	"Only applicable for (in)equality comparisons.");
7328
7329	SelectionDAG &DAG = DCI.DAG;
7330
7331	EVT VT = REMNode.getValueType();
7332	EVT SVT = VT.getScalarType();
7333	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
7334	EVT ShSVT = ShVT.getScalarType();
7335
7336	// If we are after ops legalization, and MUL is unavailable, we can not
7337	// proceed.
7338	if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::MUL, VT))
7339	return SDValue ();
7340
7341	// TODO: Could support comparing with non-zero too.
7342	ConstantSDNode *CompTarget = isConstOrConstSplat(N: CompTargetNode);
7343	if (!CompTarget \|\| !CompTarget->isZero())
7344	return SDValue ();
7345
7346	bool HadIntMinDivisor = false;
7347	bool HadOneDivisor = false;
7348	bool AllDivisorsAreOnes = true;
7349	bool HadEvenDivisor = false;
7350	bool NeedToApplyOffset = false;
7351	bool AllDivisorsArePowerOfTwo = true;
7352	SmallVector<SDValue, `16`> PAmts, AAmts, KAmts, QAmts;
7353
7354	auto BuildSREMPattern = [&](ConstantSDNode *C) {
7355	// Division by 0 is UB. Leave it to be constant-folded elsewhere.
7356	if (C->isZero())
7357	return false;
7358
7359	// FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
7360
7361	// WARNING: this fold is only valid for positive divisors!
7362	APInt D = C->getAPIntValue();
7363	if (D.isNegative())
7364	D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
7365
7366	HadIntMinDivisor \|= D.isMinSignedValue();
7367
7368	// If all divisors are ones, we will prefer to avoid the fold.
7369	HadOneDivisor \|= D.isOne();
7370	AllDivisorsAreOnes &= D.isOne();
7371
7372	// Decompose D into D0 2^K*
7373	unsigned K = D.countr_zero();
7374	assert((!D.isOne() \|\| (K == `0`)) && "For divisor '1' we won't rotate.");
7375	APInt D0 = D.lshr(shiftAmt: K);
7376
7377	if (!D.isMinSignedValue()) {
7378	// D is even if it has trailing zeros; unless it's INT_MIN, in which case
7379	// we don't care about this lane in this fold, we'll special-handle it.
7380	HadEvenDivisor \|= (K != `0`);
7381	}
7382
7383	// D is a power-of-two if D0 is one. This includes INT_MIN.
7384	// If all divisors are power-of-two, we will prefer to avoid the fold.
7385	AllDivisorsArePowerOfTwo &= D0.isOne();
7386
7387	// P = inv(D0, 2^W)
7388	// 2^W requires W + 1 bits, so we have to extend and then truncate.
7389	unsigned W = D.getBitWidth();
7390	APInt P = D0.multiplicativeInverse();
7391	assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7392
7393	// A = floor((2^(W - 1) - 1) / D0) & -2^K
7394	APInt A = APInt::getSignedMaxValue(numBits: W).udiv(RHS: D0);
7395	A.clearLowBits(loBits: K);
7396
7397	if (!D.isMinSignedValue()) {
7398	// If divisor INT_MIN, then we don't care about this lane in this fold,
7399	// we'll special-handle it.
7400	NeedToApplyOffset \|= A != `0`;
7401	}
7402
7403	// Q = floor((2 A) / (2^K))*
7404	APInt Q = (`2` * A).udiv(RHS: APInt::getOneBitSet(numBits: W, BitNo: K));
7405
7406	assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
7407	"We are expecting that A is always less than all-ones for SVT");
7408	assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
7409	"We are expecting that K is always less than all-ones for ShSVT");
7410
7411	// If D was a power of two, apply the alternate constant derivation.
7412	if (D0.isOne()) {
7413	// A = 2^(W-1)
7414	A = APInt::getSignedMinValue(numBits: W);
7415	// - Q = 2^(W-K) - 1
7416	Q = APInt::getAllOnes(numBits: W - K).zext(width: W);
7417	}
7418
7419	// If the divisor is 1 the result can be constant-folded. Likewise, we
7420	// don't care about INT_MIN lanes, those can be set to undef if appropriate.
7421	if (D.isOne()) {
7422	// Set P, A and K to a bogus values so we can try to splat them.
7423	P = `0`;
7424	A = -`1`;
7425	K = -`1`;
7426
7427	// x ?% 1 == 0 <--> true <--> x u<= -1
7428	Q = -`1`;
7429	}
7430
7431	PAmts.push_back(Elt: DAG.getConstant(Val: P, DL, VT: SVT));
7432	AAmts.push_back(Elt: DAG.getConstant(Val: A, DL, VT: SVT));
7433	KAmts.push_back(
7434	Elt: DAG.getConstant(Val: APInt (ShSVT.getSizeInBits(), K, /isSigned=/false,
7435	/implicitTrunc=/true),
7436	DL, VT: ShSVT));
7437	QAmts.push_back(Elt: DAG.getConstant(Val: Q, DL, VT: SVT));
7438	return true;
7439	};
7440
7441	SDValue N = REMNode.getOperand(i: `0`);
7442	SDValue D = REMNode.getOperand(i: `1`);
7443
7444	// Collect the values from each element.
7445	if (!ISD::matchUnaryPredicate(Op: D, Match: BuildSREMPattern))
7446	return SDValue ();
7447
7448	// If this is a srem by a one, avoid the fold since it can be constant-folded.
7449	if (AllDivisorsAreOnes)
7450	return SDValue ();
7451
7452	// If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
7453	// since it can be best implemented as a bit test.
7454	if (AllDivisorsArePowerOfTwo)
7455	return SDValue ();
7456
7457	SDValue PVal, AVal, KVal, QVal;
7458	if (D.getOpcode() == ISD::BUILD_VECTOR) {
7459	if (HadOneDivisor) {
7460	// Try to turn PAmts into a splat, since we don't care about the values
7461	// that are currently '0'. If we can't, just keep '0'`s.
7462	turnVectorIntoSplatVector(Values: PAmts, Predicate: isNullConstant);
7463	// Try to turn AAmts into a splat, since we don't care about the
7464	// values that are currently '-1'. If we can't, change them to '0'`s.
7465	turnVectorIntoSplatVector(Values: AAmts, Predicate: isAllOnesConstant,
7466	AlternativeReplacement: DAG.getConstant(Val: `0`, DL, VT: SVT));
7467	// Try to turn KAmts into a splat, since we don't care about the values
7468	// that are currently '-1'. If we can't, change them to '0'`s.
7469	turnVectorIntoSplatVector(Values: KAmts, Predicate: isAllOnesConstant,
7470	AlternativeReplacement: DAG.getConstant(Val: `0`, DL, VT: ShSVT));
7471	}
7472
7473	PVal = DAG.getBuildVector(VT, DL, Ops: PAmts);
7474	AVal = DAG.getBuildVector(VT, DL, Ops: AAmts);
7475	KVal = DAG.getBuildVector(VT: ShVT, DL, Ops: KAmts);
7476	QVal = DAG.getBuildVector(VT, DL, Ops: QAmts);
7477	} else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7478	assert(PAmts.size() == `1` && AAmts.size() == `1` && KAmts.size() == `1` &&
7479	QAmts.size() == `1` &&
7480	"Expected matchUnaryPredicate to return one element for scalable "
7481	"vectors");
7482	PVal = DAG.getSplatVector(VT, DL, Op: PAmts [`0`]);
7483	AVal = DAG.getSplatVector(VT, DL, Op: AAmts [`0`]);
7484	KVal = DAG.getSplatVector(VT: ShVT, DL, Op: KAmts [`0`]);
7485	QVal = DAG.getSplatVector(VT, DL, Op: QAmts [`0`]);
7486	} else {
7487	assert(isa<ConstantSDNode>(D) && "Expected a constant");
7488	PVal = PAmts [`0`];
7489	AVal = AAmts [`0`];
7490	KVal = KAmts [`0`];
7491	QVal = QAmts [`0`];
7492	}
7493
7494	// (mul N, P)
7495	SDValue Op0 = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N, N2: PVal);
7496	Created.push_back(Elt: Op0.getNode());
7497
7498	if (NeedToApplyOffset) {
7499	// We need ADD to do this.
7500	if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ADD, VT))
7501	return SDValue ();
7502
7503	// (add (mul N, P), A)
7504	Op0 = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0, N2: AVal);
7505	Created.push_back(Elt: Op0.getNode());
7506	}
7507
7508	// Rotate right only if any divisor was even. We avoid rotates for all-odd
7509	// divisors as a performance improvement, since rotating by 0 is a no-op.
7510	if (HadEvenDivisor) {
7511	// We need ROTR to do this.
7512	if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ROTR, VT))
7513	return SDValue ();
7514	// SREM: (rotr (add (mul N, P), A), K)
7515	Op0 = DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: Op0, N2: KVal);
7516	Created.push_back(Elt: Op0.getNode());
7517	}
7518
7519	// SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7520	SDValue Fold =
7521	DAG.getSetCC(DL, VT: SETCCVT, LHS: Op0, RHS: QVal,
7522	Cond: ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7523
7524	// If we didn't have lanes with INT_MIN divisor, then we're done.
7525	if (!HadIntMinDivisor)
7526	return Fold;
7527
7528	// That fold is only valid for positive divisors. Which effectively means,
7529	// it is invalid for INT_MIN divisors. So if we have such a lane,
7530	// we must fix-up results for said lanes.
7531	assert(VT.isVector() && "Can/should only get here for vectors.");
7532
7533	// NOTE: we avoid letting illegal types through even if we're before legalize
7534	// ops – legalization has a hard time producing good code for the code that
7535	// follows.
7536	if (!isOperationLegalOrCustom(Op: ISD::SETCC, VT: SETCCVT) \|\|
7537	!isOperationLegalOrCustom(Op: ISD::AND, VT) \|\|
7538	!isCondCodeLegalOrCustom(CC: Cond, VT: VT.getSimpleVT()) \|\|
7539	!isOperationLegalOrCustom(Op: ISD::VSELECT, VT: SETCCVT))
7540	return SDValue ();
7541
7542	Created.push_back(Elt: Fold.getNode());
7543
7544	SDValue IntMin = DAG.getConstant(
7545	Val: APInt::getSignedMinValue(numBits: SVT.getScalarSizeInBits()), DL, VT);
7546	SDValue IntMax = DAG.getConstant(
7547	Val: APInt::getSignedMaxValue(numBits: SVT.getScalarSizeInBits()), DL, VT);
7548	SDValue Zero =
7549	DAG.getConstant(Val: APInt::getZero(numBits: SVT.getScalarSizeInBits()), DL, VT);
7550
7551	// Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7552	SDValue DivisorIsIntMin = DAG.getSetCC(DL, VT: SETCCVT, LHS: D, RHS: IntMin, Cond: ISD::SETEQ);
7553	Created.push_back(Elt: DivisorIsIntMin.getNode());
7554
7555	// (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
7556	SDValue Masked = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: N, N2: IntMax);
7557	Created.push_back(Elt: Masked.getNode());
7558	SDValue MaskedIsZero = DAG.getSetCC(DL, VT: SETCCVT, LHS: Masked, RHS: Zero, Cond);
7559	Created.push_back(Elt: MaskedIsZero.getNode());
7560
7561	// To produce final result we need to blend 2 vectors: 'SetCC' and
7562	// 'MaskedIsZero'. If the divisor for channel was NOT* INT_MIN, we pick*
7563	// from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7564	// constant-folded, select can get lowered to a shuffle with constant mask.
7565	SDValue Blended = DAG.getNode(Opcode: ISD::VSELECT, DL, VT: SETCCVT, N1: DivisorIsIntMin,
7566	N2: MaskedIsZero, N3: Fold);
7567
7568	return Blended;
7569	}
7570
7571	SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
7572	const DenormalMode &Mode,
7573	SDNodeFlags Flags) const {
7574	SDLoc DL(Op);
7575	EVT VT = Op.getValueType();
7576	EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
7577	SDValue FPZero = DAG.getConstantFP(Val: `0.0`, DL, VT);
7578
7579	// This is specifically a check for the handling of denormal inputs, not the
7580	// result.
7581	if (Mode.Input == DenormalMode::PreserveSign \|\|
7582	Mode.Input == DenormalMode::PositiveZero) {
7583	// Test = X == 0.0
7584	return DAG.getSetCC(DL, VT: CCVT, LHS: Op, RHS: FPZero, Cond: ISD::SETEQ, /Chain=/{},
7585	/Signaling=/IsSignaling: false, Flags);
7586	}
7587
7588	// Testing it with denormal inputs to avoid wrong estimate.
7589	//
7590	// Test = fabs(X) < SmallestNormal
7591	const fltSemantics &FltSem = VT.getFltSemantics();
7592	APFloat SmallestNorm = APFloat::getSmallestNormalized(Sem: FltSem);
7593	SDValue NormC = DAG.getConstantFP(Val: SmallestNorm, DL, VT);
7594	SDValue Fabs = DAG.getNode(Opcode: ISD::FABS, DL, VT, Operand: Op, Flags);
7595	return DAG.getSetCC(DL, VT: CCVT, LHS: Fabs, RHS: NormC, Cond: ISD::SETLT, /Chain=/{},
7596	/Signaling=/IsSignaling: false, Flags);
7597	}
7598
7599	SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
7600	bool LegalOps, bool OptForSize,
7601	NegatibleCost &Cost,
7602	unsigned Depth) const {
7603	// fneg is removable even if it has multiple uses.
7604	if (Op.getOpcode() == ISD::FNEG \|\| Op.getOpcode() == ISD::VP_FNEG) {
7605	Cost = NegatibleCost::Cheaper;
7606	return Op.getOperand(i: `0`);
7607	}
7608
7609	// Don't recurse exponentially.
7610	if (Depth > SelectionDAG::MaxRecursionDepth)
7611	return SDValue ();
7612
7613	// Pre-increment recursion depth for use in recursive calls.
7614	++Depth;
7615	const SDNodeFlags Flags = Op ->getFlags();
7616	EVT VT = Op.getValueType();
7617	unsigned Opcode = Op.getOpcode();
7618
7619	// Don't allow anything with multiple uses unless we know it is free.
7620	if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7621	bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7622	isFPExtFree(DestVT: VT, SrcVT: Op.getOperand(i: `0`).getValueType());
7623	if (!IsFreeExtend)
7624	return SDValue ();
7625	}
7626
7627	auto RemoveDeadNode = [&](SDValue N) {
7628	if (N && N.getNode()->use_empty())
7629	DAG.RemoveDeadNode(N: N.getNode());
7630	};
7631
7632	SDLoc DL(Op);
7633
7634	// Because getNegatedExpression can delete nodes we need a handle to keep
7635	// temporary nodes alive in case the recursion manages to create an identical
7636	// node.
7637	std::list<HandleSDNode> Handles;
7638
7639	switch (Opcode) {
7640	case ISD::ConstantFP: {
7641	// Don't invert constant FP values after legalization unless the target says
7642	// the negated constant is legal.
7643	bool IsOpLegal =
7644	isOperationLegal(Op: ISD::ConstantFP, VT) \|\|
7645	isFPImmLegal(neg(X: cast<ConstantFPSDNode>(Val&: Op)->getValueAPF()), VT,
7646	ForCodeSize: OptForSize);
7647
7648	if (LegalOps && !IsOpLegal)
7649	break;
7650
7651	APFloat V = cast<ConstantFPSDNode>(Val&: Op)->getValueAPF();
7652	V.changeSign();
7653	SDValue CFP = DAG.getConstantFP(Val: V, DL, VT);
7654
7655	// If we already have the use of the negated floating constant, it is free
7656	// to negate it even it has multiple uses.
7657	if (!Op.hasOneUse() && CFP.use_empty())
7658	break;
7659	Cost = NegatibleCost::Neutral;
7660	return CFP;
7661	}
7662	case ISD::SPLAT_VECTOR: {
7663	// fold splat_vector(fneg(X)) -> splat_vector(-X)
7664	SDValue X = Op.getOperand(i: `0`);
7665	if (!isOperationLegal(Op: ISD::SPLAT_VECTOR, VT))
7666	break;
7667
7668	SDValue NegX = getCheaperNegatedExpression(Op: X, DAG, LegalOps, OptForSize);
7669	if (!NegX)
7670	break;
7671	Cost = NegatibleCost::Cheaper;
7672	return DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL, VT, Operand: NegX);
7673	}
7674	case ISD::BUILD_VECTOR: {
7675	// Only permit BUILD_VECTOR of constants.
7676	if (llvm::any_of(Range: Op ->op_values(), P: [&](SDValue N) {
7677	return !N.isUndef() && !isa<ConstantFPSDNode>(Val: N);
7678	}))
7679	break;
7680
7681	bool IsOpLegal =
7682	(isOperationLegal(Op: ISD::ConstantFP, VT) &&
7683	isOperationLegal(Op: ISD::BUILD_VECTOR, VT)) \|\|
7684	llvm::all_of(Range: Op ->op_values(), P: [&](SDValue N) {
7685	return N.isUndef() \|\|
7686	isFPImmLegal(neg(X: cast<ConstantFPSDNode>(Val&: N)->getValueAPF()), VT,
7687	ForCodeSize: OptForSize);
7688	});
7689
7690	if (LegalOps && !IsOpLegal)
7691	break;
7692
7693	SmallVector<SDValue, `4`> Ops;
7694	for (SDValue C : Op ->op_values()) {
7695	if (C.isUndef()) {
7696	Ops.push_back(Elt: C);
7697	continue;
7698	}
7699	APFloat V = cast<ConstantFPSDNode>(Val&: C)->getValueAPF();
7700	V.changeSign();
7701	Ops.push_back(Elt: DAG.getConstantFP(Val: V, DL, VT: C.getValueType()));
7702	}
7703	Cost = NegatibleCost::Neutral;
7704	return DAG.getBuildVector(VT, DL, Ops);
7705	}
7706	case ISD::FADD: {
7707	if (!Flags.hasNoSignedZeros())
7708	break;
7709
7710	// After operation legalization, it might not be legal to create new FSUBs.
7711	if (LegalOps && !isOperationLegalOrCustom(Op: ISD::FSUB, VT))
7712	break;
7713	SDValue X = Op.getOperand(i: `0`), Y = Op.getOperand(i: `1`);
7714
7715	// fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7716	NegatibleCost CostX = NegatibleCost::Expensive;
7717	SDValue NegX =
7718	getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7719	// Prevent this node from being deleted by the next call.
7720	if (NegX)
7721	Handles.emplace_back(args&: NegX);
7722
7723	// fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7724	NegatibleCost CostY = NegatibleCost::Expensive;
7725	SDValue NegY =
7726	getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7727
7728	// We're done with the handles.
7729	Handles.clear();
7730
7731	// Negate the X if its cost is less or equal than Y.
7732	if (NegX && (CostX <= CostY)) {
7733	Cost = CostX;
7734	SDValue N = DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: NegX, N2: Y, Flags);
7735	if (NegY != N)
7736	RemoveDeadNode (NegY);
7737	return N;
7738	}
7739
7740	// Negate the Y if it is not expensive.
7741	if (NegY) {
7742	Cost = CostY;
7743	SDValue N = DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: NegY, N2: X, Flags);
7744	if (NegX != N)
7745	RemoveDeadNode (NegX);
7746	return N;
7747	}
7748	break;
7749	}
7750	case ISD::FSUB: {
7751	// We can't turn -(A-B) into B-A when we honor signed zeros.
7752	if (!Flags.hasNoSignedZeros())
7753	break;
7754
7755	SDValue X = Op.getOperand(i: `0`), Y = Op.getOperand(i: `1`);
7756	// fold (fneg (fsub 0, Y)) -> Y
7757	if (ConstantFPSDNode C = isConstOrConstSplatFP(N: X, /AllowUndefs/* true))
7758	if (C->isZero()) {
7759	Cost = NegatibleCost::Cheaper;
7760	return Y;
7761	}
7762
7763	// fold (fneg (fsub X, Y)) -> (fsub Y, X)
7764	Cost = NegatibleCost::Neutral;
7765	return DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: Y, N2: X, Flags);
7766	}
7767	case ISD::FMUL:
7768	case ISD::FDIV: {
7769	SDValue X = Op.getOperand(i: `0`), Y = Op.getOperand(i: `1`);
7770
7771	// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7772	NegatibleCost CostX = NegatibleCost::Expensive;
7773	SDValue NegX =
7774	getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7775	// Prevent this node from being deleted by the next call.
7776	if (NegX)
7777	Handles.emplace_back(args&: NegX);
7778
7779	// fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7780	NegatibleCost CostY = NegatibleCost::Expensive;
7781	SDValue NegY =
7782	getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7783
7784	// We're done with the handles.
7785	Handles.clear();
7786
7787	// Negate the X if its cost is less or equal than Y.
7788	if (NegX && (CostX <= CostY)) {
7789	Cost = CostX;
7790	SDValue N = DAG.getNode(Opcode, DL, VT, N1: NegX, N2: Y, Flags);
7791	if (NegY != N)
7792	RemoveDeadNode (NegY);
7793	return N;
7794	}
7795
7796	// Ignore X 2.0 because that is expected to be canonicalized to X + X.*
7797	if (auto *C = isConstOrConstSplatFP(N: Op.getOperand(i: `1`)))
7798	if (C->isExactlyValue(V: `2.0`) && Op.getOpcode() == ISD::FMUL)
7799	break;
7800
7801	// Negate the Y if it is not expensive.
7802	if (NegY) {
7803	Cost = CostY;
7804	SDValue N = DAG.getNode(Opcode, DL, VT, N1: X, N2: NegY, Flags);
7805	if (NegX != N)
7806	RemoveDeadNode (NegX);
7807	return N;
7808	}
7809	break;
7810	}
7811	case ISD::FMA:
7812	case ISD::FMULADD:
7813	case ISD::FMAD: {
7814	if (!Flags.hasNoSignedZeros())
7815	break;
7816
7817	SDValue X = Op.getOperand(i: `0`), Y = Op.getOperand(i: `1`), Z = Op.getOperand(i: `2`);
7818	NegatibleCost CostZ = NegatibleCost::Expensive;
7819	SDValue NegZ =
7820	getNegatedExpression(Op: Z, DAG, LegalOps, OptForSize, Cost&: CostZ, Depth);
7821	// Give up if fail to negate the Z.
7822	if (!NegZ)
7823	break;
7824
7825	// Prevent this node from being deleted by the next two calls.
7826	Handles.emplace_back(args&: NegZ);
7827
7828	// fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7829	NegatibleCost CostX = NegatibleCost::Expensive;
7830	SDValue NegX =
7831	getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7832	// Prevent this node from being deleted by the next call.
7833	if (NegX)
7834	Handles.emplace_back(args&: NegX);
7835
7836	// fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7837	NegatibleCost CostY = NegatibleCost::Expensive;
7838	SDValue NegY =
7839	getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7840
7841	// We're done with the handles.
7842	Handles.clear();
7843
7844	// Negate the X if its cost is less or equal than Y.
7845	if (NegX && (CostX <= CostY)) {
7846	Cost = std::min(a: CostX, b: CostZ);
7847	SDValue N = DAG.getNode(Opcode, DL, VT, N1: NegX, N2: Y, N3: NegZ, Flags);
7848	if (NegY != N)
7849	RemoveDeadNode (NegY);
7850	return N;
7851	}
7852
7853	// Negate the Y if it is not expensive.
7854	if (NegY) {
7855	Cost = std::min(a: CostY, b: CostZ);
7856	SDValue N = DAG.getNode(Opcode, DL, VT, N1: X, N2: NegY, N3: NegZ, Flags);
7857	if (NegX != N)
7858	RemoveDeadNode (NegX);
7859	return N;
7860	}
7861	break;
7862	}
7863
7864	case ISD::FP_EXTEND:
7865	case ISD::FSIN:
7866	if (SDValue NegV = getNegatedExpression(Op: Op.getOperand(i: `0`), DAG, LegalOps,
7867	OptForSize, Cost, Depth))
7868	return DAG.getNode(Opcode, DL, VT, Operand: NegV);
7869	break;
7870	case ISD::FP_ROUND:
7871	if (SDValue NegV = getNegatedExpression(Op: Op.getOperand(i: `0`), DAG, LegalOps,
7872	OptForSize, Cost, Depth))
7873	return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT, N1: NegV, N2: Op.getOperand(i: `1`));
7874	break;
7875	case ISD::SELECT:
7876	case ISD::VSELECT: {
7877	// fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7878	// iff at least one cost is cheaper and the other is neutral/cheaper
7879	SDValue LHS = Op.getOperand(i: `1`);
7880	NegatibleCost CostLHS = NegatibleCost::Expensive;
7881	SDValue NegLHS =
7882	getNegatedExpression(Op: LHS, DAG, LegalOps, OptForSize, Cost&: CostLHS, Depth);
7883	if (!NegLHS \|\| CostLHS > NegatibleCost::Neutral) {
7884	RemoveDeadNode (NegLHS);
7885	break;
7886	}
7887
7888	// Prevent this node from being deleted by the next call.
7889	Handles.emplace_back(args&: NegLHS);
7890
7891	SDValue RHS = Op.getOperand(i: `2`);
7892	NegatibleCost CostRHS = NegatibleCost::Expensive;
7893	SDValue NegRHS =
7894	getNegatedExpression(Op: RHS, DAG, LegalOps, OptForSize, Cost&: CostRHS, Depth);
7895
7896	// We're done with the handles.
7897	Handles.clear();
7898
7899	if (!NegRHS \|\| CostRHS > NegatibleCost::Neutral \|\|
7900	(CostLHS != NegatibleCost::Cheaper &&
7901	CostRHS != NegatibleCost::Cheaper)) {
7902	RemoveDeadNode (NegLHS);
7903	RemoveDeadNode (NegRHS);
7904	break;
7905	}
7906
7907	Cost = std::min(a: CostLHS, b: CostRHS);
7908	return DAG.getSelect(DL, VT, Cond: Op.getOperand(i: `0`), LHS: NegLHS, RHS: NegRHS);
7909	}
7910	}
7911
7912	return SDValue ();
7913	}
7914
7915	//===----------------------------------------------------------------------===//
7916	// Legalization Utilities
7917	//===----------------------------------------------------------------------===//
7918
7919	bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7920	SDValue LHS, SDValue RHS,
7921	SmallVectorImpl<SDValue> &Result,
7922	EVT HiLoVT, SelectionDAG &DAG,
7923	MulExpansionKind Kind, SDValue LL,
7924	SDValue LH, SDValue RL, SDValue RH) const {
7925	assert(Opcode == ISD::MUL \|\| Opcode == ISD::UMUL_LOHI \|\|
7926	Opcode == ISD::SMUL_LOHI);
7927
7928	bool HasMULHS = (Kind == MulExpansionKind::Always) \|\|
7929	isOperationLegalOrCustom(Op: ISD::MULHS, VT: HiLoVT);
7930	bool HasMULHU = (Kind == MulExpansionKind::Always) \|\|
7931	isOperationLegalOrCustom(Op: ISD::MULHU, VT: HiLoVT);
7932	bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) \|\|
7933	isOperationLegalOrCustom(Op: ISD::SMUL_LOHI, VT: HiLoVT);
7934	bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) \|\|
7935	isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT: HiLoVT);
7936
7937	if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7938	return false;
7939
7940	unsigned OuterBitSize = VT.getScalarSizeInBits();
7941	unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7942
7943	// LL, LH, RL, and RH must be either all NULL or all set to a value.
7944	assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) \|\|
7945	(!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7946
7947	auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7948	bool Signed) -> bool {
7949	if ((Signed && HasSMUL_LOHI) \|\| (!Signed && HasUMUL_LOHI)) {
7950	SDVTList VTs = DAG.getVTList(VT1: HiLoVT, VT2: HiLoVT);
7951	Lo = DAG.getNode(Opcode: Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, DL: dl, VTList: VTs, N1: L, N2: R);
7952	Hi = Lo.getValue(R: `1`);
7953	return true;
7954	}
7955	if ((Signed && HasMULHS) \|\| (!Signed && HasMULHU)) {
7956	Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: L, N2: R);
7957	Hi = DAG.getNode(Opcode: Signed ? ISD::MULHS : ISD::MULHU, DL: dl, VT: HiLoVT, N1: L, N2: R);
7958	return true;
7959	}
7960	return false;
7961	};
7962
7963	SDValue Lo, Hi;
7964
7965	if (!LL.getNode() && !RL.getNode() &&
7966	isOperationLegalOrCustom(Op: ISD::TRUNCATE, VT: HiLoVT)) {
7967	LL = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: LHS);
7968	RL = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: RHS);
7969	}
7970
7971	if (!LL.getNode())
7972	return false;
7973
7974	APInt HighMask = APInt::getHighBitsSet(numBits: OuterBitSize, hiBitsSet: InnerBitSize);
7975	if (DAG.MaskedValueIsZero(Op: LHS, Mask: HighMask) &&
7976	DAG.MaskedValueIsZero(Op: RHS, Mask: HighMask)) {
7977	// The inputs are both zero-extended.
7978	if (MakeMUL_LOHI (LL, RL, Lo, Hi, false)) {
7979	Result.push_back(Elt: Lo);
7980	Result.push_back(Elt: Hi);
7981	if (Opcode != ISD::MUL) {
7982	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT: HiLoVT);
7983	Result.push_back(Elt: Zero);
7984	Result.push_back(Elt: Zero);
7985	}
7986	return true;
7987	}
7988	}
7989
7990	if (!VT.isVector() && Opcode == ISD::MUL &&
7991	DAG.ComputeMaxSignificantBits(Op: LHS) <= InnerBitSize &&
7992	DAG.ComputeMaxSignificantBits(Op: RHS) <= InnerBitSize) {
7993	// The input values are both sign-extended.
7994	// TODO non-MUL case?
7995	if (MakeMUL_LOHI (LL, RL, Lo, Hi, true)) {
7996	Result.push_back(Elt: Lo);
7997	Result.push_back(Elt: Hi);
7998	return true;
7999	}
8000	}
8001
8002	unsigned ShiftAmount = OuterBitSize - InnerBitSize;
8003	SDValue Shift = DAG.getShiftAmountConstant(Val: ShiftAmount, VT, DL: dl);
8004
8005	if (!LH.getNode() && !RH.getNode() &&
8006	isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
8007	isOperationLegalOrCustom(Op: ISD::TRUNCATE, VT: HiLoVT)) {
8008	LH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: LHS, N2: Shift);
8009	LH = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: LH);
8010	RH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: RHS, N2: Shift);
8011	RH = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: RH);
8012	}
8013
8014	if (!LH.getNode())
8015	return false;
8016
8017	if (!MakeMUL_LOHI (LL, RL, Lo, Hi, false))
8018	return false;
8019
8020	Result.push_back(Elt: Lo);
8021
8022	if (Opcode == ISD::MUL) {
8023	RH = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: LL, N2: RH);
8024	LH = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: LH, N2: RL);
8025	Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Hi, N2: RH);
8026	Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Hi, N2: LH);
8027	Result.push_back(Elt: Hi);
8028	return true;
8029	}
8030
8031	// Compute the full width result.
8032	auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
8033	Lo = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Lo);
8034	Hi = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Hi);
8035	Hi = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Hi, N2: Shift);
8036	return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Lo, N2: Hi);
8037	};
8038
8039	SDValue Next = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Hi);
8040	if (!MakeMUL_LOHI (LL, RH, Lo, Hi, false))
8041	return false;
8042
8043	// This is effectively the add part of a multiply-add of half-sized operands,
8044	// so it cannot overflow.
8045	Next = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Next, N2: Merge (Lo, Hi));
8046
8047	if (!MakeMUL_LOHI (LH, RL, Lo, Hi, false))
8048	return false;
8049
8050	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT: HiLoVT);
8051	EVT BoolType = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
8052
8053	bool UseGlue = (isOperationLegalOrCustom(Op: ISD::ADDC, VT) &&
8054	isOperationLegalOrCustom(Op: ISD::ADDE, VT));
8055	if (UseGlue)
8056	Next = DAG.getNode(Opcode: ISD::ADDC, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::Glue), N1: Next,
8057	N2: Merge (Lo, Hi));
8058	else
8059	Next = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolType), N1: Next,
8060	N2: Merge (Lo, Hi), N3: DAG.getConstant(Val: `0`, DL: dl, VT: BoolType));
8061
8062	SDValue Carry = Next.getValue(R: `1`);
8063	Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
8064	Next = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Next, N2: Shift);
8065
8066	if (!MakeMUL_LOHI (LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
8067	return false;
8068
8069	if (UseGlue)
8070	Hi = DAG.getNode(Opcode: ISD::ADDE, DL: dl, VTList: DAG.getVTList(VT1: HiLoVT, VT2: MVT::Glue), N1: Hi, N2: Zero,
8071	N3: Carry);
8072	else
8073	Hi = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList: DAG.getVTList(VT1: HiLoVT, VT2: BoolType), N1: Hi,
8074	N2: Zero, N3: Carry);
8075
8076	Next = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Next, N2: Merge (Lo, Hi));
8077
8078	if (Opcode == ISD::SMUL_LOHI) {
8079	SDValue NextSub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Next,
8080	N2: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: RL));
8081	Next = DAG.getSelectCC(DL: dl, LHS: LH, RHS: Zero, True: NextSub, False: Next, Cond: ISD::SETLT);
8082
8083	NextSub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Next,
8084	N2: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: LL));
8085	Next = DAG.getSelectCC(DL: dl, LHS: RH, RHS: Zero, True: NextSub, False: Next, Cond: ISD::SETLT);
8086	}
8087
8088	Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
8089	Next = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Next, N2: Shift);
8090	Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
8091	return true;
8092	}
8093
8094	bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
8095	SelectionDAG &DAG, MulExpansionKind Kind,
8096	SDValue LL, SDValue LH, SDValue RL,
8097	SDValue RH) const {
8098	SmallVector<SDValue, `2`> Result;
8099	bool Ok = expandMUL_LOHI(Opcode: N->getOpcode(), VT: N->getValueType(ResNo: `0`), dl: SDLoc (N),
8100	LHS: N->getOperand(Num: `0`), RHS: N->getOperand(Num: `1`), Result, HiLoVT,
8101	DAG, Kind, LL, LH, RL, RH);
8102	if (Ok) {
8103	assert(Result.size() == `2`);
8104	Lo = Result [`0`];
8105	Hi = Result [`1`];
8106	}
8107	return Ok;
8108	}
8109
8110	// Optimize unsigned division or remainder by constants for types twice as large
8111	// as a legal VT.
8112	//
8113	// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
8114	// can be computed
8115	// as:
8116	// Sum += __builtin_uadd_overflow(Lo, High, &Sum);
8117	// Remainder = Sum % Constant
8118	// This is based on "Remainder by Summing Digits" from Hacker's Delight.
8119	//
8120	// For division, we can compute the remainder using the algorithm described
8121	// above, subtract it from the dividend to get an exact multiple of Constant.
8122	// Then multiply that exact multiply by the multiplicative inverse modulo
8123	// (1 << (BitWidth / 2)) to get the quotient.
8124
8125	// If Constant is even, we can shift right the dividend and the divisor by the
8126	// number of trailing zeros in Constant before applying the remainder algorithm.
8127	// If we're after the quotient, we can subtract this value from the shifted
8128	// dividend and multiply by the multiplicative inverse of the shifted divisor.
8129	// If we want the remainder, we shift the value left by the number of trailing
8130	// zeros and add the bits that were shifted out of the dividend.
8131	bool TargetLowering::expandDIVREMByConstant(SDNode *N,
8132	SmallVectorImpl<SDValue> &Result,
8133	EVT HiLoVT, SelectionDAG &DAG,
8134	SDValue LL, SDValue LH) const {
8135	unsigned Opcode = N->getOpcode();
8136	EVT VT = N->getValueType(ResNo: `0`);
8137
8138	// TODO: Support signed division/remainder.
8139	if (Opcode == ISD::SREM \|\| Opcode == ISD::SDIV \|\| Opcode == ISD::SDIVREM)
8140	return false;
8141	assert(
8142	(Opcode == ISD::UREM \|\| Opcode == ISD::UDIV \|\| Opcode == ISD::UDIVREM) &&
8143	"Unexpected opcode");
8144
8145	auto *CN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: `1`));
8146	if (!CN)
8147	return false;
8148
8149	APInt Divisor = CN->getAPIntValue();
8150	unsigned BitWidth = Divisor.getBitWidth();
8151	unsigned HBitWidth = BitWidth / `2`;
8152	assert(VT.getScalarSizeInBits() == BitWidth &&
8153	HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
8154
8155	// Divisor needs to less than (1 << HBitWidth).
8156	APInt HalfMaxPlus1 = APInt::getOneBitSet(numBits: BitWidth, BitNo: HBitWidth);
8157	if (Divisor.uge(RHS: HalfMaxPlus1))
8158	return false;
8159
8160	// We depend on the UREM by constant optimization in DAGCombiner that requires
8161	// high multiply.
8162	if (!isOperationLegalOrCustom(Op: ISD::MULHU, VT: HiLoVT) &&
8163	!isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT: HiLoVT))
8164	return false;
8165
8166	// Don't expand if optimizing for size.
8167	if (DAG.shouldOptForSize())
8168	return false;
8169
8170	// Early out for 0 or 1 divisors.
8171	if (Divisor.ule(RHS: `1`))
8172	return false;
8173
8174	// If the divisor is even, shift it until it becomes odd.
8175	unsigned TrailingZeros = `0`;
8176	if (!Divisor [`0`]) {
8177	TrailingZeros = Divisor.countr_zero();
8178	Divisor.lshrInPlace(ShiftAmt: TrailingZeros);
8179	}
8180
8181	SDLoc dl(N);
8182	SDValue Sum;
8183	SDValue PartialRem;
8184
8185	// If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
8186	// then add in the carry.
8187	// TODO: If we can't split it in half, we might be able to split into 3 or
8188	// more pieces using a smaller bit width.
8189	if (HalfMaxPlus1.urem(RHS: Divisor).isOne()) {
8190	assert(!LL == !LH && "Expected both input halves or no input halves!");
8191	if (!LL)
8192	std::tie(args&: LL, args&: LH) = DAG.SplitScalar(N: N->getOperand(Num: `0`), DL: dl, LoVT: HiLoVT, HiVT: HiLoVT);
8193
8194	// Shift the input by the number of TrailingZeros in the divisor. The
8195	// shifted out bits will be added to the remainder later.
8196	if (TrailingZeros) {
8197	// Save the shifted off bits if we need the remainder.
8198	if (Opcode != ISD::UDIV) {
8199	APInt Mask = APInt::getLowBitsSet(numBits: HBitWidth, loBitsSet: TrailingZeros);
8200	PartialRem = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: HiLoVT, N1: LL,
8201	N2: DAG.getConstant(Val: Mask, DL: dl, VT: HiLoVT));
8202	}
8203
8204	if (isOperationLegal(Op: ISD::FSHR, VT: HiLoVT))
8205	LL = DAG.getNode(Opcode: ISD::FSHR, DL: dl, VT: HiLoVT, N1: LH, N2: LL,
8206	N3: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl));
8207	else
8208	LL = DAG.getNode(
8209	Opcode: ISD::OR, DL: dl, VT: HiLoVT,
8210	N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HiLoVT, N1: LL,
8211	N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl)),
8212	N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HiLoVT, N1: LH,
8213	N2: DAG.getShiftAmountConstant(Val: HBitWidth - TrailingZeros,
8214	VT: HiLoVT, DL: dl)));
8215	LH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HiLoVT, N1: LH,
8216	N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl));
8217	}
8218
8219	// Use uaddo_carry if we can, otherwise use a compare to detect overflow.
8220	EVT SetCCType =
8221	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: HiLoVT);
8222	if (isOperationLegalOrCustom(Op: ISD::UADDO_CARRY, VT: HiLoVT)) {
8223	SDVTList VTList = DAG.getVTList(VT1: HiLoVT, VT2: SetCCType);
8224	Sum = DAG.getNode(Opcode: ISD::UADDO, DL: dl, VTList, N1: LL, N2: LH);
8225	Sum = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList, N1: Sum,
8226	N2: DAG.getConstant(Val: `0`, DL: dl, VT: HiLoVT), N3: Sum.getValue(R: `1`));
8227	} else {
8228	Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: LL, N2: LH);
8229	SDValue Carry = DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Sum, RHS: LL, Cond: ISD::SETULT);
8230	// If the boolean for the target is 0 or 1, we can add the setcc result
8231	// directly.
8232	if (getBooleanContents(Type: HiLoVT) ==
8233	TargetLoweringBase::ZeroOrOneBooleanContent)
8234	Carry = DAG.getZExtOrTrunc(Op: Carry, DL: dl, VT: HiLoVT);
8235	else
8236	Carry = DAG.getSelect(DL: dl, VT: HiLoVT, Cond: Carry, LHS: DAG.getConstant(Val: `1`, DL: dl, VT: HiLoVT),
8237	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: HiLoVT));
8238	Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Sum, N2: Carry);
8239	}
8240	}
8241
8242	// If we didn't find a sum, we can't do the expansion.
8243	if (!Sum)
8244	return false;
8245
8246	// Perform a HiLoVT urem on the Sum using truncated divisor.
8247	SDValue RemL =
8248	DAG.getNode(Opcode: ISD::UREM, DL: dl, VT: HiLoVT, N1: Sum,
8249	N2: DAG.getConstant(Val: Divisor.trunc(width: HBitWidth), DL: dl, VT: HiLoVT));
8250	SDValue RemH = DAG.getConstant(Val: `0`, DL: dl, VT: HiLoVT);
8251
8252	if (Opcode != ISD::UREM) {
8253	// Subtract the remainder from the shifted dividend.
8254	SDValue Dividend = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT, N1: LL, N2: LH);
8255	SDValue Rem = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT, N1: RemL, N2: RemH);
8256
8257	Dividend = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Dividend, N2: Rem);
8258
8259	// Multiply by the multiplicative inverse of the divisor modulo
8260	// (1 << BitWidth).
8261	APInt MulFactor = Divisor.multiplicativeInverse();
8262
8263	SDValue Quotient = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Dividend,
8264	N2: DAG.getConstant(Val: MulFactor, DL: dl, VT));
8265
8266	// Split the quotient into low and high parts.
8267	SDValue QuotL, QuotH;
8268	std::tie(args&: QuotL, args&: QuotH) = DAG.SplitScalar(N: Quotient, DL: dl, LoVT: HiLoVT, HiVT: HiLoVT);
8269	Result.push_back(Elt: QuotL);
8270	Result.push_back(Elt: QuotH);
8271	}
8272
8273	if (Opcode != ISD::UDIV) {
8274	// If we shifted the input, shift the remainder left and add the bits we
8275	// shifted off the input.
8276	if (TrailingZeros) {
8277	RemL = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HiLoVT, N1: RemL,
8278	N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl));
8279	RemL = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: RemL, N2: PartialRem);
8280	}
8281	Result.push_back(Elt: RemL);
8282	Result.push_back(Elt: DAG.getConstant(Val: `0`, DL: dl, VT: HiLoVT));
8283	}
8284
8285	return true;
8286	}
8287
8288	// Check that (every element of) Z is undef or not an exact multiple of BW.
8289	static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
8290	return ISD::matchUnaryPredicate(
8291	Op: Z,
8292	Match: [=](ConstantSDNode C) { return* !C \|\| C->getAPIntValue().urem(RHS: BW) != `0`; },
8293	/AllowUndefs=/true, /AllowTruncation=/true);
8294	}
8295
8296	static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
8297	EVT VT = Node->getValueType(ResNo: `0`);
8298	SDValue ShX, ShY;
8299	SDValue ShAmt, InvShAmt;
8300	SDValue X = Node->getOperand(Num: `0`);
8301	SDValue Y = Node->getOperand(Num: `1`);
8302	SDValue Z = Node->getOperand(Num: `2`);
8303	SDValue Mask = Node->getOperand(Num: `3`);
8304	SDValue VL = Node->getOperand(Num: `4`);
8305
8306	unsigned BW = VT.getScalarSizeInBits();
8307	bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
8308	SDLoc DL(SDValue (Node, `0`));
8309
8310	EVT ShVT = Z.getValueType();
8311	if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8312	// fshl: X << C \| Y >> (BW - C)
8313	// fshr: X << (BW - C) \| Y >> C
8314	// where C = Z % BW is not zero
8315	SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
8316	ShAmt = DAG.getNode(Opcode: ISD::VP_UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC, N3: Mask, N4: VL);
8317	InvShAmt = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT: ShVT, N1: BitWidthC, N2: ShAmt, N3: Mask, N4: VL);
8318	ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: IsFSHL ? ShAmt : InvShAmt, N3: Mask,
8319	N4: VL);
8320	ShY = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT, N1: Y, N2: IsFSHL ? InvShAmt : ShAmt, N3: Mask,
8321	N4: VL);
8322	} else {
8323	// fshl: X << (Z % BW) \| Y >> 1 >> (BW - 1 - (Z % BW))
8324	// fshr: X << 1 << (BW - 1 - (Z % BW)) \| Y >> (Z % BW)
8325	SDValue BitMask = DAG.getConstant(Val: BW - `1`, DL, VT: ShVT);
8326	if (isPowerOf2_32(Value: BW)) {
8327	// Z % BW -> Z & (BW - 1)
8328	ShAmt = DAG.getNode(Opcode: ISD::VP_AND, DL, VT: ShVT, N1: Z, N2: BitMask, N3: Mask, N4: VL);
8329	// (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8330	SDValue NotZ = DAG.getNode(Opcode: ISD::VP_XOR, DL, VT: ShVT, N1: Z,
8331	N2: DAG.getAllOnesConstant(DL, VT: ShVT), N3: Mask, N4: VL);
8332	InvShAmt = DAG.getNode(Opcode: ISD::VP_AND, DL, VT: ShVT, N1: NotZ, N2: BitMask, N3: Mask, N4: VL);
8333	} else {
8334	SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
8335	ShAmt = DAG.getNode(Opcode: ISD::VP_UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC, N3: Mask, N4: VL);
8336	InvShAmt = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT: ShVT, N1: BitMask, N2: ShAmt, N3: Mask, N4: VL);
8337	}
8338
8339	SDValue One = DAG.getConstant(Val: `1`, DL, VT: ShVT);
8340	if (IsFSHL) {
8341	ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: ShAmt, N3: Mask, N4: VL);
8342	SDValue ShY1 = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT, N1: Y, N2: One, N3: Mask, N4: VL);
8343	ShY = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT, N1: ShY1, N2: InvShAmt, N3: Mask, N4: VL);
8344	} else {
8345	SDValue ShX1 = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: One, N3: Mask, N4: VL);
8346	ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: ShX1, N2: InvShAmt, N3: Mask, N4: VL);
8347	ShY = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT, N1: Y, N2: ShAmt, N3: Mask, N4: VL);
8348	}
8349	}
8350	return DAG.getNode(Opcode: ISD::VP_OR, DL, VT, N1: ShX, N2: ShY, N3: Mask, N4: VL);
8351	}
8352
8353	SDValue TargetLowering::expandFunnelShift(SDNode *Node,
8354	SelectionDAG &DAG) const {
8355	if (Node->isVPOpcode())
8356	return expandVPFunnelShift(Node, DAG);
8357
8358	EVT VT = Node->getValueType(ResNo: `0`);
8359
8360	if (VT.isVector() && (!isOperationLegalOrCustom(Op: ISD::SHL, VT) \|\|
8361	!isOperationLegalOrCustom(Op: ISD::SRL, VT) \|\|
8362	!isOperationLegalOrCustom(Op: ISD::SUB, VT) \|\|
8363	!isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)))
8364	return SDValue ();
8365
8366	SDValue X = Node->getOperand(Num: `0`);
8367	SDValue Y = Node->getOperand(Num: `1`);
8368	SDValue Z = Node->getOperand(Num: `2`);
8369
8370	unsigned BW = VT.getScalarSizeInBits();
8371	bool IsFSHL = Node->getOpcode() == ISD::FSHL;
8372	SDLoc DL(SDValue (Node, `0`));
8373
8374	EVT ShVT = Z.getValueType();
8375
8376	// If a funnel shift in the other direction is more supported, use it.
8377	unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
8378	if (!isOperationLegalOrCustom(Op: Node->getOpcode(), VT) &&
8379	isOperationLegalOrCustom(Op: RevOpcode, VT) && isPowerOf2_32(Value: BW)) {
8380	if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8381	// fshl X, Y, Z -> fshr X, Y, -Z
8382	// fshr X, Y, Z -> fshl X, Y, -Z
8383	Z = DAG.getNegative(Val: Z, DL, VT: ShVT);
8384	} else {
8385	// fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8386	// fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
8387	SDValue One = DAG.getConstant(Val: `1`, DL, VT: ShVT);
8388	if (IsFSHL) {
8389	Y = DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: One);
8390	X = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: X, N2: One);
8391	} else {
8392	X = DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: One);
8393	Y = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Y, N2: One);
8394	}
8395	Z = DAG.getNOT(DL, Val: Z, VT: ShVT);
8396	}
8397	return DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: Z);
8398	}
8399
8400	SDValue ShX, ShY;
8401	SDValue ShAmt, InvShAmt;
8402	if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8403	// fshl: X << C \| Y >> (BW - C)
8404	// fshr: X << (BW - C) \| Y >> C
8405	// where C = Z % BW is not zero
8406	SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
8407	ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC);
8408	InvShAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: BitWidthC, N2: ShAmt);
8409	ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: IsFSHL ? ShAmt : InvShAmt);
8410	ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: IsFSHL ? InvShAmt : ShAmt);
8411	} else {
8412	// fshl: X << (Z % BW) \| Y >> 1 >> (BW - 1 - (Z % BW))
8413	// fshr: X << 1 << (BW - 1 - (Z % BW)) \| Y >> (Z % BW)
8414	SDValue Mask = DAG.getConstant(Val: BW - `1`, DL, VT: ShVT);
8415	if (isPowerOf2_32(Value: BW)) {
8416	// Z % BW -> Z & (BW - 1)
8417	ShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: Z, N2: Mask);
8418	// (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8419	InvShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: DAG.getNOT(DL, Val: Z, VT: ShVT), N2: Mask);
8420	} else {
8421	SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
8422	ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC);
8423	InvShAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Mask, N2: ShAmt);
8424	}
8425
8426	SDValue One = DAG.getConstant(Val: `1`, DL, VT: ShVT);
8427	if (IsFSHL) {
8428	ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: ShAmt);
8429	SDValue ShY1 = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: One);
8430	ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: ShY1, N2: InvShAmt);
8431	} else {
8432	SDValue ShX1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: One);
8433	ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ShX1, N2: InvShAmt);
8434	ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: ShAmt);
8435	}
8436	}
8437	return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShX, N2: ShY);
8438	}
8439
8440	// TODO: Merge with expandFunnelShift.
8441	SDValue TargetLowering::expandROT(SDNode Node, bool* AllowVectorOps,
8442	SelectionDAG &DAG) const {
8443	EVT VT = Node->getValueType(ResNo: `0`);
8444	unsigned EltSizeInBits = VT.getScalarSizeInBits();
8445	bool IsLeft = Node->getOpcode() == ISD::ROTL;
8446	SDValue Op0 = Node->getOperand(Num: `0`);
8447	SDValue Op1 = Node->getOperand(Num: `1`);
8448	SDLoc DL(SDValue (Node, `0`));
8449
8450	EVT ShVT = Op1.getValueType();
8451	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT: ShVT);
8452
8453	// If a rotate in the other direction is more supported, use it.
8454	unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8455	if (!isOperationLegalOrCustom(Op: Node->getOpcode(), VT) &&
8456	isOperationLegalOrCustom(Op: RevRot, VT) && isPowerOf2_32(Value: EltSizeInBits)) {
8457	SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Zero, N2: Op1);
8458	return DAG.getNode(Opcode: RevRot, DL, VT, N1: Op0, N2: Sub);
8459	}
8460
8461	if (!AllowVectorOps && VT.isVector() &&
8462	(!isOperationLegalOrCustom(Op: ISD::SHL, VT) \|\|
8463	!isOperationLegalOrCustom(Op: ISD::SRL, VT) \|\|
8464	!isOperationLegalOrCustom(Op: ISD::SUB, VT) \|\|
8465	!isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT) \|\|
8466	!isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT)))
8467	return SDValue ();
8468
8469	unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8470	unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8471	SDValue BitWidthMinusOneC = DAG.getConstant(Val: EltSizeInBits - `1`, DL, VT: ShVT);
8472	SDValue ShVal;
8473	SDValue HsVal;
8474	if (isPowerOf2_32(Value: EltSizeInBits)) {
8475	// (rotl x, c) -> x << (c & (w - 1)) \| x >> (-c & (w - 1))
8476	// (rotr x, c) -> x >> (c & (w - 1)) \| x << (-c & (w - 1))
8477	SDValue NegOp1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Zero, N2: Op1);
8478	SDValue ShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: Op1, N2: BitWidthMinusOneC);
8479	ShVal = DAG.getNode(Opcode: ShOpc, DL, VT, N1: Op0, N2: ShAmt);
8480	SDValue HsAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: NegOp1, N2: BitWidthMinusOneC);
8481	HsVal = DAG.getNode(Opcode: HsOpc, DL, VT, N1: Op0, N2: HsAmt);
8482	} else {
8483	// (rotl x, c) -> x << (c % w) \| x >> 1 >> (w - 1 - (c % w))
8484	// (rotr x, c) -> x >> (c % w) \| x << 1 << (w - 1 - (c % w))
8485	SDValue BitWidthC = DAG.getConstant(Val: EltSizeInBits, DL, VT: ShVT);
8486	SDValue ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Op1, N2: BitWidthC);
8487	ShVal = DAG.getNode(Opcode: ShOpc, DL, VT, N1: Op0, N2: ShAmt);
8488	SDValue HsAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: BitWidthMinusOneC, N2: ShAmt);
8489	SDValue One = DAG.getConstant(Val: `1`, DL, VT: ShVT);
8490	HsVal =
8491	DAG.getNode(Opcode: HsOpc, DL, VT, N1: DAG.getNode(Opcode: HsOpc, DL, VT, N1: Op0, N2: One), N2: HsAmt);
8492	}
8493	return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShVal, N2: HsVal);
8494	}
8495
8496	/// Check if CLMUL on VT can eventually reach a type with legal CLMUL through
8497	/// a chain of halving decompositions (halving element width) and/or vector
8498	/// widening (doubling element count). This guides expansion strategy selection:
8499	/// if true, the halving/widening path produces better code than bit-by-bit.
8500	///
8501	/// HalveDepth tracks halving steps only (each creates ~4x more operations).
8502	/// Widening steps are cheap (O(1) pad/extract) and don't count.
8503	/// Limiting halvings to 2 prevents exponential blowup:
8504	/// 1 halving: ~4 sub-CLMULs (good, e.g. v8i16 -> v8i8)
8505	/// 2 halvings: ~16 sub-CLMULs (acceptable, e.g. v4i32 -> v4i16 -> v8i8)
8506	/// 3 halvings: ~64 sub-CLMULs (worse than bit-by-bit expansion)
8507	static bool canNarrowCLMULToLegal(const TargetLowering &TLI, LLVMContext &Ctx,
8508	EVT VT, unsigned HalveDepth = `0`,
8509	unsigned TotalDepth = `0`) {
8510	if (HalveDepth > `2` \|\| TotalDepth > `8` \|\| !VT.isFixedLengthVector())
8511	return false;
8512	if (TLI.isOperationLegalOrCustom(Op: ISD::CLMUL, VT))
8513	return true;
8514	if (!TLI.isTypeLegal(VT))
8515	return false;
8516
8517	unsigned BW = VT.getScalarSizeInBits();
8518
8519	// Halve: halve element width, same element count.
8520	// This is the expensive step -- each halving creates ~4x more operations.
8521	if (BW % `2` == `0`) {
8522	EVT HalfEltVT = EVT::getIntegerVT(Context&: Ctx, BitWidth: BW / `2`);
8523	EVT HalfVT = VT.changeVectorElementType(Context&: Ctx, EltVT: HalfEltVT);
8524	if (TLI.isTypeLegal(VT: HalfVT) &&
8525	canNarrowCLMULToLegal(TLI, Ctx, VT: HalfVT, HalveDepth: HalveDepth + `1`, TotalDepth: TotalDepth + `1`))
8526	return true;
8527	}
8528
8529	// Widen: double element count (fixed-width vectors only).
8530	// This is cheap -- just INSERT_SUBVECTOR + EXTRACT_SUBVECTOR.
8531	EVT WideVT = VT.getDoubleNumVectorElementsVT(Context&: Ctx);
8532	if (TLI.isTypeLegal(VT: WideVT) &&
8533	canNarrowCLMULToLegal(TLI, Ctx, VT: WideVT, HalveDepth, TotalDepth: TotalDepth + `1`))
8534	return true;
8535
8536	return false;
8537	}
8538
8539	SDValue TargetLowering::expandCLMUL(SDNode Node, SelectionDAG &DAG) const* {
8540	SDLoc DL(Node);
8541	EVT VT = Node->getValueType(ResNo: `0`);
8542	SDValue X = Node->getOperand(Num: `0`);
8543	SDValue Y = Node->getOperand(Num: `1`);
8544	unsigned BW = VT.getScalarSizeInBits();
8545	unsigned Opcode = Node->getOpcode();
8546	LLVMContext &Ctx = *DAG.getContext();
8547
8548	switch (Opcode) {
8549	case ISD::CLMUL: {
8550	// For vector types, try decomposition strategies that leverage legal
8551	// CLMUL on narrower or wider element types, avoiding the expensive
8552	// bit-by-bit expansion.
8553	if (VT.isVector()) {
8554	// Strategy 1: Halving decomposition to half-element-width CLMUL.
8555	// Applies ExpandIntRes_CLMUL's identity element-wise:
8556	// CLMUL(X, Y) = (Hi << HalfBW) \| Lo
8557	// where:
8558	// Lo = CLMUL(XLo, YLo)
8559	// Hi = CLMULH(XLo, YLo) ^ CLMUL(XLo, YHi) ^ CLMUL(XHi, YLo)
8560	unsigned HalfBW = BW / `2`;
8561	if (BW % `2` == `0`) {
8562	EVT HalfEltVT = EVT::getIntegerVT(Context&: Ctx, BitWidth: HalfBW);
8563	EVT HalfVT =
8564	EVT::getVectorVT(Context&: Ctx, VT: HalfEltVT, EC: VT.getVectorElementCount());
8565	if (isTypeLegal(VT: HalfVT) && canNarrowCLMULToLegal(TLI: *this, Ctx, VT: HalfVT,
8566	/HalveDepth=/`1`)) {
8567	SDValue ShAmt = DAG.getShiftAmountConstant(Val: HalfBW, VT, DL);
8568
8569	// Extract low and high halves of each element.
8570	SDValue XLo = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: HalfVT, Operand: X);
8571	SDValue XHi = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: HalfVT,
8572	Operand: DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: X, N2: ShAmt));
8573	SDValue YLo = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: HalfVT, Operand: Y);
8574	SDValue YHi = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: HalfVT,
8575	Operand: DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: ShAmt));
8576
8577	// Lo = CLMUL(XLo, YLo)
8578	SDValue Lo = DAG.getNode(Opcode: ISD::CLMUL, DL, VT: HalfVT, N1: XLo, N2: YLo);
8579
8580	// Hi = CLMULH(XLo, YLo) ^ CLMUL(XLo, YHi) ^ CLMUL(XHi, YLo)
8581	SDValue LoH = DAG.getNode(Opcode: ISD::CLMULH, DL, VT: HalfVT, N1: XLo, N2: YLo);
8582	SDValue Cross1 = DAG.getNode(Opcode: ISD::CLMUL, DL, VT: HalfVT, N1: XLo, N2: YHi);
8583	SDValue Cross2 = DAG.getNode(Opcode: ISD::CLMUL, DL, VT: HalfVT, N1: XHi, N2: YLo);
8584	SDValue Cross = DAG.getNode(Opcode: ISD::XOR, DL, VT: HalfVT, N1: Cross1, N2: Cross2);
8585	SDValue Hi = DAG.getNode(Opcode: ISD::XOR, DL, VT: HalfVT, N1: LoH, N2: Cross);
8586
8587	// Reassemble: Result = ZExt(Lo) \| (AnyExt(Hi) << HalfBW)
8588	SDValue LoExt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT, Operand: Lo);
8589	SDValue HiExt = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT, Operand: Hi);
8590	SDValue HiShifted = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: HiExt, N2: ShAmt);
8591	return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: LoExt, N2: HiShifted);
8592	}
8593	}
8594
8595	// Strategy 2: Promote to double-element-width CLMUL.
8596	// CLMUL(X, Y) = Trunc(CLMUL(AnyExt(X), AnyExt(Y)))
8597	{
8598	EVT ExtVT = VT.changeElementType(Context&: Ctx, EltVT: EVT::getIntegerVT(Context&: Ctx, BitWidth: `2` * BW));
8599	if (isTypeLegal(VT: ExtVT) && isOperationLegalOrCustom(Op: ISD::CLMUL, VT: ExtVT)) {
8600	// If CLMUL on ExtVT is Custom (not Legal), the target may
8601	// scalarize it, costing O(NumElements) scalar ops. The bit-by-bit
8602	// fallback costs O(BW) vectorized iterations. Only widen when
8603	// element count is small enough that scalarization is cheaper.
8604	unsigned NumElts = VT.getVectorMinNumElements();
8605	if (isOperationLegal(Op: ISD::CLMUL, VT: ExtVT) \|\| NumElts < BW) {
8606	SDValue XExt = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: ExtVT, Operand: X);
8607	SDValue YExt = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: ExtVT, Operand: Y);
8608	SDValue Mul = DAG.getNode(Opcode: ISD::CLMUL, DL, VT: ExtVT, N1: XExt, N2: YExt);
8609	return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Mul);
8610	}
8611	}
8612	}
8613
8614	// Strategy 3: Widen element count (pad with undef, do CLMUL on wider
8615	// vector, extract lower result). CLMUL is element-wise, so upper
8616	// (undef) lanes don't affect the lower results.
8617	// e.g. v4i16 => pad to v8i16 => halve to v8i8 PMUL => extract v4i16.
8618	if (auto EC = VT.getVectorElementCount(); EC.isFixed()) {
8619	EVT WideVT = EVT::getVectorVT(Context&: Ctx, VT: VT.getVectorElementType(), EC: EC * `2`);
8620	if (isTypeLegal(VT: WideVT) && canNarrowCLMULToLegal(TLI: *this, Ctx, VT: WideVT)) {
8621	SDValue Undef = DAG.getUNDEF(VT: WideVT);
8622	SDValue XWide = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: WideVT, N1: Undef,
8623	N2: X, N3: DAG.getVectorIdxConstant(Val: `0`, DL));
8624	SDValue YWide = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: WideVT, N1: Undef,
8625	N2: Y, N3: DAG.getVectorIdxConstant(Val: `0`, DL));
8626	SDValue WideRes = DAG.getNode(Opcode: ISD::CLMUL, DL, VT: WideVT, N1: XWide, N2: YWide);
8627	return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, N1: WideRes,
8628	N2: DAG.getVectorIdxConstant(Val: `0`, DL));
8629	}
8630	}
8631	}
8632
8633	// NOTE: If you change this expansion, please update the cost model
8634	// calculation in BasicTTIImpl::getTypeBasedIntrinsicInstrCost for
8635	// Intrinsic::clmul.
8636
8637	EVT SetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: Ctx, VT);
8638
8639	SDValue Res = DAG.getConstant(Val: `0`, DL, VT);
8640	for (unsigned I = `0`; I < BW; ++I) {
8641	SDValue ShiftAmt = DAG.getShiftAmountConstant(Val: I, VT, DL);
8642	SDValue Mask = DAG.getConstant(Val: APInt::getOneBitSet(numBits: BW, BitNo: I), DL, VT);
8643	SDValue YMasked = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Y, N2: Mask);
8644
8645	// For targets with a fast bit test instruction (e.g., x86 BT) or without
8646	// multiply, use a shift-based expansion to avoid expensive MUL
8647	// instructions.
8648	SDValue Part;
8649	if (!hasBitTest(X: Y, Y: ShiftAmt) &&
8650	isOperationLegalOrCustom(
8651	Op: ISD::MUL, VT: getTypeToTransformTo(Context&: *DAG.getContext(), VT))) {
8652	Part = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: X, N2: YMasked);
8653	} else {
8654	// Canonical bit test: (Y & (1 << I)) != 0
8655	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT);
8656	SDValue Cond = DAG.getSetCC(DL, VT: SetCCVT, LHS: YMasked, RHS: Zero, Cond: ISD::SETEQ);
8657	SDValue XShifted = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: ShiftAmt);
8658	Part = DAG.getSelect(DL, VT, Cond, LHS: Zero, RHS: XShifted);
8659	}
8660	Res = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Res, N2: Part);
8661	}
8662	return Res;
8663	}
8664	case ISD::CLMULR:
8665	// If we have CLMUL/CLMULH, merge the shifted results to form CLMULR.
8666	if (isOperationLegalOrCustom(Op: ISD::CLMUL, VT) &&
8667	isOperationLegalOrCustom(Op: ISD::CLMULH, VT)) {
8668	SDValue Lo = DAG.getNode(Opcode: ISD::CLMUL, DL, VT, N1: X, N2: Y);
8669	SDValue Hi = DAG.getNode(Opcode: ISD::CLMULH, DL, VT, N1: X, N2: Y);
8670	Lo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo,
8671	N2: DAG.getShiftAmountConstant(Val: BW - `1`, VT, DL));
8672	Hi = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi,
8673	N2: DAG.getShiftAmountConstant(Val: `1`, VT, DL));
8674	return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Lo, N2: Hi);
8675	}
8676	[[fallthrough]];
8677	case ISD::CLMULH: {
8678	EVT ExtVT = VT.changeElementType(Context&: Ctx, EltVT: EVT::getIntegerVT(Context&: Ctx, BitWidth: `2` * BW));
8679	// Use bitreverse-based lowering (CLMULR/H = rev(CLMUL(rev,rev)) >> S)
8680	// when any of these hold:
8681	// (a) ZERO_EXTEND to ExtVT or SRL on ExtVT isn't legal.
8682	// (b) CLMUL is legal on VT but not on ExtVT (e.g. v8i8 on AArch64).
8683	// (c) CLMUL on ExtVT isn't legal, but CLMUL on VT can be efficiently
8684	// expanded via halving/widening to reach legal CLMUL. The bitreverse
8685	// path creates CLMUL(VT) which will be expanded efficiently. The
8686	// promote path would create CLMUL(ExtVT) => halving => CLMULH(VT),
8687	// causing a cycle.
8688	// Note: when CLMUL is legal on ExtVT, the zext => CLMUL(ExtVT) => shift
8689	// => trunc path is preferred over the bitreverse path, as it avoids the
8690	// cost of 3 bitreverse operations.
8691	if (!isOperationLegalOrCustom(Op: ISD::ZERO_EXTEND, VT: ExtVT) \|\|
8692	!isOperationLegalOrCustom(Op: ISD::SRL, VT: ExtVT) \|\|
8693	(!isOperationLegalOrCustom(Op: ISD::CLMUL, VT: ExtVT) &&
8694	(isOperationLegalOrCustom(Op: ISD::CLMUL, VT) \|\|
8695	canNarrowCLMULToLegal(TLI: *this, Ctx, VT)))) {
8696	SDValue XRev = DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT, Operand: X);
8697	SDValue YRev = DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT, Operand: Y);
8698	SDValue ClMul = DAG.getNode(Opcode: ISD::CLMUL, DL, VT, N1: XRev, N2: YRev);
8699	SDValue Res = DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT, Operand: ClMul);
8700	if (Opcode == ISD::CLMULH)
8701	Res = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Res,
8702	N2: DAG.getShiftAmountConstant(Val: `1`, VT, DL));
8703	return Res;
8704	}
8705	SDValue XExt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: ExtVT, Operand: X);
8706	SDValue YExt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: ExtVT, Operand: Y);
8707	SDValue ClMul = DAG.getNode(Opcode: ISD::CLMUL, DL, VT: ExtVT, N1: XExt, N2: YExt);
8708	unsigned ShAmt = Opcode == ISD::CLMULR ? BW - `1` : BW;
8709	SDValue HiBits = DAG.getNode(Opcode: ISD::SRL, DL, VT: ExtVT, N1: ClMul,
8710	N2: DAG.getShiftAmountConstant(Val: ShAmt, VT: ExtVT, DL));
8711	return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: HiBits);
8712	}
8713	}
8714	llvm_unreachable("Expected CLMUL, CLMULR, or CLMULH");
8715	}
8716
8717	void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
8718	SelectionDAG &DAG) const {
8719	assert(Node->getNumOperands() == `3` && "Not a double-shift!");
8720	EVT VT = Node->getValueType(ResNo: `0`);
8721	unsigned VTBits = VT.getScalarSizeInBits();
8722	assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
8723
8724	bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
8725	bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
8726	SDValue ShOpLo = Node->getOperand(Num: `0`);
8727	SDValue ShOpHi = Node->getOperand(Num: `1`);
8728	SDValue ShAmt = Node->getOperand(Num: `2`);
8729	EVT ShAmtVT = ShAmt.getValueType();
8730	EVT ShAmtCCVT =
8731	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: ShAmtVT);
8732	SDLoc dl(Node);
8733
8734	// ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
8735	// ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
8736	// away during isel.
8737	SDValue SafeShAmt = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ShAmtVT, N1: ShAmt,
8738	N2: DAG.getConstant(Val: VTBits - `1`, DL: dl, VT: ShAmtVT));
8739	SDValue Tmp1 = IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: ShOpHi,
8740	N2: DAG.getConstant(Val: VTBits - `1`, DL: dl, VT: ShAmtVT))
8741	: DAG.getConstant(Val: `0`, DL: dl, VT);
8742
8743	SDValue Tmp2, Tmp3;
8744	if (IsSHL) {
8745	Tmp2 = DAG.getNode(Opcode: ISD::FSHL, DL: dl, VT, N1: ShOpHi, N2: ShOpLo, N3: ShAmt);
8746	Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: ShOpLo, N2: SafeShAmt);
8747	} else {
8748	Tmp2 = DAG.getNode(Opcode: ISD::FSHR, DL: dl, VT, N1: ShOpHi, N2: ShOpLo, N3: ShAmt);
8749	Tmp3 = DAG.getNode(Opcode: IsSRA ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: ShOpHi, N2: SafeShAmt);
8750	}
8751
8752	// If the shift amount is larger or equal than the width of a part we don't
8753	// use the result from the FSHL/FSHR. Insert a test and select the appropriate
8754	// values for large shift amounts.
8755	SDValue AndNode = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ShAmtVT, N1: ShAmt,
8756	N2: DAG.getConstant(Val: VTBits, DL: dl, VT: ShAmtVT));
8757	SDValue Cond = DAG.getSetCC(DL: dl, VT: ShAmtCCVT, LHS: AndNode,
8758	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: ShAmtVT), Cond: ISD::SETNE);
8759
8760	if (IsSHL) {
8761	Hi = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp3, N3: Tmp2);
8762	Lo = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp1, N3: Tmp3);
8763	} else {
8764	Lo = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp3, N3: Tmp2);
8765	Hi = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp1, N3: Tmp3);
8766	}
8767	}
8768
8769	bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
8770	SelectionDAG &DAG) const {
8771	unsigned OpNo = Node->isStrictFPOpcode() ? `1` : `0`;
8772	SDValue Src = Node->getOperand(Num: OpNo);
8773	EVT SrcVT = Src.getValueType();
8774	EVT DstVT = Node->getValueType(ResNo: `0`);
8775	SDLoc dl(SDValue (Node, `0`));
8776
8777	// FIXME: Only f32 to i64 conversions are supported.
8778	if (SrcVT != MVT::f32 \|\| DstVT != MVT::i64)
8779	return false;
8780
8781	if (Node->isStrictFPOpcode())
8782	// When a NaN is converted to an integer a trap is allowed. We can't
8783	// use this expansion here because it would eliminate that trap. Other
8784	// traps are also allowed and cannot be eliminated. See
8785	// IEEE 754-2008 sec 5.8.
8786	return false;
8787
8788	// Expand f32 -> i64 conversion
8789	// This algorithm comes from compiler-rt's implementation of fixsfdi:
8790	// https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8791	unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8792	EVT IntVT = SrcVT.changeTypeToInteger();
8793	EVT IntShVT = getShiftAmountTy(LHSTy: IntVT, DL: DAG.getDataLayout());
8794
8795	SDValue ExponentMask = DAG.getConstant(Val: `0x7F800000`, DL: dl, VT: IntVT);
8796	SDValue ExponentLoBit = DAG.getConstant(Val: `23`, DL: dl, VT: IntVT);
8797	SDValue Bias = DAG.getConstant(Val: `127`, DL: dl, VT: IntVT);
8798	SDValue SignMask = DAG.getConstant(Val: APInt::getSignMask(BitWidth: SrcEltBits), DL: dl, VT: IntVT);
8799	SDValue SignLowBit = DAG.getConstant(Val: SrcEltBits - `1`, DL: dl, VT: IntVT);
8800	SDValue MantissaMask = DAG.getConstant(Val: `0x007FFFFF`, DL: dl, VT: IntVT);
8801
8802	SDValue Bits = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: IntVT, Operand: Src);
8803
8804	SDValue ExponentBits = DAG.getNode(
8805	Opcode: ISD::SRL, DL: dl, VT: IntVT, N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: ExponentMask),
8806	N2: DAG.getZExtOrTrunc(Op: ExponentLoBit, DL: dl, VT: IntShVT));
8807	SDValue Exponent = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: ExponentBits, N2: Bias);
8808
8809	SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: IntVT,
8810	N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: SignMask),
8811	N2: DAG.getZExtOrTrunc(Op: SignLowBit, DL: dl, VT: IntShVT));
8812	Sign = DAG.getSExtOrTrunc(Op: Sign, DL: dl, VT: DstVT);
8813
8814	SDValue R = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: IntVT,
8815	N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: MantissaMask),
8816	N2: DAG.getConstant(Val: `0x00800000`, DL: dl, VT: IntVT));
8817
8818	R = DAG.getZExtOrTrunc(Op: R, DL: dl, VT: DstVT);
8819
8820	R = DAG.getSelectCC(
8821	DL: dl, LHS: Exponent, RHS: ExponentLoBit,
8822	True: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: DstVT, N1: R,
8823	N2: DAG.getZExtOrTrunc(
8824	Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: Exponent, N2: ExponentLoBit),
8825	DL: dl, VT: IntShVT)),
8826	False: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: DstVT, N1: R,
8827	N2: DAG.getZExtOrTrunc(
8828	Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: ExponentLoBit, N2: Exponent),
8829	DL: dl, VT: IntShVT)),
8830	Cond: ISD::SETGT);
8831
8832	SDValue Ret = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: DstVT,
8833	N1: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: R, N2: Sign), N2: Sign);
8834
8835	Result = DAG.getSelectCC(DL: dl, LHS: Exponent, RHS: DAG.getConstant(Val: `0`, DL: dl, VT: IntVT),
8836	True: DAG.getConstant(Val: `0`, DL: dl, VT: DstVT), False: Ret, Cond: ISD::SETLT);
8837	return true;
8838	}
8839
8840	bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
8841	SDValue &Chain,
8842	SelectionDAG &DAG) const {
8843	SDLoc dl(SDValue (Node, `0`));
8844	unsigned OpNo = Node->isStrictFPOpcode() ? `1` : `0`;
8845	SDValue Src = Node->getOperand(Num: OpNo);
8846
8847	EVT SrcVT = Src.getValueType();
8848	EVT DstVT = Node->getValueType(ResNo: `0`);
8849	EVT SetCCVT =
8850	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: SrcVT);
8851	EVT DstSetCCVT =
8852	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: DstVT);
8853
8854	// Only expand vector types if we have the appropriate vector bit operations.
8855	unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
8856	ISD::FP_TO_SINT;
8857	if (DstVT.isVector() && (!isOperationLegalOrCustom(Op: SIntOpcode, VT: DstVT) \|\|
8858	!isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT: SrcVT)))
8859	return false;
8860
8861	// If the maximum float value is smaller then the signed integer range,
8862	// the destination signmask can't be represented by the float, so we can
8863	// just use FP_TO_SINT directly.
8864	const fltSemantics &APFSem = SrcVT.getFltSemantics();
8865	APFloat APF(APFSem, APInt::getZero(numBits: SrcVT.getScalarSizeInBits()));
8866	APInt SignMask = APInt::getSignMask(BitWidth: DstVT.getScalarSizeInBits());
8867	if (APFloat::opOverflow &
8868	APF.convertFromAPInt(Input: SignMask, IsSigned: false, RM: APFloat::rmNearestTiesToEven)) {
8869	if (Node->isStrictFPOpcode()) {
8870	Result = DAG.getNode(Opcode: ISD::STRICT_FP_TO_SINT, DL: dl, ResultTys: { DstVT, MVT::Other },
8871	Ops: { Node->getOperand(Num: `0`), Src });
8872	Chain = Result.getValue(R: `1`);
8873	} else
8874	Result = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Src);
8875	return true;
8876	}
8877
8878	// Don't expand it if there isn't cheap fsub instruction.
8879	if (!isOperationLegalOrCustom(
8880	Op: Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, VT: SrcVT))
8881	return false;
8882
8883	SDValue Cst = DAG.getConstantFP(Val: APF, DL: dl, VT: SrcVT);
8884	SDValue Sel;
8885
8886	if (Node->isStrictFPOpcode()) {
8887	Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT,
8888	Chain: Node->getOperand(Num: `0`), /IsSignaling/ true);
8889	Chain = Sel.getValue(R: `1`);
8890	} else {
8891	Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT);
8892	}
8893
8894	bool Strict = Node->isStrictFPOpcode() \|\|
8895	shouldUseStrictFP_TO_INT(FpVT: SrcVT, IntVT: DstVT, /IsSigned/ false);
8896
8897	if (Strict) {
8898	// Expand based on maximum range of FP_TO_SINT, if the value exceeds the
8899	// signmask then offset (the result of which should be fully representable).
8900	// Sel = Src < 0x8000000000000000
8901	// FltOfs = select Sel, 0, 0x8000000000000000
8902	// IntOfs = select Sel, 0, 0x8000000000000000
8903	// Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8904
8905	// TODO: Should any fast-math-flags be set for the FSUB?
8906	SDValue FltOfs = DAG.getSelect(DL: dl, VT: SrcVT, Cond: Sel,
8907	LHS: DAG.getConstantFP(Val: `0.0`, DL: dl, VT: SrcVT), RHS: Cst);
8908	Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT);
8909	SDValue IntOfs = DAG.getSelect(DL: dl, VT: DstVT, Cond: Sel,
8910	LHS: DAG.getConstant(Val: `0`, DL: dl, VT: DstVT),
8911	RHS: DAG.getConstant(Val: SignMask, DL: dl, VT: DstVT));
8912	SDValue SInt;
8913	if (Node->isStrictFPOpcode()) {
8914	SDValue Val = DAG.getNode(Opcode: ISD::STRICT_FSUB, DL: dl, ResultTys: { SrcVT, MVT::Other },
8915	Ops: { Chain, Src, FltOfs });
8916	SInt = DAG.getNode(Opcode: ISD::STRICT_FP_TO_SINT, DL: dl, ResultTys: { DstVT, MVT::Other },
8917	Ops: { Val.getValue(R: `1`), Val });
8918	Chain = SInt.getValue(R: `1`);
8919	} else {
8920	SDValue Val = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: SrcVT, N1: Src, N2: FltOfs);
8921	SInt = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Val);
8922	}
8923	Result = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: SInt, N2: IntOfs);
8924	} else {
8925	// Expand based on maximum range of FP_TO_SINT:
8926	// True = fp_to_sint(Src)
8927	// False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
8928	// Result = select (Src < 0x8000000000000000), True, False
8929
8930	SDValue True = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Src);
8931	// TODO: Should any fast-math-flags be set for the FSUB?
8932	SDValue False = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT,
8933	Operand: DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: SrcVT, N1: Src, N2: Cst));
8934	False = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: False,
8935	N2: DAG.getConstant(Val: SignMask, DL: dl, VT: DstVT));
8936	Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT);
8937	Result = DAG.getSelect(DL: dl, VT: DstVT, Cond: Sel, LHS: True, RHS: False);
8938	}
8939	return true;
8940	}
8941
8942	bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
8943	SDValue &Chain, SelectionDAG &DAG) const {
8944	// This transform is not correct for converting 0 when rounding mode is set
8945	// to round toward negative infinity which will produce -0.0. So disable
8946	// under strictfp.
8947	if (Node->isStrictFPOpcode())
8948	return false;
8949
8950	SDValue Src = Node->getOperand(Num: `0`);
8951	EVT SrcVT = Src.getValueType();
8952	EVT DstVT = Node->getValueType(ResNo: `0`);
8953
8954	// If the input is known to be non-negative and SINT_TO_FP is legal then use
8955	// it.
8956	if (Node->getFlags().hasNonNeg() &&
8957	isOperationLegalOrCustom(Op: ISD::SINT_TO_FP, VT: SrcVT)) {
8958	Result =
8959	DAG.getNode(Opcode: ISD::SINT_TO_FP, DL: SDLoc (Node), VT: DstVT, Operand: Node->getOperand(Num: `0`));
8960	return true;
8961	}
8962
8963	if (SrcVT.getScalarType() != MVT::i64 \|\| DstVT.getScalarType() != MVT::f64)
8964	return false;
8965
8966	// Only expand vector types if we have the appropriate vector bit
8967	// operations.
8968	if (SrcVT.isVector() && (!isOperationLegalOrCustom(Op: ISD::SRL, VT: SrcVT) \|\|
8969	!isOperationLegalOrCustom(Op: ISD::FADD, VT: DstVT) \|\|
8970	!isOperationLegalOrCustom(Op: ISD::FSUB, VT: DstVT) \|\|
8971	!isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT: SrcVT) \|\|
8972	!isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT: SrcVT)))
8973	return false;
8974
8975	SDLoc dl(SDValue (Node, `0`));
8976
8977	// Implementation of unsigned i64 to f64 following the algorithm in
8978	// __floatundidf in compiler_rt. This implementation performs rounding
8979	// correctly in all rounding modes with the exception of converting 0
8980	// when rounding toward negative infinity. In that case the fsub will
8981	// produce -0.0. This will be added to +0.0 and produce -0.0 which is
8982	// incorrect.
8983	SDValue TwoP52 = DAG.getConstant(UINT64_C(`0x4330000000000000`), DL: dl, VT: SrcVT);
8984	SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
8985	Val: llvm::bit_cast<double>(UINT64_C(`0x4530000000100000`)), DL: dl, VT: DstVT);
8986	SDValue TwoP84 = DAG.getConstant(UINT64_C(`0x4530000000000000`), DL: dl, VT: SrcVT);
8987	SDValue LoMask = DAG.getConstant(UINT64_C(`0x00000000FFFFFFFF`), DL: dl, VT: SrcVT);
8988	SDValue HiShift = DAG.getShiftAmountConstant(Val: `32`, VT: SrcVT, DL: dl);
8989
8990	SDValue Lo = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: SrcVT, N1: Src, N2: LoMask);
8991	SDValue Hi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: SrcVT, N1: Src, N2: HiShift);
8992	SDValue LoOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: SrcVT, N1: Lo, N2: TwoP52);
8993	SDValue HiOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: SrcVT, N1: Hi, N2: TwoP84);
8994	SDValue LoFlt = DAG.getBitcast(VT: DstVT, V: LoOr);
8995	SDValue HiFlt = DAG.getBitcast(VT: DstVT, V: HiOr);
8996	SDValue HiSub = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: DstVT, N1: HiFlt, N2: TwoP84PlusTwoP52);
8997	Result = DAG.getNode(Opcode: ISD::FADD, DL: dl, VT: DstVT, N1: LoFlt, N2: HiSub);
8998	return true;
8999	}
9000
9001	SDValue
9002	TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
9003	SelectionDAG &DAG) const {
9004	unsigned Opcode = Node->getOpcode();
9005	assert((Opcode == ISD::FMINNUM \|\| Opcode == ISD::FMAXNUM \|\|
9006	Opcode == ISD::STRICT_FMINNUM \|\| Opcode == ISD::STRICT_FMAXNUM) &&
9007	"Wrong opcode");
9008
9009	if (Node->getFlags().hasNoNaNs()) {
9010	ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
9011	EVT VT = Node->getValueType(ResNo: `0`);
9012	if ((!isCondCodeLegal(CC: Pred, VT: VT.getSimpleVT()) \|\|
9013	!isOperationLegalOrCustom(Op: ISD::VSELECT, VT)) &&
9014	VT.isVector())
9015	return SDValue ();
9016	SDValue Op1 = Node->getOperand(Num: `0`);
9017	SDValue Op2 = Node->getOperand(Num: `1`);
9018	return DAG.getSelectCC(DL: SDLoc (Node), LHS: Op1, RHS: Op2, True: Op1, False: Op2, Cond: Pred,
9019	Flags: Node->getFlags());
9020	}
9021
9022	return SDValue ();
9023	}
9024
9025	SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
9026	SelectionDAG &DAG) const {
9027	if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
9028	return Expanded;
9029
9030	EVT VT = Node->getValueType(ResNo: `0`);
9031	if (VT.isScalableVector())
9032	report_fatal_error(
9033	reason: "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
9034
9035	SDLoc dl(Node);
9036	unsigned NewOp =
9037	Node->getOpcode() == ISD::FMINNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
9038
9039	if (isOperationLegalOrCustom(Op: NewOp, VT)) {
9040	SDValue Quiet0 = Node->getOperand(Num: `0`);
9041	SDValue Quiet1 = Node->getOperand(Num: `1`);
9042
9043	if (!Node->getFlags().hasNoNaNs()) {
9044	// Insert canonicalizes if it's possible we need to quiet to get correct
9045	// sNaN behavior.
9046	if (!DAG.isKnownNeverSNaN(Op: Quiet0)) {
9047	Quiet0 = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL: dl, VT, Operand: Quiet0,
9048	Flags: Node->getFlags());
9049	}
9050	if (!DAG.isKnownNeverSNaN(Op: Quiet1)) {
9051	Quiet1 = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL: dl, VT, Operand: Quiet1,
9052	Flags: Node->getFlags());
9053	}
9054	}
9055
9056	return DAG.getNode(Opcode: NewOp, DL: dl, VT, N1: Quiet0, N2: Quiet1, Flags: Node->getFlags());
9057	}
9058
9059	// If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
9060	// instead if there are no NaNs.
9061	if (Node->getFlags().hasNoNaNs() \|\|
9062	(DAG.isKnownNeverNaN(Op: Node->getOperand(Num: `0`)) &&
9063	DAG.isKnownNeverNaN(Op: Node->getOperand(Num: `1`)))) {
9064	unsigned IEEE2018Op =
9065	Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
9066	if (isOperationLegalOrCustom(Op: IEEE2018Op, VT))
9067	return DAG.getNode(Opcode: IEEE2018Op, DL: dl, VT, N1: Node->getOperand(Num: `0`),
9068	N2: Node->getOperand(Num: `1`), Flags: Node->getFlags());
9069	}
9070
9071	if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
9072	return SelCC;
9073
9074	return SDValue ();
9075	}
9076
9077	SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
9078	SelectionDAG &DAG) const {
9079	if (SDValue Expanded = expandVectorNaryOpBySplitting(Node: N, DAG))
9080	return Expanded;
9081
9082	SDLoc DL(N);
9083	SDValue LHS = N->getOperand(Num: `0`);
9084	SDValue RHS = N->getOperand(Num: `1`);
9085	unsigned Opc = N->getOpcode();
9086	EVT VT = N->getValueType(ResNo: `0`);
9087	EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
9088	bool IsMax = Opc == ISD::FMAXIMUM;
9089	SDNodeFlags Flags = N->getFlags();
9090
9091	// First, implement comparison not propagating NaN. If no native fmin or fmax
9092	// available, use plain select with setcc instead.
9093	SDValue MinMax;
9094	unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
9095	unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
9096
9097	// FIXME: We should probably define fminnum/fmaxnum variants with correct
9098	// signed zero behavior.
9099	bool MinMaxMustRespectOrderedZero = false;
9100
9101	if (isOperationLegalOrCustom(Op: CompOpcIeee, VT)) {
9102	MinMax = DAG.getNode(Opcode: CompOpcIeee, DL, VT, N1: LHS, N2: RHS, Flags);
9103	MinMaxMustRespectOrderedZero = true;
9104	} else if (isOperationLegalOrCustom(Op: CompOpc, VT)) {
9105	MinMax = DAG.getNode(Opcode: CompOpc, DL, VT, N1: LHS, N2: RHS, Flags);
9106	} else {
9107	if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
9108	return DAG.UnrollVectorOp(N);
9109
9110	// NaN (if exists) will be propagated later, so orderness doesn't matter.
9111	SDValue Compare =
9112	DAG.getSetCC(DL, VT: CCVT, LHS, RHS, Cond: IsMax ? ISD::SETOGT : ISD::SETOLT);
9113	MinMax = DAG.getSelect(DL, VT, Cond: Compare, LHS, RHS, Flags);
9114	}
9115
9116	// Propagate any NaN of both operands
9117	if (!N->getFlags().hasNoNaNs() &&
9118	(!DAG.isKnownNeverNaN(Op: RHS) \|\| !DAG.isKnownNeverNaN(Op: LHS))) {
9119	ConstantFP FPNaN = ConstantFP::get(Context&: DAG.getContext(),
9120	V: APFloat::getNaN(Sem: VT.getFltSemantics()));
9121	MinMax = DAG.getSelect(DL, VT, Cond: DAG.getSetCC(DL, VT: CCVT, LHS, RHS, Cond: ISD::SETUO),
9122	LHS: DAG.getConstantFP(V: *FPNaN, DL, VT), RHS: MinMax, Flags);
9123	}
9124
9125	// fminimum/fmaximum requires -0.0 less than +0.0
9126	if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
9127	!DAG.isKnownNeverZeroFloat(Op: RHS) && !DAG.isKnownNeverZeroFloat(Op: LHS)) {
9128	SDValue IsZero = DAG.getSetCC(DL, VT: CCVT, LHS: MinMax,
9129	RHS: DAG.getConstantFP(Val: `0.0`, DL, VT), Cond: ISD::SETOEQ);
9130	SDValue TestZero =
9131	DAG.getTargetConstant(Val: IsMax ? fcPosZero : fcNegZero, DL, VT: MVT::i32);
9132	SDValue LCmp = DAG.getSelect(
9133	DL, VT, Cond: DAG.getNode(Opcode: ISD::IS_FPCLASS, DL, VT: CCVT, N1: LHS, N2: TestZero), LHS,
9134	RHS: MinMax, Flags);
9135	SDValue RCmp = DAG.getSelect(
9136	DL, VT, Cond: DAG.getNode(Opcode: ISD::IS_FPCLASS, DL, VT: CCVT, N1: RHS, N2: TestZero), LHS: RHS,
9137	RHS: LCmp, Flags);
9138	MinMax = DAG.getSelect(DL, VT, Cond: IsZero, LHS: RCmp, RHS: MinMax, Flags);
9139	}
9140
9141	return MinMax;
9142	}
9143
9144	SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
9145	SelectionDAG &DAG) const {
9146	SDLoc DL(Node);
9147	SDValue LHS = Node->getOperand(Num: `0`);
9148	SDValue RHS = Node->getOperand(Num: `1`);
9149	unsigned Opc = Node->getOpcode();
9150	EVT VT = Node->getValueType(ResNo: `0`);
9151	EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
9152	bool IsMax = Opc == ISD::FMAXIMUMNUM;
9153	SDNodeFlags Flags = Node->getFlags();
9154
9155	unsigned NewOp =
9156	Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
9157
9158	if (isOperationLegalOrCustom(Op: NewOp, VT)) {
9159	if (!Flags.hasNoNaNs()) {
9160	// Insert canonicalizes if it's possible we need to quiet to get correct
9161	// sNaN behavior.
9162	if (!DAG.isKnownNeverSNaN(Op: LHS)) {
9163	LHS = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL, VT, Operand: LHS, Flags);
9164	}
9165	if (!DAG.isKnownNeverSNaN(Op: RHS)) {
9166	RHS = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL, VT, Operand: RHS, Flags);
9167	}
9168	}
9169
9170	return DAG.getNode(Opcode: NewOp, DL, VT, N1: LHS, N2: RHS, Flags);
9171	}
9172
9173	// We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has
9174	// same behaviors for all of other cases: +0.0 vs -0.0 included.
9175	if (Flags.hasNoNaNs() \|\|
9176	(DAG.isKnownNeverNaN(Op: LHS) && DAG.isKnownNeverNaN(Op: RHS))) {
9177	unsigned IEEE2019Op =
9178	Opc == ISD::FMINIMUMNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
9179	if (isOperationLegalOrCustom(Op: IEEE2019Op, VT))
9180	return DAG.getNode(Opcode: IEEE2019Op, DL, VT, N1: LHS, N2: RHS, Flags);
9181	}
9182
9183	// FMINNUM/FMAXMUM returns qNaN if either operand is sNaN, and it may return
9184	// either one for +0.0 vs -0.0.
9185	if ((Flags.hasNoNaNs() \|\|
9186	(DAG.isKnownNeverSNaN(Op: LHS) && DAG.isKnownNeverSNaN(Op: RHS))) &&
9187	(Flags.hasNoSignedZeros() \|\| DAG.isKnownNeverZeroFloat(Op: LHS) \|\|
9188	DAG.isKnownNeverZeroFloat(Op: RHS))) {
9189	unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
9190	if (isOperationLegalOrCustom(Op: IEEE2008Op, VT))
9191	return DAG.getNode(Opcode: IEEE2008Op, DL, VT, N1: LHS, N2: RHS, Flags);
9192	}
9193
9194	if (VT.isVector() &&
9195	(isOperationLegalOrCustomOrPromote(Op: Opc, VT: VT.getVectorElementType()) \|\|
9196	!isOperationLegalOrCustom(Op: ISD::VSELECT, VT)))
9197	return DAG.UnrollVectorOp(N: Node);
9198
9199	// If only one operand is NaN, override it with another operand.
9200	if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(Op: LHS)) {
9201	LHS = DAG.getSelectCC(DL, LHS, RHS: LHS, True: RHS, False: LHS, Cond: ISD::SETUO);
9202	}
9203	if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(Op: RHS)) {
9204	RHS = DAG.getSelectCC(DL, LHS: RHS, RHS, True: LHS, False: RHS, Cond: ISD::SETUO);
9205	}
9206
9207	// Always prefer RHS if equal.
9208	SDValue MinMax =
9209	DAG.getSelectCC(DL, LHS, RHS, True: LHS, False: RHS, Cond: IsMax ? ISD::SETGT : ISD::SETLT);
9210
9211	// TODO: We need quiet sNaN if strictfp.
9212
9213	// Fixup signed zero behavior.
9214	if (Flags.hasNoSignedZeros() \|\| DAG.isKnownNeverZeroFloat(Op: LHS) \|\|
9215	DAG.isKnownNeverZeroFloat(Op: RHS)) {
9216	return MinMax;
9217	}
9218	SDValue TestZero =
9219	DAG.getTargetConstant(Val: IsMax ? fcPosZero : fcNegZero, DL, VT: MVT::i32);
9220	SDValue IsZero = DAG.getSetCC(DL, VT: CCVT, LHS: MinMax,
9221	RHS: DAG.getConstantFP(Val: `0.0`, DL, VT), Cond: ISD::SETEQ);
9222	EVT IntVT = VT.changeTypeToInteger();
9223	EVT FloatVT = VT.changeElementType(Context&: *DAG.getContext(), EltVT: MVT::f32);
9224	SDValue LHSTrunc = LHS;
9225	if (!isTypeLegal(VT: IntVT) && !isOperationLegalOrCustom(Op: ISD::IS_FPCLASS, VT)) {
9226	LHSTrunc = DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: FloatVT, N1: LHS,
9227	N2: DAG.getIntPtrConstant(Val: `0`, DL, /isTarget=/true));
9228	}
9229	// It's OK to select from LHS and MinMax, with only one ISD::IS_FPCLASS, as
9230	// we preferred RHS when generate MinMax, if the operands are equal.
9231	SDValue RetZero = DAG.getSelect(
9232	DL, VT, Cond: DAG.getNode(Opcode: ISD::IS_FPCLASS, DL, VT: CCVT, N1: LHSTrunc, N2: TestZero), LHS,
9233	RHS: MinMax, Flags);
9234	return DAG.getSelect(DL, VT, Cond: IsZero, LHS: RetZero, RHS: MinMax, Flags);
9235	}
9236
9237	/// Returns a true value if if this FPClassTest can be performed with an ordered
9238	/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
9239	/// std::nullopt if it cannot be performed as a compare with 0.
9240	static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
9241	const fltSemantics &Semantics,
9242	const MachineFunction &MF) {
9243	FPClassTest OrderedMask = Test & ~fcNan;
9244	FPClassTest NanTest = Test & fcNan;
9245	bool IsOrdered = NanTest == fcNone;
9246	bool IsUnordered = NanTest == fcNan;
9247
9248	// Skip cases that are testing for only a qnan or snan.
9249	if (!IsOrdered && !IsUnordered)
9250	return std::nullopt;
9251
9252	if (OrderedMask == fcZero &&
9253	MF.getDenormalMode(FPType: Semantics).Input == DenormalMode::IEEE)
9254	return IsOrdered;
9255	if (OrderedMask == (fcZero \| fcSubnormal) &&
9256	MF.getDenormalMode(FPType: Semantics).inputsAreZero())
9257	return IsOrdered;
9258	return std::nullopt;
9259	}
9260
9261	SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
9262	const FPClassTest OrigTestMask,
9263	SDNodeFlags Flags, const SDLoc &DL,
9264	SelectionDAG &DAG) const {
9265	EVT OperandVT = Op.getValueType();
9266	assert(OperandVT.isFloatingPoint());
9267	FPClassTest Test = OrigTestMask;
9268
9269	// Degenerated cases.
9270	if (Test == fcNone)
9271	return DAG.getBoolConstant(V: false, DL, VT: ResultVT, OpVT: OperandVT);
9272	if (Test == fcAllFlags)
9273	return DAG.getBoolConstant(V: true, DL, VT: ResultVT, OpVT: OperandVT);
9274
9275	// PPC double double is a pair of doubles, of which the higher part determines
9276	// the value class.
9277	if (OperandVT == MVT::ppcf128) {
9278	Op = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL, VT: MVT::f64, N1: Op,
9279	N2: DAG.getConstant(Val: `1`, DL, VT: MVT::i32));
9280	OperandVT = MVT::f64;
9281	}
9282
9283	// Floating-point type properties.
9284	EVT ScalarFloatVT = OperandVT.getScalarType();
9285	const Type FloatTy = ScalarFloatVT.getTypeForEVT(Context&: DAG.getContext());
9286	const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
9287	bool IsF80 = (ScalarFloatVT == MVT::f80);
9288
9289	// Some checks can be implemented using float comparisons, if floating point
9290	// exceptions are ignored.
9291	if (Flags.hasNoFPExcept() &&
9292	isOperationLegalOrCustom(Op: ISD::SETCC, VT: OperandVT.getScalarType())) {
9293	FPClassTest FPTestMask = Test;
9294	bool IsInvertedFP = false;
9295
9296	if (FPClassTest InvertedFPCheck =
9297	invertFPClassTestIfSimpler(Test: FPTestMask, UseFCmp: true)) {
9298	FPTestMask = InvertedFPCheck;
9299	IsInvertedFP = true;
9300	}
9301
9302	ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
9303	ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
9304
9305	// See if we can fold an \| fcNan into an unordered compare.
9306	FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
9307
9308	// Can't fold the ordered check if we're only testing for snan or qnan
9309	// individually.
9310	if ((FPTestMask & fcNan) != fcNan)
9311	OrderedFPTestMask = FPTestMask;
9312
9313	const bool IsOrdered = FPTestMask == OrderedFPTestMask;
9314
9315	if (std::optional<bool> IsCmp0 =
9316	isFCmpEqualZero(Test: FPTestMask, Semantics, MF: DAG.getMachineFunction());
9317	IsCmp0 && (isCondCodeLegalOrCustom(
9318	CC: *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
9319	VT: OperandVT.getScalarType().getSimpleVT()))) {
9320
9321	// If denormals could be implicitly treated as 0, this is not equivalent
9322	// to a compare with 0 since it will also be true for denormals.
9323	return DAG.getSetCC(DL, VT: ResultVT, LHS: Op,
9324	RHS: DAG.getConstantFP(Val: `0.0`, DL, VT: OperandVT),
9325	Cond: *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
9326	}
9327
9328	if (FPTestMask == fcNan &&
9329	isCondCodeLegalOrCustom(CC: IsInvertedFP ? ISD::SETO : ISD::SETUO,
9330	VT: OperandVT.getScalarType().getSimpleVT()))
9331	return DAG.getSetCC(DL, VT: ResultVT, LHS: Op, RHS: Op,
9332	Cond: IsInvertedFP ? ISD::SETO : ISD::SETUO);
9333
9334	bool IsOrderedInf = FPTestMask == fcInf;
9335	if ((FPTestMask == fcInf \|\| FPTestMask == (fcInf \| fcNan)) &&
9336	isCondCodeLegalOrCustom(CC: IsOrderedInf ? OrderedCmpOpcode
9337	: UnorderedCmpOpcode,
9338	VT: OperandVT.getScalarType().getSimpleVT()) &&
9339	isOperationLegalOrCustom(Op: ISD::FABS, VT: OperandVT.getScalarType()) &&
9340	(isOperationLegal(Op: ISD::ConstantFP, VT: OperandVT.getScalarType()) \|\|
9341	(OperandVT.isVector() &&
9342	isOperationLegalOrCustom(Op: ISD::BUILD_VECTOR, VT: OperandVT)))) {
9343	// isinf(x) --> fabs(x) == inf
9344	SDValue Abs = DAG.getNode(Opcode: ISD::FABS, DL, VT: OperandVT, Operand: Op);
9345	SDValue Inf =
9346	DAG.getConstantFP(Val: APFloat::getInf(Sem: Semantics), DL, VT: OperandVT);
9347	return DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: Inf,
9348	Cond: IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
9349	}
9350
9351	if ((OrderedFPTestMask == fcPosInf \|\| OrderedFPTestMask == fcNegInf) &&
9352	isCondCodeLegalOrCustom(CC: IsOrdered ? OrderedCmpOpcode
9353	: UnorderedCmpOpcode,
9354	VT: OperandVT.getSimpleVT())) {
9355	// isposinf(x) --> x == inf
9356	// isneginf(x) --> x == -inf
9357	// isposinf(x) \|\| nan --> x u== inf
9358	// isneginf(x) \|\| nan --> x u== -inf
9359
9360	SDValue Inf = DAG.getConstantFP(
9361	Val: APFloat::getInf(Sem: Semantics, Negative: OrderedFPTestMask == fcNegInf), DL,
9362	VT: OperandVT);
9363	return DAG.getSetCC(DL, VT: ResultVT, LHS: Op, RHS: Inf,
9364	Cond: IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
9365	}
9366
9367	if (OrderedFPTestMask == (fcSubnormal \| fcZero) && !IsOrdered) {
9368	// TODO: Could handle ordered case, but it produces worse code for
9369	// x86. Maybe handle ordered if fabs is free?
9370
9371	ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9372	ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
9373
9374	if (isCondCodeLegalOrCustom(CC: IsOrdered ? OrderedOp : UnorderedOp,
9375	VT: OperandVT.getScalarType().getSimpleVT())) {
9376	// (issubnormal(x) \|\| iszero(x)) --> fabs(x) < smallest_normal
9377
9378	// TODO: Maybe only makes sense if fabs is free. Integer test of
9379	// exponent bits seems better for x86.
9380	SDValue Abs = DAG.getNode(Opcode: ISD::FABS, DL, VT: OperandVT, Operand: Op);
9381	SDValue SmallestNormal = DAG.getConstantFP(
9382	Val: APFloat::getSmallestNormalized(Sem: Semantics), DL, VT: OperandVT);
9383	return DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: SmallestNormal,
9384	Cond: IsOrdered ? OrderedOp : UnorderedOp);
9385	}
9386	}
9387
9388	if (FPTestMask == fcNormal) {
9389	// TODO: Handle unordered
9390	ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9391	ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;
9392
9393	if (isCondCodeLegalOrCustom(CC: IsFiniteOp,
9394	VT: OperandVT.getScalarType().getSimpleVT()) &&
9395	isCondCodeLegalOrCustom(CC: IsNormalOp,
9396	VT: OperandVT.getScalarType().getSimpleVT()) &&
9397	isFAbsFree(VT: OperandVT)) {
9398	// isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
9399	SDValue Inf =
9400	DAG.getConstantFP(Val: APFloat::getInf(Sem: Semantics), DL, VT: OperandVT);
9401	SDValue SmallestNormal = DAG.getConstantFP(
9402	Val: APFloat::getSmallestNormalized(Sem: Semantics), DL, VT: OperandVT);
9403
9404	SDValue Abs = DAG.getNode(Opcode: ISD::FABS, DL, VT: OperandVT, Operand: Op);
9405	SDValue IsFinite = DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: Inf, Cond: IsFiniteOp);
9406	SDValue IsNormal =
9407	DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: SmallestNormal, Cond: IsNormalOp);
9408	unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
9409	return DAG.getNode(Opcode: LogicOp, DL, VT: ResultVT, N1: IsFinite, N2: IsNormal);
9410	}
9411	}
9412	}
9413
9414	// Some checks may be represented as inversion of simpler check, for example
9415	// "inf\|normal\|subnormal\|zero" => !"nan".
9416	bool IsInverted = false;
9417
9418	if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, UseFCmp: false)) {
9419	Test = InvertedCheck;
9420	IsInverted = true;
9421	}
9422
9423	// In the general case use integer operations.
9424	unsigned BitSize = OperandVT.getScalarSizeInBits();
9425	EVT IntVT = OperandVT.changeElementType(
9426	Context&: DAG.getContext(), EltVT: EVT::getIntegerVT(Context&: DAG.getContext(), BitWidth: BitSize));
9427	SDValue OpAsInt = DAG.getBitcast(VT: IntVT, V: Op);
9428
9429	// Various masks.
9430	APInt SignBit = APInt::getSignMask(BitWidth: BitSize);
9431	APInt ValueMask = APInt::getSignedMaxValue(numBits: BitSize); // All bits but sign.
9432	APInt Inf = APFloat::getInf(Sem: Semantics).bitcastToAPInt(); // Exp and int bit.
9433	const unsigned ExplicitIntBitInF80 = `63`;
9434	APInt ExpMask = Inf;
9435	if (IsF80)
9436	ExpMask.clearBit(BitPosition: ExplicitIntBitInF80);
9437	APInt AllOneMantissa = APFloat::getLargest(Sem: Semantics).bitcastToAPInt() & ~Inf;
9438	APInt QNaNBitMask =
9439	APInt::getOneBitSet(numBits: BitSize, BitNo: AllOneMantissa.getActiveBits() - `1`);
9440	APInt InversionMask = APInt::getAllOnes(numBits: ResultVT.getScalarSizeInBits());
9441
9442	SDValue ValueMaskV = DAG.getConstant(Val: ValueMask, DL, VT: IntVT);
9443	SDValue SignBitV = DAG.getConstant(Val: SignBit, DL, VT: IntVT);
9444	SDValue ExpMaskV = DAG.getConstant(Val: ExpMask, DL, VT: IntVT);
9445	SDValue ZeroV = DAG.getConstant(Val: `0`, DL, VT: IntVT);
9446	SDValue InfV = DAG.getConstant(Val: Inf, DL, VT: IntVT);
9447	SDValue ResultInversionMask = DAG.getConstant(Val: InversionMask, DL, VT: ResultVT);
9448
9449	SDValue Res;
9450	const auto appendResult = [&](SDValue PartialRes) {
9451	if (PartialRes) {
9452	if (Res)
9453	Res = DAG.getNode(Opcode: ISD::OR, DL, VT: ResultVT, N1: Res, N2: PartialRes);
9454	else
9455	Res = PartialRes;
9456	}
9457	};
9458
9459	SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
9460	const auto getIntBitIsSet = [&]() -> SDValue {
9461	if (!IntBitIsSetV) {
9462	APInt IntBitMask(BitSize, `0`);
9463	IntBitMask.setBit(ExplicitIntBitInF80);
9464	SDValue IntBitMaskV = DAG.getConstant(Val: IntBitMask, DL, VT: IntVT);
9465	SDValue IntBitV = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: IntBitMaskV);
9466	IntBitIsSetV = DAG.getSetCC(DL, VT: ResultVT, LHS: IntBitV, RHS: ZeroV, Cond: ISD::SETNE);
9467	}
9468	return IntBitIsSetV;
9469	};
9470
9471	// Split the value into sign bit and absolute value.
9472	SDValue AbsV = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: ValueMaskV);
9473	SDValue SignV = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt,
9474	RHS: DAG.getConstant(Val: `0`, DL, VT: IntVT), Cond: ISD::SETLT);
9475
9476	// Tests that involve more than one class should be processed first.
9477	SDValue PartialRes;
9478
9479	if (IsF80)
9480	; // Detect finite numbers of f80 by checking individual classes because
9481	// they have different settings of the explicit integer bit.
9482	else if ((Test & fcFinite) == fcFinite) {
9483	// finite(V) ==> abs(V) < exp_mask
9484	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ExpMaskV, Cond: ISD::SETLT);
9485	Test &= ~fcFinite;
9486	} else if ((Test & fcFinite) == fcPosFinite) {
9487	// finite(V) && V > 0 ==> V < exp_mask
9488	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: ExpMaskV, Cond: ISD::SETULT);
9489	Test &= ~fcPosFinite;
9490	} else if ((Test & fcFinite) == fcNegFinite) {
9491	// finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
9492	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ExpMaskV, Cond: ISD::SETLT);
9493	PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
9494	Test &= ~fcNegFinite;
9495	}
9496	appendResult (PartialRes);
9497
9498	if (FPClassTest PartialCheck = Test & (fcZero \| fcSubnormal)) {
9499	// fcZero \| fcSubnormal => test all exponent bits are 0
9500	// TODO: Handle sign bit specific cases
9501	if (PartialCheck == (fcZero \| fcSubnormal)) {
9502	SDValue ExpBits = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: ExpMaskV);
9503	SDValue ExpIsZero =
9504	DAG.getSetCC(DL, VT: ResultVT, LHS: ExpBits, RHS: ZeroV, Cond: ISD::SETEQ);
9505	appendResult (ExpIsZero);
9506	Test &= ~PartialCheck & fcAllFlags;
9507	}
9508	}
9509
9510	// Check for individual classes.
9511
9512	if (unsigned PartialCheck = Test & fcZero) {
9513	if (PartialCheck == fcPosZero)
9514	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: ZeroV, Cond: ISD::SETEQ);
9515	else if (PartialCheck == fcZero)
9516	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ZeroV, Cond: ISD::SETEQ);
9517	else // ISD::fcNegZero
9518	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: SignBitV, Cond: ISD::SETEQ);
9519	appendResult (PartialRes);
9520	}
9521
9522	if (unsigned PartialCheck = Test & fcSubnormal) {
9523	// issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
9524	// issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
9525	SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
9526	SDValue MantissaV = DAG.getConstant(Val: AllOneMantissa, DL, VT: IntVT);
9527	SDValue VMinusOneV =
9528	DAG.getNode(Opcode: ISD::SUB, DL, VT: IntVT, N1: V, N2: DAG.getConstant(Val: `1`, DL, VT: IntVT));
9529	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: VMinusOneV, RHS: MantissaV, Cond: ISD::SETULT);
9530	if (PartialCheck == fcNegSubnormal)
9531	PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
9532	appendResult (PartialRes);
9533	}
9534
9535	if (unsigned PartialCheck = Test & fcInf) {
9536	if (PartialCheck == fcPosInf)
9537	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: InfV, Cond: ISD::SETEQ);
9538	else if (PartialCheck == fcInf)
9539	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETEQ);
9540	else { // ISD::fcNegInf
9541	APInt NegInf = APFloat::getInf(Sem: Semantics, Negative: true).bitcastToAPInt();
9542	SDValue NegInfV = DAG.getConstant(Val: NegInf, DL, VT: IntVT);
9543	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: NegInfV, Cond: ISD::SETEQ);
9544	}
9545	appendResult (PartialRes);
9546	}
9547
9548	if (unsigned PartialCheck = Test & fcNan) {
9549	APInt InfWithQnanBit = Inf \| QNaNBitMask;
9550	SDValue InfWithQnanBitV = DAG.getConstant(Val: InfWithQnanBit, DL, VT: IntVT);
9551	if (PartialCheck == fcNan) {
9552	// isnan(V) ==> abs(V) > int(inf)
9553	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETGT);
9554	if (IsF80) {
9555	// Recognize unsupported values as NaNs for compatibility with glibc.
9556	// In them (exp(V)==0) == int_bit.
9557	SDValue ExpBits = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: AbsV, N2: ExpMaskV);
9558	SDValue ExpIsZero =
9559	DAG.getSetCC(DL, VT: ResultVT, LHS: ExpBits, RHS: ZeroV, Cond: ISD::SETEQ);
9560	SDValue IsPseudo =
9561	DAG.getSetCC(DL, VT: ResultVT, LHS: getIntBitIsSet (), RHS: ExpIsZero, Cond: ISD::SETEQ);
9562	PartialRes = DAG.getNode(Opcode: ISD::OR, DL, VT: ResultVT, N1: PartialRes, N2: IsPseudo);
9563	}
9564	} else if (PartialCheck == fcQNan) {
9565	// isquiet(V) ==> abs(V) >= (unsigned(Inf) \| quiet_bit)
9566	PartialRes =
9567	DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfWithQnanBitV, Cond: ISD::SETGE);
9568	} else { // ISD::fcSNan
9569	// issignaling(V) ==> abs(V) > unsigned(Inf) &&
9570	// abs(V) < (unsigned(Inf) \| quiet_bit)
9571	SDValue IsNan = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETGT);
9572	SDValue IsNotQnan =
9573	DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfWithQnanBitV, Cond: ISD::SETLT);
9574	PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: IsNan, N2: IsNotQnan);
9575	}
9576	appendResult (PartialRes);
9577	}
9578
9579	if (unsigned PartialCheck = Test & fcNormal) {
9580	// isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
9581	APInt ExpLSB = ExpMask & ~(ExpMask.shl(shiftAmt: `1`));
9582	SDValue ExpLSBV = DAG.getConstant(Val: ExpLSB, DL, VT: IntVT);
9583	SDValue ExpMinus1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: IntVT, N1: AbsV, N2: ExpLSBV);
9584	APInt ExpLimit = ExpMask - ExpLSB;
9585	SDValue ExpLimitV = DAG.getConstant(Val: ExpLimit, DL, VT: IntVT);
9586	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: ExpMinus1, RHS: ExpLimitV, Cond: ISD::SETULT);
9587	if (PartialCheck == fcNegNormal)
9588	PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
9589	else if (PartialCheck == fcPosNormal) {
9590	SDValue PosSignV =
9591	DAG.getNode(Opcode: ISD::XOR, DL, VT: ResultVT, N1: SignV, N2: ResultInversionMask);
9592	PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: PosSignV);
9593	}
9594	if (IsF80)
9595	PartialRes =
9596	DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: getIntBitIsSet ());
9597	appendResult (PartialRes);
9598	}
9599
9600	if (!Res)
9601	return DAG.getConstant(Val: IsInverted, DL, VT: ResultVT);
9602	if (IsInverted)
9603	Res = DAG.getNode(Opcode: ISD::XOR, DL, VT: ResultVT, N1: Res, N2: ResultInversionMask);
9604	return Res;
9605	}
9606
9607	// Only expand vector types if we have the appropriate vector bit operations.
9608	static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
9609	assert(VT.isVector() && "Expected vector type");
9610	unsigned Len = VT.getScalarSizeInBits();
9611	return TLI.isOperationLegalOrCustom(Op: ISD::ADD, VT) &&
9612	TLI.isOperationLegalOrCustom(Op: ISD::SUB, VT) &&
9613	TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
9614	(Len == `8` \|\| TLI.isOperationLegalOrCustom(Op: ISD::MUL, VT)) &&
9615	TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT);
9616	}
9617
9618	SDValue TargetLowering::expandCTPOP(SDNode Node, SelectionDAG &DAG) const* {
9619	SDLoc dl(Node);
9620	EVT VT = Node->getValueType(ResNo: `0`);
9621	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9622	SDValue Op = Node->getOperand(Num: `0`);
9623	unsigned Len = VT.getScalarSizeInBits();
9624	assert(VT.isInteger() && "CTPOP not implemented for this type.");
9625
9626	// TODO: Add support for irregular type lengths.
9627	if (!(Len <= `128` && Len % `8` == `0`))
9628	return SDValue ();
9629
9630	// Only expand vector types if we have the appropriate vector bit operations.
9631	if (VT.isVector() && !canExpandVectorCTPOP(TLI: *this, VT))
9632	return SDValue ();
9633
9634	// This is the "best" algorithm from
9635	// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9636	SDValue Mask55 =
9637	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x55`)), DL: dl, VT);
9638	SDValue Mask33 =
9639	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x33`)), DL: dl, VT);
9640	SDValue Mask0F =
9641	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x0F`)), DL: dl, VT);
9642
9643	// v = v - ((v >> 1) & 0x55555555...)
9644	Op = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Op,
9645	N2: DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
9646	N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
9647	N2: DAG.getConstant(Val: `1`, DL: dl, VT: ShVT)),
9648	N2: Mask55));
9649	// v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9650	Op = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op, N2: Mask33),
9651	N2: DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
9652	N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
9653	N2: DAG.getConstant(Val: `2`, DL: dl, VT: ShVT)),
9654	N2: Mask33));
9655	// v = (v + (v >> 4)) & 0x0F0F0F0F...
9656	Op = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
9657	N1: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Op,
9658	N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
9659	N2: DAG.getConstant(Val: `4`, DL: dl, VT: ShVT))),
9660	N2: Mask0F);
9661
9662	if (Len <= `8`)
9663	return Op;
9664
9665	// Avoid the multiply if we only have 2 bytes to add.
9666	// TODO: Only doing this for scalars because vectors weren't as obviously
9667	// improved.
9668	if (Len == `16` && !VT.isVector()) {
9669	// v = (v + (v >> 8)) & 0x00FF;
9670	return DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
9671	N1: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Op,
9672	N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
9673	N2: DAG.getConstant(Val: `8`, DL: dl, VT: ShVT))),
9674	N2: DAG.getConstant(Val: `0xFF`, DL: dl, VT));
9675	}
9676
9677	// v = (v 0x01010101...) >> (Len - 8)*
9678	SDValue V;
9679	if (isOperationLegalOrCustomOrPromote(
9680	Op: ISD::MUL, VT: getTypeToTransformTo(Context&: *DAG.getContext(), VT))) {
9681	SDValue Mask01 =
9682	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x01`)), DL: dl, VT);
9683	V = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Op, N2: Mask01);
9684	} else {
9685	V = Op;
9686	for (unsigned Shift = `8`; Shift < Len; Shift *= `2`) {
9687	SDValue ShiftC = DAG.getShiftAmountConstant(Val: Shift, VT, DL: dl);
9688	V = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: V,
9689	N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: V, N2: ShiftC));
9690	}
9691	}
9692	return DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: V, N2: DAG.getConstant(Val: Len - `8`, DL: dl, VT: ShVT));
9693	}
9694
9695	SDValue TargetLowering::expandVPCTPOP(SDNode Node, SelectionDAG &DAG) const* {
9696	SDLoc dl(Node);
9697	EVT VT = Node->getValueType(ResNo: `0`);
9698	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9699	SDValue Op = Node->getOperand(Num: `0`);
9700	SDValue Mask = Node->getOperand(Num: `1`);
9701	SDValue VL = Node->getOperand(Num: `2`);
9702	unsigned Len = VT.getScalarSizeInBits();
9703	assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
9704
9705	// TODO: Add support for irregular type lengths.
9706	if (!(Len <= `128` && Len % `8` == `0`))
9707	return SDValue ();
9708
9709	// This is same algorithm of expandCTPOP from
9710	// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9711	SDValue Mask55 =
9712	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x55`)), DL: dl, VT);
9713	SDValue Mask33 =
9714	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x33`)), DL: dl, VT);
9715	SDValue Mask0F =
9716	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x0F`)), DL: dl, VT);
9717
9718	SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
9719
9720	// v = v - ((v >> 1) & 0x55555555...)
9721	Tmp1 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT,
9722	N1: DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op,
9723	N2: DAG.getConstant(Val: `1`, DL: dl, VT: ShVT), N3: Mask, N4: VL),
9724	N2: Mask55, N3: Mask, N4: VL);
9725	Op = DAG.getNode(Opcode: ISD::VP_SUB, DL: dl, VT, N1: Op, N2: Tmp1, N3: Mask, N4: VL);
9726
9727	// v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9728	Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op, N2: Mask33, N3: Mask, N4: VL);
9729	Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT,
9730	N1: DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op,
9731	N2: DAG.getConstant(Val: `2`, DL: dl, VT: ShVT), N3: Mask, N4: VL),
9732	N2: Mask33, N3: Mask, N4: VL);
9733	Op = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: VL);
9734
9735	// v = (v + (v >> 4)) & 0x0F0F0F0F...
9736	Tmp4 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `4`, DL: dl, VT: ShVT),
9737	N3: Mask, N4: VL),
9738	Tmp5 = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: Op, N2: Tmp4, N3: Mask, N4: VL);
9739	Op = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp5, N2: Mask0F, N3: Mask, N4: VL);
9740
9741	if (Len <= `8`)
9742	return Op;
9743
9744	// v = (v 0x01010101...) >> (Len - 8)*
9745	SDValue V;
9746	if (isOperationLegalOrCustomOrPromote(
9747	Op: ISD::VP_MUL, VT: getTypeToTransformTo(Context&: *DAG.getContext(), VT))) {
9748	SDValue Mask01 =
9749	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x01`)), DL: dl, VT);
9750	V = DAG.getNode(Opcode: ISD::VP_MUL, DL: dl, VT, N1: Op, N2: Mask01, N3: Mask, N4: VL);
9751	} else {
9752	V = Op;
9753	for (unsigned Shift = `8`; Shift < Len; Shift *= `2`) {
9754	SDValue ShiftC = DAG.getShiftAmountConstant(Val: Shift, VT, DL: dl);
9755	V = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: V,
9756	N2: DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: V, N2: ShiftC, N3: Mask, N4: VL),
9757	N3: Mask, N4: VL);
9758	}
9759	}
9760	return DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: V, N2: DAG.getConstant(Val: Len - `8`, DL: dl, VT: ShVT),
9761	N3: Mask, N4: VL);
9762	}
9763
9764	SDValue TargetLowering::expandCTLZ(SDNode Node, SelectionDAG &DAG) const* {
9765	SDLoc dl(Node);
9766	EVT VT = Node->getValueType(ResNo: `0`);
9767	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9768	SDValue Op = Node->getOperand(Num: `0`);
9769	unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9770
9771	// If the non-ZERO_UNDEF version is supported we can use that instead.
9772	if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
9773	isOperationLegalOrCustom(Op: ISD::CTLZ, VT))
9774	return DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Op);
9775
9776	// If the ZERO_UNDEF version is supported use that and handle the zero case.
9777	if (isOperationLegalOrCustom(Op: ISD::CTLZ_ZERO_UNDEF, VT)) {
9778	EVT SetCCVT =
9779	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
9780	SDValue CTLZ = DAG.getNode(Opcode: ISD::CTLZ_ZERO_UNDEF, DL: dl, VT, Operand: Op);
9781	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
9782	SDValue SrcIsZero = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
9783	return DAG.getSelect(DL: dl, VT, Cond: SrcIsZero,
9784	LHS: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT), RHS: CTLZ);
9785	}
9786
9787	// Only expand vector types if we have the appropriate vector bit operations.
9788	// This includes the operations needed to expand CTPOP if it isn't supported.
9789	if (VT.isVector() && (!isPowerOf2_32(Value: NumBitsPerElt) \|\|
9790	(!isOperationLegalOrCustom(Op: ISD::CTPOP, VT) &&
9791	!canExpandVectorCTPOP(TLI: *this, VT)) \|\|
9792	!isOperationLegalOrCustom(Op: ISD::SRL, VT) \|\|
9793	!isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)))
9794	return SDValue ();
9795
9796	// for now, we do this:
9797	// x = x \| (x >> 1);
9798	// x = x \| (x >> 2);
9799	// ...
9800	// x = x \| (x >>16);
9801	// x = x \| (x >>32); // for 64-bit input
9802	// return popcount(~x);
9803	//
9804	// Ref: "Hacker's Delight" by Henry Warren
9805	for (unsigned i = `0`; (`1U` << i) < NumBitsPerElt; ++i) {
9806	SDValue Tmp = DAG.getConstant(Val: `1ULL` << i, DL: dl, VT: ShVT);
9807	Op = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Op,
9808	N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: Tmp));
9809	}
9810	Op = DAG.getNOT(DL: dl, Val: Op, VT);
9811	return DAG.getNode(Opcode: ISD::CTPOP, DL: dl, VT, Operand: Op);
9812	}
9813
9814	SDValue TargetLowering::expandVPCTLZ(SDNode Node, SelectionDAG &DAG) const* {
9815	SDLoc dl(Node);
9816	EVT VT = Node->getValueType(ResNo: `0`);
9817	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9818	SDValue Op = Node->getOperand(Num: `0`);
9819	SDValue Mask = Node->getOperand(Num: `1`);
9820	SDValue VL = Node->getOperand(Num: `2`);
9821	unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9822
9823	// do this:
9824	// x = x \| (x >> 1);
9825	// x = x \| (x >> 2);
9826	// ...
9827	// x = x \| (x >>16);
9828	// x = x \| (x >>32); // for 64-bit input
9829	// return popcount(~x);
9830	for (unsigned i = `0`; (`1U` << i) < NumBitsPerElt; ++i) {
9831	SDValue Tmp = DAG.getConstant(Val: `1ULL` << i, DL: dl, VT: ShVT);
9832	Op = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Op,
9833	N2: DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: Tmp, N3: Mask, N4: VL), N3: Mask,
9834	N4: VL);
9835	}
9836	Op = DAG.getNode(Opcode: ISD::VP_XOR, DL: dl, VT, N1: Op, N2: DAG.getAllOnesConstant(DL: dl, VT),
9837	N3: Mask, N4: VL);
9838	return DAG.getNode(Opcode: ISD::VP_CTPOP, DL: dl, VT, N1: Op, N2: Mask, N3: VL);
9839	}
9840
9841	SDValue TargetLowering::expandCTLS(SDNode Node, SelectionDAG &DAG) const* {
9842	SDLoc dl(Node);
9843	EVT VT = Node->getValueType(ResNo: `0`);
9844	SDValue Op = DAG.getFreeze(V: Node->getOperand(Num: `0`));
9845	unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9846
9847	// CTLS(x) = CTLZ(OR(SHL(XOR(x, SRA(x, BW-1)), 1), 1))
9848	// This transforms the sign bits into leading zeros that can be counted.
9849	SDValue ShiftAmt = DAG.getShiftAmountConstant(Val: NumBitsPerElt - `1`, VT, DL: dl);
9850	SDValue SignBit = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Op, N2: ShiftAmt);
9851	SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Op, N2: SignBit);
9852	SDValue Shl =
9853	DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Xor, N2: DAG.getShiftAmountConstant(Val: `1`, VT, DL: dl));
9854	SDValue Or = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Shl, N2: DAG.getConstant(Val: `1`, DL: dl, VT));
9855	return DAG.getNode(Opcode: ISD::CTLZ_ZERO_UNDEF, DL: dl, VT, Operand: Or);
9856	}
9857
9858	SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
9859	const SDLoc &DL, EVT VT, SDValue Op,
9860	unsigned BitWidth) const {
9861	if (BitWidth != `32` && BitWidth != `64`)
9862	return SDValue ();
9863
9864	const DataLayout &TD = DAG.getDataLayout();
9865	if (!isOperationCustom(Op: ISD::ConstantPool, VT: getPointerTy(DL: TD)))
9866	return SDValue ();
9867
9868	APInt DeBruijn = BitWidth == `32` ? APInt (`32`, `0x077CB531U`)
9869	: APInt (`64`, `0x0218A392CD3D5DBFULL`);
9870	MachinePointerInfo PtrInfo =
9871	MachinePointerInfo::getConstantPool(MF&: DAG.getMachineFunction());
9872	unsigned ShiftAmt = BitWidth - Log2_32(Value: BitWidth);
9873	SDValue Neg = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: `0`, DL, VT), N2: Op);
9874	SDValue Lookup = DAG.getNode(
9875	Opcode: ISD::SRL, DL, VT,
9876	N1: DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op, N2: Neg),
9877	N2: DAG.getConstant(Val: DeBruijn, DL, VT)),
9878	N2: DAG.getShiftAmountConstant(Val: ShiftAmt, VT, DL));
9879	Lookup = DAG.getSExtOrTrunc(Op: Lookup, DL, VT: getPointerTy(DL: TD));
9880
9881	SmallVector<uint8_t> Table(BitWidth, `0`);
9882	for (unsigned i = `0`; i < BitWidth; i++) {
9883	APInt Shl = DeBruijn.shl(shiftAmt: i);
9884	APInt Lshr = Shl.lshr(shiftAmt: ShiftAmt);
9885	Table [Lshr.getZExtValue()] = i;
9886	}
9887
9888	// Create a ConstantArray in Constant Pool
9889	auto CA = ConstantDataArray::get(Context&: DAG.getContext(), Elts&: Table);
9890	SDValue CPIdx = DAG.getConstantPool(C: CA, VT: getPointerTy(DL: TD),
9891	Align: TD.getPrefTypeAlign(Ty: CA->getType()));
9892	SDValue ExtLoad = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl: DL, VT, Chain: DAG.getEntryNode(),
9893	Ptr: DAG.getMemBasePlusOffset(Base: CPIdx, Offset: Lookup, DL),
9894	PtrInfo, MemVT: MVT::i8);
9895	if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
9896	return ExtLoad;
9897
9898	EVT SetCCVT =
9899	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
9900	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT);
9901	SDValue SrcIsZero = DAG.getSetCC(DL, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
9902	return DAG.getSelect(DL, VT, Cond: SrcIsZero,
9903	LHS: DAG.getConstant(Val: BitWidth, DL, VT), RHS: ExtLoad);
9904	}
9905
9906	SDValue TargetLowering::expandCTTZ(SDNode Node, SelectionDAG &DAG) const* {
9907	SDLoc dl(Node);
9908	EVT VT = Node->getValueType(ResNo: `0`);
9909	SDValue Op = Node->getOperand(Num: `0`);
9910	unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9911
9912	// If the non-ZERO_UNDEF version is supported we can use that instead.
9913	if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
9914	isOperationLegalOrCustom(Op: ISD::CTTZ, VT))
9915	return DAG.getNode(Opcode: ISD::CTTZ, DL: dl, VT, Operand: Op);
9916
9917	// If the ZERO_UNDEF version is supported use that and handle the zero case.
9918	if (isOperationLegalOrCustom(Op: ISD::CTTZ_ZERO_UNDEF, VT)) {
9919	EVT SetCCVT =
9920	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
9921	SDValue CTTZ = DAG.getNode(Opcode: ISD::CTTZ_ZERO_UNDEF, DL: dl, VT, Operand: Op);
9922	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
9923	SDValue SrcIsZero = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
9924	return DAG.getSelect(DL: dl, VT, Cond: SrcIsZero,
9925	LHS: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT), RHS: CTTZ);
9926	}
9927
9928	// Only expand vector types if we have the appropriate vector bit operations.
9929	// This includes the operations needed to expand CTPOP if it isn't supported.
9930	if (VT.isVector() && (!isPowerOf2_32(Value: NumBitsPerElt) \|\|
9931	(!isOperationLegalOrCustom(Op: ISD::CTPOP, VT) &&
9932	!isOperationLegalOrCustom(Op: ISD::CTLZ, VT) &&
9933	!canExpandVectorCTPOP(TLI: *this, VT)) \|\|
9934	!isOperationLegalOrCustom(Op: ISD::SUB, VT) \|\|
9935	!isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) \|\|
9936	!isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT)))
9937	return SDValue ();
9938
9939	// Emit Table Lookup if ISD::CTPOP used in the fallback path below is going
9940	// to be expanded or converted to a libcall.
9941	if (!VT.isVector() && !isOperationLegalOrCustomOrPromote(Op: ISD::CTPOP, VT) &&
9942	!isOperationLegal(Op: ISD::CTLZ, VT))
9943	if (SDValue V = CTTZTableLookup(Node, DAG, DL: dl, VT, Op, BitWidth: NumBitsPerElt))
9944	return V;
9945
9946	// for now, we use: { return popcount(~x & (x - 1)); }
9947	// unless the target has ctlz but not ctpop, in which case we use:
9948	// { return 32 - nlz(~x & (x-1)); }
9949	// Ref: "Hacker's Delight" by Henry Warren
9950	SDValue Tmp = DAG.getNode(
9951	Opcode: ISD::AND, DL: dl, VT, N1: DAG.getNOT(DL: dl, Val: Op, VT),
9952	N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `1`, DL: dl, VT)));
9953
9954	// If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
9955	if (isOperationLegal(Op: ISD::CTLZ, VT) && !isOperationLegal(Op: ISD::CTPOP, VT)) {
9956	return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT),
9957	N2: DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Tmp));
9958	}
9959
9960	return DAG.getNode(Opcode: ISD::CTPOP, DL: dl, VT, Operand: Tmp);
9961	}
9962
9963	SDValue TargetLowering::expandVPCTTZ(SDNode Node, SelectionDAG &DAG) const* {
9964	SDValue Op = Node->getOperand(Num: `0`);
9965	SDValue Mask = Node->getOperand(Num: `1`);
9966	SDValue VL = Node->getOperand(Num: `2`);
9967	SDLoc dl(Node);
9968	EVT VT = Node->getValueType(ResNo: `0`);
9969
9970	// Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9971	SDValue Not = DAG.getNode(Opcode: ISD::VP_XOR, DL: dl, VT, N1: Op,
9972	N2: DAG.getAllOnesConstant(DL: dl, VT), N3: Mask, N4: VL);
9973	SDValue MinusOne = DAG.getNode(Opcode: ISD::VP_SUB, DL: dl, VT, N1: Op,
9974	N2: DAG.getConstant(Val: `1`, DL: dl, VT), N3: Mask, N4: VL);
9975	SDValue Tmp = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Not, N2: MinusOne, N3: Mask, N4: VL);
9976	return DAG.getNode(Opcode: ISD::VP_CTPOP, DL: dl, VT, N1: Tmp, N2: Mask, N3: VL);
9977	}
9978
9979	SDValue TargetLowering::expandVPCTTZElements(SDNode *N,
9980	SelectionDAG &DAG) const {
9981	// %cond = to_bool_vec %source
9982	// %splat = splat /val=/VL
9983	// %tz = step_vector
9984	// %v = vp.select %cond, /true=/tz, /false=/%splat
9985	// %r = vp.reduce.umin %v
9986	SDLoc DL(N);
9987	SDValue Source = N->getOperand(Num: `0`);
9988	SDValue Mask = N->getOperand(Num: `1`);
9989	SDValue EVL = N->getOperand(Num: `2`);
9990	EVT SrcVT = Source.getValueType();
9991	EVT ResVT = N->getValueType(ResNo: `0`);
9992	EVT ResVecVT =
9993	EVT::getVectorVT(Context&: *DAG.getContext(), VT: ResVT, EC: SrcVT.getVectorElementCount());
9994
9995	// Convert to boolean vector.
9996	if (SrcVT.getScalarType() != MVT::i1) {
9997	SDValue AllZero = DAG.getConstant(Val: `0`, DL, VT: SrcVT);
9998	SrcVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i1,
9999	EC: SrcVT.getVectorElementCount());
10000	Source = DAG.getNode(Opcode: ISD::VP_SETCC, DL, VT: SrcVT, N1: Source, N2: AllZero,
10001	N3: DAG.getCondCode(Cond: ISD::SETNE), N4: Mask, N5: EVL);
10002	}
10003
10004	SDValue ExtEVL = DAG.getZExtOrTrunc(Op: EVL, DL, VT: ResVT);
10005	SDValue Splat = DAG.getSplat(VT: ResVecVT, DL, Op: ExtEVL);
10006	SDValue StepVec = DAG.getStepVector(DL, ResVT: ResVecVT);
10007	SDValue Select =
10008	DAG.getNode(Opcode: ISD::VP_SELECT, DL, VT: ResVecVT, N1: Source, N2: StepVec, N3: Splat, N4: EVL);
10009	return DAG.getNode(Opcode: ISD::VP_REDUCE_UMIN, DL, VT: ResVT, N1: ExtEVL, N2: Select, N3: Mask, N4: EVL);
10010	}
10011
10012	SDValue TargetLowering::expandVectorFindLastActive(SDNode *N,
10013	SelectionDAG &DAG) const {
10014	SDLoc DL(N);
10015	SDValue Mask = N->getOperand(Num: `0`);
10016	EVT MaskVT = Mask.getValueType();
10017	EVT BoolVT = MaskVT.getScalarType();
10018
10019	// Find a suitable type for a stepvector.
10020	ConstantRange VScaleRange(`1`, /isFullSet=/true); // Fixed length default.
10021	if (MaskVT.isScalableVector())
10022	VScaleRange = getVScaleRange(F: &DAG.getMachineFunction().getFunction(), BitWidth: `64`);
10023	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10024	uint64_t EltWidth = TLI.getBitWidthForCttzElements(
10025	RetTy: BoolVT.getTypeForEVT(Context&: *DAG.getContext()), EC: MaskVT.getVectorElementCount(),
10026	/ZeroIsPoison=/true, VScaleRange: &VScaleRange);
10027	// If the step vector element type is smaller than the mask element type,
10028	// use the mask type directly to avoid widening issues.
10029	EltWidth = std::max(a: EltWidth, b: BoolVT.getFixedSizeInBits());
10030	EVT StepVT = MVT::getIntegerVT(BitWidth: EltWidth);
10031	EVT StepVecVT = MaskVT.changeVectorElementType(Context&: *DAG.getContext(), EltVT: StepVT);
10032
10033	// If promotion or widening is required to make the type legal, do it here.
10034	// Promotion of integers within LegalizeVectorOps is looking for types of
10035	// the same size but with a smaller number of larger elements, not the usual
10036	// larger size with the same number of larger elements.
10037	TargetLowering::LegalizeTypeAction TypeAction =
10038	TLI.getTypeAction(VT: StepVecVT.getSimpleVT());
10039	SDValue StepVec;
10040	if (TypeAction == TargetLowering::TypePromoteInteger) {
10041	StepVecVT = TLI.getTypeToTransformTo(Context&: *DAG.getContext(), VT: StepVecVT);
10042	StepVT = StepVecVT.getVectorElementType();
10043	StepVec = DAG.getStepVector(DL, ResVT: StepVecVT);
10044	} else if (TypeAction == TargetLowering::TypeWidenVector) {
10045	// For widening, the element count changes. Create a step vector with only
10046	// the original elements valid and zeros for padding. Also widen the mask.
10047	EVT WideVecVT = TLI.getTypeToTransformTo(Context&: *DAG.getContext(), VT: StepVecVT);
10048	unsigned WideNumElts = WideVecVT.getVectorNumElements();
10049
10050	// Build widened step vector: <0, 1, ..., OrigNumElts-1, poison, poison, ..>
10051	SDValue OrigStepVec = DAG.getStepVector(DL, ResVT: StepVecVT);
10052	SDValue UndefStep = DAG.getPOISON(VT: WideVecVT);
10053	StepVec = DAG.getInsertSubvector(DL, Vec: UndefStep, SubVec: OrigStepVec, Idx: `0`);
10054
10055	// Widen mask: pad with zeros.
10056	EVT WideMaskVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: BoolVT, NumElements: WideNumElts);
10057	SDValue ZeroMask = DAG.getConstant(Val: `0`, DL, VT: WideMaskVT);
10058	Mask = DAG.getInsertSubvector(DL, Vec: ZeroMask, SubVec: Mask, Idx: `0`);
10059
10060	StepVecVT = WideVecVT;
10061	StepVT = WideVecVT.getVectorElementType();
10062	} else {
10063	StepVec = DAG.getStepVector(DL, ResVT: StepVecVT);
10064	}
10065
10066	// Zero out lanes with inactive elements, then find the highest remaining
10067	// value from the stepvector.
10068	SDValue Zeroes = DAG.getConstant(Val: `0`, DL, VT: StepVecVT);
10069	SDValue ActiveElts = DAG.getSelect(DL, VT: StepVecVT, Cond: Mask, LHS: StepVec, RHS: Zeroes);
10070	SDValue HighestIdx = DAG.getNode(Opcode: ISD::VECREDUCE_UMAX, DL, VT: StepVT, Operand: ActiveElts);
10071	return DAG.getZExtOrTrunc(Op: HighestIdx, DL, VT: N->getValueType(ResNo: `0`));
10072	}
10073
10074	SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
10075	bool IsNegative) const {
10076	SDLoc dl(N);
10077	EVT VT = N->getValueType(ResNo: `0`);
10078	SDValue Op = N->getOperand(Num: `0`);
10079
10080	// abs(x) -> smax(x,sub(0,x))
10081	if (!IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
10082	isOperationLegal(Op: ISD::SMAX, VT)) {
10083	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
10084	Op = DAG.getFreeze(V: Op);
10085	return DAG.getNode(Opcode: ISD::SMAX, DL: dl, VT, N1: Op,
10086	N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
10087	}
10088
10089	// abs(x) -> umin(x,sub(0,x))
10090	if (!IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
10091	isOperationLegal(Op: ISD::UMIN, VT)) {
10092	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
10093	Op = DAG.getFreeze(V: Op);
10094	return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT, N1: Op,
10095	N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
10096	}
10097
10098	// 0 - abs(x) -> smin(x, sub(0,x))
10099	if (IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
10100	isOperationLegal(Op: ISD::SMIN, VT)) {
10101	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
10102	Op = DAG.getFreeze(V: Op);
10103	return DAG.getNode(Opcode: ISD::SMIN, DL: dl, VT, N1: Op,
10104	N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
10105	}
10106
10107	// Only expand vector types if we have the appropriate vector operations.
10108	if (VT.isVector() &&
10109	(!isOperationLegalOrCustom(Op: ISD::SRA, VT) \|\|
10110	(!IsNegative && !isOperationLegalOrCustom(Op: ISD::ADD, VT)) \|\|
10111	(IsNegative && !isOperationLegalOrCustom(Op: ISD::SUB, VT)) \|\|
10112	!isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT)))
10113	return SDValue ();
10114
10115	Op = DAG.getFreeze(V: Op);
10116	SDValue Shift = DAG.getNode(
10117	Opcode: ISD::SRA, DL: dl, VT, N1: Op,
10118	N2: DAG.getShiftAmountConstant(Val: VT.getScalarSizeInBits() - `1`, VT, DL: dl));
10119	SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Op, N2: Shift);
10120
10121	// abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
10122	if (!IsNegative)
10123	return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Xor, N2: Shift);
10124
10125	// 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
10126	return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Shift, N2: Xor);
10127	}
10128
10129	SDValue TargetLowering::expandABD(SDNode N, SelectionDAG &DAG) const* {
10130	SDLoc dl(N);
10131	EVT VT = N->getValueType(ResNo: `0`);
10132	SDValue LHS = N->getOperand(Num: `0`);
10133	SDValue RHS = N->getOperand(Num: `1`);
10134	bool IsSigned = N->getOpcode() == ISD::ABDS;
10135
10136	// abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
10137	// abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
10138	unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
10139	unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
10140	if (isOperationLegal(Op: MaxOpc, VT) && isOperationLegal(Op: MinOpc, VT)) {
10141	LHS = DAG.getFreeze(V: LHS);
10142	RHS = DAG.getFreeze(V: RHS);
10143	SDValue Max = DAG.getNode(Opcode: MaxOpc, DL: dl, VT, N1: LHS, N2: RHS);
10144	SDValue Min = DAG.getNode(Opcode: MinOpc, DL: dl, VT, N1: LHS, N2: RHS);
10145	return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Max, N2: Min);
10146	}
10147
10148	// abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
10149	if (!IsSigned && isOperationLegal(Op: ISD::USUBSAT, VT)) {
10150	LHS = DAG.getFreeze(V: LHS);
10151	RHS = DAG.getFreeze(V: RHS);
10152	return DAG.getNode(Opcode: ISD::OR, DL: dl, VT,
10153	N1: DAG.getNode(Opcode: ISD::USUBSAT, DL: dl, VT, N1: LHS, N2: RHS),
10154	N2: DAG.getNode(Opcode: ISD::USUBSAT, DL: dl, VT, N1: RHS, N2: LHS));
10155	}
10156
10157	// If the subtract doesn't overflow then just use abs(sub())
10158	bool IsNonNegative = DAG.SignBitIsZero(Op: LHS) && DAG.SignBitIsZero(Op: RHS);
10159
10160	if (DAG.willNotOverflowSub(IsSigned: IsSigned \|\| IsNonNegative, N0: LHS, N1: RHS))
10161	return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT,
10162	Operand: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS));
10163
10164	if (DAG.willNotOverflowSub(IsSigned: IsSigned \|\| IsNonNegative, N0: RHS, N1: LHS))
10165	return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT,
10166	Operand: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: RHS, N2: LHS));
10167
10168	EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10169	ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
10170	LHS = DAG.getFreeze(V: LHS);
10171	RHS = DAG.getFreeze(V: RHS);
10172	SDValue Cmp = DAG.getSetCC(DL: dl, VT: CCVT, LHS, RHS, Cond: CC);
10173
10174	// Branchless expansion iff cmp result is allbits:
10175	// abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
10176	// abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
10177	if (CCVT == VT && getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
10178	SDValue Diff = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS);
10179	SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Diff, N2: Cmp);
10180	return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Cmp, N2: Xor);
10181	}
10182
10183	// Similar to the branchless expansion, if we don't prefer selects, use the
10184	// (sign-extended) usubo overflow flag if the (scalar) type is illegal as this
10185	// is more likely to legalize cleanly: abdu(lhs, rhs) -> sub(xor(sub(lhs,
10186	// rhs), uof(lhs, rhs)), uof(lhs, rhs))
10187	if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT) &&
10188	!preferSelectsOverBooleanArithmetic(VT)) {
10189	SDValue USubO =
10190	DAG.getNode(Opcode: ISD::USUBO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::i1), Ops: {LHS, RHS});
10191	SDValue Cmp = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT, Operand: USubO.getValue(R: `1`));
10192	SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: USubO.getValue(R: `0`), N2: Cmp);
10193	return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Xor, N2: Cmp);
10194	}
10195
10196	// FIXME: Should really try to split the vector in case it's legal on a
10197	// subvector.
10198	if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
10199	return DAG.UnrollVectorOp(N);
10200
10201	// abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10202	// abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10203	return DAG.getSelect(DL: dl, VT, Cond: Cmp, LHS: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS),
10204	RHS: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: RHS, N2: LHS));
10205	}
10206
10207	SDValue TargetLowering::expandAVG(SDNode N, SelectionDAG &DAG) const* {
10208	SDLoc dl(N);
10209	EVT VT = N->getValueType(ResNo: `0`);
10210	SDValue LHS = N->getOperand(Num: `0`);
10211	SDValue RHS = N->getOperand(Num: `1`);
10212
10213	unsigned Opc = N->getOpcode();
10214	bool IsFloor = Opc == ISD::AVGFLOORS \|\| Opc == ISD::AVGFLOORU;
10215	bool IsSigned = Opc == ISD::AVGCEILS \|\| Opc == ISD::AVGFLOORS;
10216	unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
10217	unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
10218	unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
10219	unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
10220	assert((Opc == ISD::AVGFLOORS \|\| Opc == ISD::AVGCEILS \|\|
10221	Opc == ISD::AVGFLOORU \|\| Opc == ISD::AVGCEILU) &&
10222	"Unknown AVG node");
10223
10224	// If the operands are already extended, we can add+shift.
10225	bool IsExt =
10226	(IsSigned && DAG.ComputeNumSignBits(Op: LHS) >= `2` &&
10227	DAG.ComputeNumSignBits(Op: RHS) >= `2`) \|\|
10228	(!IsSigned && DAG.computeKnownBits(Op: LHS).countMinLeadingZeros() >= `1` &&
10229	DAG.computeKnownBits(Op: RHS).countMinLeadingZeros() >= `1`);
10230	if (IsExt) {
10231	SDValue Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: LHS, N2: RHS);
10232	if (!IsFloor)
10233	Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Sum, N2: DAG.getConstant(Val: `1`, DL: dl, VT));
10234	return DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: Sum,
10235	N2: DAG.getShiftAmountConstant(Val: `1`, VT, DL: dl));
10236	}
10237
10238	// For scalars, see if we can efficiently extend/truncate to use add+shift.
10239	if (VT.isScalarInteger()) {
10240	unsigned BW = VT.getScalarSizeInBits();
10241	EVT ExtVT = VT.getIntegerVT(Context&: DAG.getContext(), BitWidth: `2` BW);
10242	if (isTypeLegal(VT: ExtVT) && isTruncateFree(FromVT: ExtVT, ToVT: VT)) {
10243	LHS = DAG.getNode(Opcode: ExtOpc, DL: dl, VT: ExtVT, Operand: LHS);
10244	RHS = DAG.getNode(Opcode: ExtOpc, DL: dl, VT: ExtVT, Operand: RHS);
10245	SDValue Avg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ExtVT, N1: LHS, N2: RHS);
10246	if (!IsFloor)
10247	Avg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ExtVT, N1: Avg,
10248	N2: DAG.getConstant(Val: `1`, DL: dl, VT: ExtVT));
10249	// Just use SRL as we will be truncating away the extended sign bits.
10250	Avg = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ExtVT, N1: Avg,
10251	N2: DAG.getShiftAmountConstant(Val: `1`, VT: ExtVT, DL: dl));
10252	return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Avg);
10253	}
10254	}
10255
10256	// avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
10257	if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT) &&
10258	isOperationLegalOrCustom(
10259	Op: ISD::UADDO, VT: getLegalTypeToTransformTo(Context&: *DAG.getContext(), VT))) {
10260	SDValue UAddWithOverflow =
10261	DAG.getNode(Opcode: ISD::UADDO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::i1), Ops: {RHS, LHS});
10262
10263	SDValue Sum = UAddWithOverflow.getValue(R: `0`);
10264	SDValue Overflow = UAddWithOverflow.getValue(R: `1`);
10265
10266	// Right shift the sum by 1
10267	SDValue LShrVal = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Sum,
10268	N2: DAG.getShiftAmountConstant(Val: `1`, VT, DL: dl));
10269
10270	SDValue ZeroExtOverflow = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: Overflow);
10271	SDValue OverflowShl = DAG.getNode(
10272	Opcode: ISD::SHL, DL: dl, VT, N1: ZeroExtOverflow,
10273	N2: DAG.getShiftAmountConstant(Val: VT.getScalarSizeInBits() - `1`, VT, DL: dl));
10274
10275	return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: LShrVal, N2: OverflowShl);
10276	}
10277
10278	// avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
10279	// avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
10280	// avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
10281	// avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
10282	LHS = DAG.getFreeze(V: LHS);
10283	RHS = DAG.getFreeze(V: RHS);
10284	SDValue Sign = DAG.getNode(Opcode: SignOpc, DL: dl, VT, N1: LHS, N2: RHS);
10285	SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: LHS, N2: RHS);
10286	SDValue Shift =
10287	DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: Xor, N2: DAG.getShiftAmountConstant(Val: `1`, VT, DL: dl));
10288	return DAG.getNode(Opcode: SumOpc, DL: dl, VT, N1: Sign, N2: Shift);
10289	}
10290
10291	SDValue TargetLowering::expandBSWAP(SDNode N, SelectionDAG &DAG) const* {
10292	SDLoc dl(N);
10293	EVT VT = N->getValueType(ResNo: `0`);
10294	SDValue Op = N->getOperand(Num: `0`);
10295
10296	if (!VT.isSimple())
10297	return SDValue ();
10298
10299	EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
10300	SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
10301	switch (VT.getSimpleVT().getScalarType().SimpleTy) {
10302	default:
10303	return SDValue ();
10304	case MVT::i16:
10305	// Use a rotate by 8. This can be further expanded if necessary.
10306	return DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT));
10307	case MVT::i32:
10308	// This is meant for ARM speficially, which has ROTR but no ROTL.
10309	if (isOperationLegalOrCustom(Op: ISD::ROTR, VT)) {
10310	SDValue Mask = DAG.getConstant(Val: `0x00FF00FF`, DL: dl, VT);
10311	// (x & 0x00FF00FF) rotr 8 \| (x rotl 8) & 0x00FF00FF
10312	SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op, N2: Mask);
10313	SDValue Rotr =
10314	DAG.getNode(Opcode: ISD::ROTR, DL: dl, VT, N1: And, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT));
10315	SDValue Rotl =
10316	DAG.getNode(Opcode: ISD::ROTR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT));
10317	SDValue And2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Rotl, N2: Mask);
10318	return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Rotr, N2: And2);
10319	}
10320	Tmp4 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT));
10321	Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
10322	N2: DAG.getConstant(Val: `0xFF00`, DL: dl, VT));
10323	Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT));
10324	Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT));
10325	Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: `0xFF00`, DL: dl, VT));
10326	Tmp1 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT));
10327	Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp3);
10328	Tmp2 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp1);
10329	return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp2);
10330	case MVT::i64:
10331	Tmp8 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `56`, DL: dl, VT: SHVT));
10332	Tmp7 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
10333	N2: DAG.getConstant(Val: `255ULL`<<`8`, DL: dl, VT));
10334	Tmp7 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp7, N2: DAG.getConstant(Val: `40`, DL: dl, VT: SHVT));
10335	Tmp6 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
10336	N2: DAG.getConstant(Val: `255ULL`<<`16`, DL: dl, VT));
10337	Tmp6 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp6, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT));
10338	Tmp5 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
10339	N2: DAG.getConstant(Val: `255ULL`<<`24`, DL: dl, VT));
10340	Tmp5 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp5, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT));
10341	Tmp4 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT));
10342	Tmp4 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp4,
10343	N2: DAG.getConstant(Val: `255ULL`<<`24`, DL: dl, VT));
10344	Tmp3 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT));
10345	Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp3,
10346	N2: DAG.getConstant(Val: `255ULL`<<`16`, DL: dl, VT));
10347	Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `40`, DL: dl, VT: SHVT));
10348	Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2,
10349	N2: DAG.getConstant(Val: `255ULL`<<`8`, DL: dl, VT));
10350	Tmp1 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `56`, DL: dl, VT: SHVT));
10351	Tmp8 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp7);
10352	Tmp6 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp6, N2: Tmp5);
10353	Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp3);
10354	Tmp2 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp1);
10355	Tmp8 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp6);
10356	Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp2);
10357	return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp4);
10358	}
10359	}
10360
10361	SDValue TargetLowering::expandVPBSWAP(SDNode N, SelectionDAG &DAG) const* {
10362	SDLoc dl(N);
10363	EVT VT = N->getValueType(ResNo: `0`);
10364	SDValue Op = N->getOperand(Num: `0`);
10365	SDValue Mask = N->getOperand(Num: `1`);
10366	SDValue EVL = N->getOperand(Num: `2`);
10367
10368	if (!VT.isSimple())
10369	return SDValue ();
10370
10371	EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
10372	SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
10373	switch (VT.getSimpleVT().getScalarType().SimpleTy) {
10374	default:
10375	return SDValue ();
10376	case MVT::i16:
10377	Tmp1 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT),
10378	N3: Mask, N4: EVL);
10379	Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT),
10380	N3: Mask, N4: EVL);
10381	return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp1, N2: Tmp2, N3: Mask, N4: EVL);
10382	case MVT::i32:
10383	Tmp4 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT),
10384	N3: Mask, N4: EVL);
10385	Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `0xFF00`, DL: dl, VT),
10386	N3: Mask, N4: EVL);
10387	Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT),
10388	N3: Mask, N4: EVL);
10389	Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT),
10390	N3: Mask, N4: EVL);
10391	Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
10392	N2: DAG.getConstant(Val: `0xFF00`, DL: dl, VT), N3: Mask, N4: EVL);
10393	Tmp1 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT),
10394	N3: Mask, N4: EVL);
10395	Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp3, N3: Mask, N4: EVL);
10396	Tmp2 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp1, N3: Mask, N4: EVL);
10397	return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp2, N3: Mask, N4: EVL);
10398	case MVT::i64:
10399	Tmp8 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `56`, DL: dl, VT: SHVT),
10400	N3: Mask, N4: EVL);
10401	Tmp7 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
10402	N2: DAG.getConstant(Val: `255ULL` << `8`, DL: dl, VT), N3: Mask, N4: EVL);
10403	Tmp7 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp7, N2: DAG.getConstant(Val: `40`, DL: dl, VT: SHVT),
10404	N3: Mask, N4: EVL);
10405	Tmp6 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
10406	N2: DAG.getConstant(Val: `255ULL` << `16`, DL: dl, VT), N3: Mask, N4: EVL);
10407	Tmp6 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp6, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT),
10408	N3: Mask, N4: EVL);
10409	Tmp5 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
10410	N2: DAG.getConstant(Val: `255ULL` << `24`, DL: dl, VT), N3: Mask, N4: EVL);
10411	Tmp5 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp5, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT),
10412	N3: Mask, N4: EVL);
10413	Tmp4 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT),
10414	N3: Mask, N4: EVL);
10415	Tmp4 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp4,
10416	N2: DAG.getConstant(Val: `255ULL` << `24`, DL: dl, VT), N3: Mask, N4: EVL);
10417	Tmp3 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT),
10418	N3: Mask, N4: EVL);
10419	Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp3,
10420	N2: DAG.getConstant(Val: `255ULL` << `16`, DL: dl, VT), N3: Mask, N4: EVL);
10421	Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `40`, DL: dl, VT: SHVT),
10422	N3: Mask, N4: EVL);
10423	Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
10424	N2: DAG.getConstant(Val: `255ULL` << `8`, DL: dl, VT), N3: Mask, N4: EVL);
10425	Tmp1 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `56`, DL: dl, VT: SHVT),
10426	N3: Mask, N4: EVL);
10427	Tmp8 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp7, N3: Mask, N4: EVL);
10428	Tmp6 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp6, N2: Tmp5, N3: Mask, N4: EVL);
10429	Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp3, N3: Mask, N4: EVL);
10430	Tmp2 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp1, N3: Mask, N4: EVL);
10431	Tmp8 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp6, N3: Mask, N4: EVL);
10432	Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp2, N3: Mask, N4: EVL);
10433	return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp4, N3: Mask, N4: EVL);
10434	}
10435	}
10436
10437	SDValue TargetLowering::expandBITREVERSE(SDNode N, SelectionDAG &DAG) const* {
10438	SDLoc dl(N);
10439	EVT VT = N->getValueType(ResNo: `0`);
10440	SDValue Op = N->getOperand(Num: `0`);
10441	EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
10442	unsigned Sz = VT.getScalarSizeInBits();
10443
10444	SDValue Tmp, Tmp2, Tmp3;
10445
10446	// If we can, perform BSWAP first and then the mask+swap the i4, then i2
10447	// and finally the i1 pairs.
10448	// TODO: We can easily support i4/i2 legal types if any target ever does.
10449	if (Sz >= `8` && isPowerOf2_32(Value: Sz)) {
10450	// Create the masks - repeating the pattern every byte.
10451	APInt Mask4 = APInt::getSplat(NewLen: Sz, V: APInt (`8`, `0x0F`));
10452	APInt Mask2 = APInt::getSplat(NewLen: Sz, V: APInt (`8`, `0x33`));
10453	APInt Mask1 = APInt::getSplat(NewLen: Sz, V: APInt (`8`, `0x55`));
10454
10455	// BSWAP if the type is wider than a single byte.
10456	Tmp = (Sz > `8` ? DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT, Operand: Op) : Op);
10457
10458	// swap i4: ((V >> 4) & 0x0F) \| ((V & 0x0F) << 4)
10459	Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: `4`, DL: dl, VT: SHVT));
10460	Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask4, DL: dl, VT));
10461	Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask4, DL: dl, VT));
10462	Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `4`, DL: dl, VT: SHVT));
10463	Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
10464
10465	// swap i2: ((V >> 2) & 0x33) \| ((V & 0x33) << 2)
10466	Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: `2`, DL: dl, VT: SHVT));
10467	Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask2, DL: dl, VT));
10468	Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask2, DL: dl, VT));
10469	Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `2`, DL: dl, VT: SHVT));
10470	Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
10471
10472	// swap i1: ((V >> 1) & 0x55) \| ((V & 0x55) << 1)
10473	Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: `1`, DL: dl, VT: SHVT));
10474	Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask1, DL: dl, VT));
10475	Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask1, DL: dl, VT));
10476	Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `1`, DL: dl, VT: SHVT));
10477	Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
10478	return Tmp;
10479	}
10480
10481	Tmp = DAG.getConstant(Val: `0`, DL: dl, VT);
10482	for (unsigned I = `0`, J = Sz-`1`; I < Sz; ++I, --J) {
10483	if (I < J)
10484	Tmp2 =
10485	DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: J - I, DL: dl, VT: SHVT));
10486	else
10487	Tmp2 =
10488	DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: I - J, DL: dl, VT: SHVT));
10489
10490	APInt Shift = APInt::getOneBitSet(numBits: Sz, BitNo: J);
10491	Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Shift, DL: dl, VT));
10492	Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp, N2: Tmp2);
10493	}
10494
10495	return Tmp;
10496	}
10497
10498	SDValue TargetLowering::expandVPBITREVERSE(SDNode N, SelectionDAG &DAG) const* {
10499	assert(N->getOpcode() == ISD::VP_BITREVERSE);
10500
10501	SDLoc dl(N);
10502	EVT VT = N->getValueType(ResNo: `0`);
10503	SDValue Op = N->getOperand(Num: `0`);
10504	SDValue Mask = N->getOperand(Num: `1`);
10505	SDValue EVL = N->getOperand(Num: `2`);
10506	EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
10507	unsigned Sz = VT.getScalarSizeInBits();
10508
10509	SDValue Tmp, Tmp2, Tmp3;
10510
10511	// If we can, perform BSWAP first and then the mask+swap the i4, then i2
10512	// and finally the i1 pairs.
10513	// TODO: We can easily support i4/i2 legal types if any target ever does.
10514	if (Sz >= `8` && isPowerOf2_32(Value: Sz)) {
10515	// Create the masks - repeating the pattern every byte.
10516	APInt Mask4 = APInt::getSplat(NewLen: Sz, V: APInt (`8`, `0x0F`));
10517	APInt Mask2 = APInt::getSplat(NewLen: Sz, V: APInt (`8`, `0x33`));
10518	APInt Mask1 = APInt::getSplat(NewLen: Sz, V: APInt (`8`, `0x55`));
10519
10520	// BSWAP if the type is wider than a single byte.
10521	Tmp = (Sz > `8` ? DAG.getNode(Opcode: ISD::VP_BSWAP, DL: dl, VT, N1: Op, N2: Mask, N3: EVL) : Op);
10522
10523	// swap i4: ((V >> 4) & 0x0F) \| ((V & 0x0F) << 4)
10524	Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: `4`, DL: dl, VT: SHVT),
10525	N3: Mask, N4: EVL);
10526	Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
10527	N2: DAG.getConstant(Val: Mask4, DL: dl, VT), N3: Mask, N4: EVL);
10528	Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask4, DL: dl, VT),
10529	N3: Mask, N4: EVL);
10530	Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `4`, DL: dl, VT: SHVT),
10531	N3: Mask, N4: EVL);
10532	Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);
10533
10534	// swap i2: ((V >> 2) & 0x33) \| ((V & 0x33) << 2)
10535	Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: `2`, DL: dl, VT: SHVT),
10536	N3: Mask, N4: EVL);
10537	Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
10538	N2: DAG.getConstant(Val: Mask2, DL: dl, VT), N3: Mask, N4: EVL);
10539	Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask2, DL: dl, VT),
10540	N3: Mask, N4: EVL);
10541	Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `2`, DL: dl, VT: SHVT),
10542	N3: Mask, N4: EVL);
10543	Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);
10544
10545	// swap i1: ((V >> 1) & 0x55) \| ((V & 0x55) << 1)
10546	Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: `1`, DL: dl, VT: SHVT),
10547	N3: Mask, N4: EVL);
10548	Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
10549	N2: DAG.getConstant(Val: Mask1, DL: dl, VT), N3: Mask, N4: EVL);
10550	Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask1, DL: dl, VT),
10551	N3: Mask, N4: EVL);
10552	Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `1`, DL: dl, VT: SHVT),
10553	N3: Mask, N4: EVL);
10554	Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);
10555	return Tmp;
10556	}
10557	return SDValue ();
10558	}
10559
10560	std::pair<SDValue, SDValue>
10561	TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
10562	SelectionDAG &DAG) const {
10563	SDLoc SL(LD);
10564	SDValue Chain = LD->getChain();
10565	SDValue BasePTR = LD->getBasePtr();
10566	EVT SrcVT = LD->getMemoryVT();
10567	EVT DstVT = LD->getValueType(ResNo: `0`);
10568	ISD::LoadExtType ExtType = LD->getExtensionType();
10569
10570	if (SrcVT.isScalableVector())
10571	report_fatal_error(reason: "Cannot scalarize scalable vector loads");
10572
10573	unsigned NumElem = SrcVT.getVectorNumElements();
10574
10575	EVT SrcEltVT = SrcVT.getScalarType();
10576	EVT DstEltVT = DstVT.getScalarType();
10577
10578	// A vector must always be stored in memory as-is, i.e. without any padding
10579	// between the elements, since various code depend on it, e.g. in the
10580	// handling of a bitcast of a vector type to int, which may be done with a
10581	// vector store followed by an integer load. A vector that does not have
10582	// elements that are byte-sized must therefore be stored as an integer
10583	// built out of the extracted vector elements.
10584	if (!SrcEltVT.isByteSized()) {
10585	unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
10586	EVT LoadVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumLoadBits);
10587
10588	unsigned NumSrcBits = SrcVT.getSizeInBits();
10589	EVT SrcIntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumSrcBits);
10590
10591	unsigned SrcEltBits = SrcEltVT.getSizeInBits();
10592	SDValue SrcEltBitMask = DAG.getConstant(
10593	Val: APInt::getLowBitsSet(numBits: NumLoadBits, loBitsSet: SrcEltBits), DL: SL, VT: LoadVT);
10594
10595	// Load the whole vector and avoid masking off the top bits as it makes
10596	// the codegen worse.
10597	SDValue Load =
10598	DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl: SL, VT: LoadVT, Chain, Ptr: BasePTR,
10599	PtrInfo: LD->getPointerInfo(), MemVT: SrcIntVT, Alignment: LD->getBaseAlign(),
10600	MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
10601
10602	SmallVector<SDValue, `8`> Vals;
10603	for (unsigned Idx = `0`; Idx < NumElem; ++Idx) {
10604	unsigned ShiftIntoIdx =
10605	(DAG.getDataLayout().isBigEndian() ? (NumElem - `1`) - Idx : Idx);
10606	SDValue ShiftAmount = DAG.getShiftAmountConstant(
10607	Val: ShiftIntoIdx * SrcEltVT.getSizeInBits(), VT: LoadVT, DL: SL);
10608	SDValue ShiftedElt = DAG.getNode(Opcode: ISD::SRL, DL: SL, VT: LoadVT, N1: Load, N2: ShiftAmount);
10609	SDValue Elt =
10610	DAG.getNode(Opcode: ISD::AND, DL: SL, VT: LoadVT, N1: ShiftedElt, N2: SrcEltBitMask);
10611	SDValue Scalar = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SL, VT: SrcEltVT, Operand: Elt);
10612
10613	if (ExtType != ISD::NON_EXTLOAD) {
10614	unsigned ExtendOp = ISD::getExtForLoadExtType(IsFP: false, ExtType);
10615	Scalar = DAG.getNode(Opcode: ExtendOp, DL: SL, VT: DstEltVT, Operand: Scalar);
10616	}
10617
10618	Vals.push_back(Elt: Scalar);
10619	}
10620
10621	SDValue Value = DAG.getBuildVector(VT: DstVT, DL: SL, Ops: Vals);
10622	return std::make_pair(x&: Value, y: Load.getValue(R: `1`));
10623	}
10624
10625	unsigned Stride = SrcEltVT.getSizeInBits() / `8`;
10626	assert(SrcEltVT.isByteSized());
10627
10628	SmallVector<SDValue, `8`> Vals;
10629	SmallVector<SDValue, `8`> LoadChains;
10630
10631	for (unsigned Idx = `0`; Idx < NumElem; ++Idx) {
10632	SDValue ScalarLoad = DAG.getExtLoad(
10633	ExtType, dl: SL, VT: DstEltVT, Chain, Ptr: BasePTR,
10634	PtrInfo: LD->getPointerInfo().getWithOffset(O: Idx * Stride), MemVT: SrcEltVT,
10635	Alignment: LD->getBaseAlign(), MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
10636
10637	BasePTR = DAG.getObjectPtrOffset(SL, Ptr: BasePTR, Offset: TypeSize::getFixed(ExactSize: Stride));
10638
10639	Vals.push_back(Elt: ScalarLoad.getValue(R: `0`));
10640	LoadChains.push_back(Elt: ScalarLoad.getValue(R: `1`));
10641	}
10642
10643	SDValue NewChain = DAG.getNode(Opcode: ISD::TokenFactor, DL: SL, VT: MVT::Other, Ops: LoadChains);
10644	SDValue Value = DAG.getBuildVector(VT: DstVT, DL: SL, Ops: Vals);
10645
10646	return std::make_pair(x&: Value, y&: NewChain);
10647	}
10648
10649	SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
10650	SelectionDAG &DAG) const {
10651	SDLoc SL(ST);
10652
10653	SDValue Chain = ST->getChain();
10654	SDValue BasePtr = ST->getBasePtr();
10655	SDValue Value = ST->getValue();
10656	EVT StVT = ST->getMemoryVT();
10657
10658	if (StVT.isScalableVector())
10659	report_fatal_error(reason: "Cannot scalarize scalable vector stores");
10660
10661	// The type of the data we want to save
10662	EVT RegVT = Value.getValueType();
10663	EVT RegSclVT = RegVT.getScalarType();
10664
10665	// The type of data as saved in memory.
10666	EVT MemSclVT = StVT.getScalarType();
10667
10668	unsigned NumElem = StVT.getVectorNumElements();
10669
10670	// A vector must always be stored in memory as-is, i.e. without any padding
10671	// between the elements, since various code depend on it, e.g. in the
10672	// handling of a bitcast of a vector type to int, which may be done with a
10673	// vector store followed by an integer load. A vector that does not have
10674	// elements that are byte-sized must therefore be stored as an integer
10675	// built out of the extracted vector elements.
10676	if (!MemSclVT.isByteSized()) {
10677	unsigned NumBits = StVT.getSizeInBits();
10678	EVT IntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumBits);
10679
10680	SDValue CurrVal = DAG.getConstant(Val: `0`, DL: SL, VT: IntVT);
10681
10682	for (unsigned Idx = `0`; Idx < NumElem; ++Idx) {
10683	SDValue Elt = DAG.getExtractVectorElt(DL: SL, VT: RegSclVT, Vec: Value, Idx);
10684	SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SL, VT: MemSclVT, Operand: Elt);
10685	SDValue ExtElt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SL, VT: IntVT, Operand: Trunc);
10686	unsigned ShiftIntoIdx =
10687	(DAG.getDataLayout().isBigEndian() ? (NumElem - `1`) - Idx : Idx);
10688	SDValue ShiftAmount =
10689	DAG.getConstant(Val: ShiftIntoIdx * MemSclVT.getSizeInBits(), DL: SL, VT: IntVT);
10690	SDValue ShiftedElt =
10691	DAG.getNode(Opcode: ISD::SHL, DL: SL, VT: IntVT, N1: ExtElt, N2: ShiftAmount);
10692	CurrVal = DAG.getNode(Opcode: ISD::OR, DL: SL, VT: IntVT, N1: CurrVal, N2: ShiftedElt);
10693	}
10694
10695	return DAG.getStore(Chain, dl: SL, Val: CurrVal, Ptr: BasePtr, PtrInfo: ST->getPointerInfo(),
10696	Alignment: ST->getBaseAlign(), MMOFlags: ST->getMemOperand()->getFlags(),
10697	AAInfo: ST->getAAInfo());
10698	}
10699
10700	// Store Stride in bytes
10701	unsigned Stride = MemSclVT.getSizeInBits() / `8`;
10702	assert(Stride && "Zero stride!");
10703	// Extract each of the elements from the original vector and save them into
10704	// memory individually.
10705	SmallVector<SDValue, `8`> Stores;
10706	for (unsigned Idx = `0`; Idx < NumElem; ++Idx) {
10707	SDValue Elt = DAG.getExtractVectorElt(DL: SL, VT: RegSclVT, Vec: Value, Idx);
10708
10709	SDValue Ptr =
10710	DAG.getObjectPtrOffset(SL, Ptr: BasePtr, Offset: TypeSize::getFixed(ExactSize: Idx * Stride));
10711
10712	// This scalar TruncStore may be illegal, but we legalize it later.
10713	SDValue Store = DAG.getTruncStore(
10714	Chain, dl: SL, Val: Elt, Ptr, PtrInfo: ST->getPointerInfo().getWithOffset(O: Idx * Stride),
10715	SVT: MemSclVT, Alignment: ST->getBaseAlign(), MMOFlags: ST->getMemOperand()->getFlags(),
10716	AAInfo: ST->getAAInfo());
10717
10718	Stores.push_back(Elt: Store);
10719	}
10720
10721	return DAG.getNode(Opcode: ISD::TokenFactor, DL: SL, VT: MVT::Other, Ops: Stores);
10722	}
10723
10724	std::pair<SDValue, SDValue>
10725	TargetLowering::expandUnalignedLoad(LoadSDNode LD, SelectionDAG &DAG) const* {
10726	assert(LD->getAddressingMode() == ISD::UNINDEXED &&
10727	"unaligned indexed loads not implemented!");
10728	SDValue Chain = LD->getChain();
10729	SDValue Ptr = LD->getBasePtr();
10730	EVT VT = LD->getValueType(ResNo: `0`);
10731	EVT LoadedVT = LD->getMemoryVT();
10732	SDLoc dl(LD);
10733	auto &MF = DAG.getMachineFunction();
10734
10735	if (VT.isFloatingPoint() \|\| VT.isVector()) {
10736	EVT intVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: LoadedVT.getSizeInBits());
10737	if (isTypeLegal(VT: intVT) && isTypeLegal(VT: LoadedVT)) {
10738	if (!isOperationLegalOrCustom(Op: ISD::LOAD, VT: intVT) &&
10739	LoadedVT.isVector()) {
10740	// Scalarize the load and let the individual components be handled.
10741	return scalarizeVectorLoad(LD, DAG);
10742	}
10743
10744	// Expand to a (misaligned) integer load of the same size,
10745	// then bitconvert to floating point or vector.
10746	SDValue newLoad = DAG.getLoad(VT: intVT, dl, Chain, Ptr,
10747	MMO: LD->getMemOperand());
10748	SDValue Result = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: LoadedVT, Operand: newLoad);
10749	if (LoadedVT != VT)
10750	Result = DAG.getNode(Opcode: VT.isFloatingPoint() ? ISD::FP_EXTEND :
10751	ISD::ANY_EXTEND, DL: dl, VT, Operand: Result);
10752
10753	return std::make_pair(x&: Result, y: newLoad.getValue(R: `1`));
10754	}
10755
10756	// Copy the value to a (aligned) stack slot using (unaligned) integer
10757	// loads and stores, then do a (aligned) load from the stack slot.
10758	MVT RegVT = getRegisterType(Context&: *DAG.getContext(), VT: intVT);
10759	unsigned LoadedBytes = LoadedVT.getStoreSize();
10760	unsigned RegBytes = RegVT.getSizeInBits() / `8`;
10761	unsigned NumRegs = (LoadedBytes + RegBytes - `1`) / RegBytes;
10762
10763	// Make sure the stack slot is also aligned for the register type.
10764	SDValue StackBase = DAG.CreateStackTemporary(VT1: LoadedVT, VT2: RegVT);
10765	auto FrameIndex = cast<FrameIndexSDNode>(Val: StackBase.getNode())->getIndex();
10766	SmallVector<SDValue, `8`> Stores;
10767	SDValue StackPtr = StackBase;
10768	unsigned Offset = `0`;
10769
10770	EVT PtrVT = Ptr.getValueType();
10771	EVT StackPtrVT = StackPtr.getValueType();
10772
10773	SDValue PtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: PtrVT);
10774	SDValue StackPtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: StackPtrVT);
10775
10776	// Do all but one copies using the full register width.
10777	for (unsigned i = `1`; i < NumRegs; i++) {
10778	// Load one integer register's worth from the original location.
10779	SDValue Load = DAG.getLoad(
10780	VT: RegVT, dl, Chain, Ptr, PtrInfo: LD->getPointerInfo().getWithOffset(O: Offset),
10781	Alignment: LD->getBaseAlign(), MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
10782	// Follow the load with a store to the stack slot. Remember the store.
10783	Stores.push_back(Elt: DAG.getStore(
10784	Chain: Load.getValue(R: `1`), dl, Val: Load, Ptr: StackPtr,
10785	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset)));
10786	// Increment the pointers.
10787	Offset += RegBytes;
10788
10789	Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: PtrIncrement);
10790	StackPtr = DAG.getObjectPtrOffset(SL: dl, Ptr: StackPtr, Offset: StackPtrIncrement);
10791	}
10792
10793	// The last copy may be partial. Do an extending load.
10794	EVT MemVT = EVT::getIntegerVT(Context&: *DAG.getContext(),
10795	BitWidth: `8` * (LoadedBytes - Offset));
10796	SDValue Load = DAG.getExtLoad(
10797	ExtType: ISD::EXTLOAD, dl, VT: RegVT, Chain, Ptr,
10798	PtrInfo: LD->getPointerInfo().getWithOffset(O: Offset), MemVT, Alignment: LD->getBaseAlign(),
10799	MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
10800	// Follow the load with a store to the stack slot. Remember the store.
10801	// On big-endian machines this requires a truncating store to ensure
10802	// that the bits end up in the right place.
10803	Stores.push_back(Elt: DAG.getTruncStore(
10804	Chain: Load.getValue(R: `1`), dl, Val: Load, Ptr: StackPtr,
10805	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset), SVT: MemVT));
10806
10807	// The order of the stores doesn't matter - say it with a TokenFactor.
10808	SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: Stores);
10809
10810	// Finally, perform the original load only redirected to the stack slot.
10811	Load = DAG.getExtLoad(ExtType: LD->getExtensionType(), dl, VT, Chain: TF, Ptr: StackBase,
10812	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset: `0`),
10813	MemVT: LoadedVT);
10814
10815	// Callers expect a MERGE_VALUES node.
10816	return std::make_pair(x&: Load, y&: TF);
10817	}
10818
10819	assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
10820	"Unaligned load of unsupported type.");
10821
10822	// Compute the new VT that is half the size of the old one. This is an
10823	// integer MVT.
10824	unsigned NumBits = LoadedVT.getSizeInBits();
10825	EVT NewLoadedVT;
10826	NewLoadedVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumBits/`2`);
10827	NumBits >>= `1`;
10828
10829	Align Alignment = LD->getBaseAlign();
10830	unsigned IncrementSize = NumBits / `8`;
10831	ISD::LoadExtType HiExtType = LD->getExtensionType();
10832
10833	// If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
10834	if (HiExtType == ISD::NON_EXTLOAD)
10835	HiExtType = ISD::ZEXTLOAD;
10836
10837	// Load the value in two parts
10838	SDValue Lo, Hi;
10839	if (DAG.getDataLayout().isLittleEndian()) {
10840	Lo = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT, Chain, Ptr, PtrInfo: LD->getPointerInfo(),
10841	MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
10842	AAInfo: LD->getAAInfo());
10843
10844	Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
10845	Hi = DAG.getExtLoad(ExtType: HiExtType, dl, VT, Chain, Ptr,
10846	PtrInfo: LD->getPointerInfo().getWithOffset(O: IncrementSize),
10847	MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
10848	AAInfo: LD->getAAInfo());
10849	} else {
10850	Hi = DAG.getExtLoad(ExtType: HiExtType, dl, VT, Chain, Ptr, PtrInfo: LD->getPointerInfo(),
10851	MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
10852	AAInfo: LD->getAAInfo());
10853
10854	Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
10855	Lo = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
10856	PtrInfo: LD->getPointerInfo().getWithOffset(O: IncrementSize),
10857	MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
10858	AAInfo: LD->getAAInfo());
10859	}
10860
10861	// aggregate the two parts
10862	SDValue ShiftAmount = DAG.getShiftAmountConstant(Val: NumBits, VT, DL: dl);
10863	SDValue Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Hi, N2: ShiftAmount);
10864	Result = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Result, N2: Lo);
10865
10866	SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Lo.getValue(R: `1`),
10867	N2: Hi.getValue(R: `1`));
10868
10869	return std::make_pair(x&: Result, y&: TF);
10870	}
10871
10872	SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
10873	SelectionDAG &DAG) const {
10874	assert(ST->getAddressingMode() == ISD::UNINDEXED &&
10875	"unaligned indexed stores not implemented!");
10876	SDValue Chain = ST->getChain();
10877	SDValue Ptr = ST->getBasePtr();
10878	SDValue Val = ST->getValue();
10879	EVT VT = Val.getValueType();
10880	Align Alignment = ST->getBaseAlign();
10881	auto &MF = DAG.getMachineFunction();
10882	EVT StoreMemVT = ST->getMemoryVT();
10883
10884	SDLoc dl(ST);
10885	if (StoreMemVT.isFloatingPoint() \|\| StoreMemVT.isVector()) {
10886	EVT intVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: VT.getSizeInBits());
10887	if (isTypeLegal(VT: intVT)) {
10888	if (!isOperationLegalOrCustom(Op: ISD::STORE, VT: intVT) &&
10889	StoreMemVT.isVector()) {
10890	// Scalarize the store and let the individual components be handled.
10891	SDValue Result = scalarizeVectorStore(ST, DAG);
10892	return Result;
10893	}
10894	// Expand to a bitconvert of the value to the integer type of the
10895	// same size, then a (misaligned) int store.
10896	// FIXME: Does not handle truncating floating point stores!
10897	SDValue Result = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: intVT, Operand: Val);
10898	Result = DAG.getStore(Chain, dl, Val: Result, Ptr, PtrInfo: ST->getPointerInfo(),
10899	Alignment, MMOFlags: ST->getMemOperand()->getFlags());
10900	return Result;
10901	}
10902	// Do a (aligned) store to a stack slot, then copy from the stack slot
10903	// to the final destination using (unaligned) integer loads and stores.
10904	MVT RegVT = getRegisterType(
10905	Context&: *DAG.getContext(),
10906	VT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: StoreMemVT.getSizeInBits()));
10907	EVT PtrVT = Ptr.getValueType();
10908	unsigned StoredBytes = StoreMemVT.getStoreSize();
10909	unsigned RegBytes = RegVT.getSizeInBits() / `8`;
10910	unsigned NumRegs = (StoredBytes + RegBytes - `1`) / RegBytes;
10911
10912	// Make sure the stack slot is also aligned for the register type.
10913	SDValue StackPtr = DAG.CreateStackTemporary(VT1: StoreMemVT, VT2: RegVT);
10914	auto FrameIndex = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
10915
10916	// Perform the original store, only redirected to the stack slot.
10917	SDValue Store = DAG.getTruncStore(
10918	Chain, dl, Val, Ptr: StackPtr,
10919	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset: `0`), SVT: StoreMemVT);
10920
10921	EVT StackPtrVT = StackPtr.getValueType();
10922
10923	SDValue PtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: PtrVT);
10924	SDValue StackPtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: StackPtrVT);
10925	SmallVector<SDValue, `8`> Stores;
10926	unsigned Offset = `0`;
10927
10928	// Do all but one copies using the full register width.
10929	for (unsigned i = `1`; i < NumRegs; i++) {
10930	// Load one integer register's worth from the stack slot.
10931	SDValue Load = DAG.getLoad(
10932	VT: RegVT, dl, Chain: Store, Ptr: StackPtr,
10933	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset));
10934	// Store it to the final location. Remember the store.
10935	Stores.push_back(Elt: DAG.getStore(Chain: Load.getValue(R: `1`), dl, Val: Load, Ptr,
10936	PtrInfo: ST->getPointerInfo().getWithOffset(O: Offset),
10937	Alignment: ST->getBaseAlign(),
10938	MMOFlags: ST->getMemOperand()->getFlags()));
10939	// Increment the pointers.
10940	Offset += RegBytes;
10941	StackPtr = DAG.getObjectPtrOffset(SL: dl, Ptr: StackPtr, Offset: StackPtrIncrement);
10942	Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: PtrIncrement);
10943	}
10944
10945	// The last store may be partial. Do a truncating store. On big-endian
10946	// machines this requires an extending load from the stack slot to ensure
10947	// that the bits are in the right place.
10948	EVT LoadMemVT =
10949	EVT::getIntegerVT(Context&: DAG.getContext(), BitWidth: `8` (StoredBytes - Offset));
10950
10951	// Load from the stack slot.
10952	SDValue Load = DAG.getExtLoad(
10953	ExtType: ISD::EXTLOAD, dl, VT: RegVT, Chain: Store, Ptr: StackPtr,
10954	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset), MemVT: LoadMemVT);
10955
10956	Stores.push_back(Elt: DAG.getTruncStore(
10957	Chain: Load.getValue(R: `1`), dl, Val: Load, Ptr,
10958	PtrInfo: ST->getPointerInfo().getWithOffset(O: Offset), SVT: LoadMemVT,
10959	Alignment: ST->getBaseAlign(), MMOFlags: ST->getMemOperand()->getFlags(), AAInfo: ST->getAAInfo()));
10960	// The order of the stores doesn't matter - say it with a TokenFactor.
10961	SDValue Result = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: Stores);
10962	return Result;
10963	}
10964
10965	assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
10966	"Unaligned store of unknown type.");
10967	// Get the half-size VT
10968	EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(Context&: *DAG.getContext());
10969	unsigned NumBits = NewStoredVT.getFixedSizeInBits();
10970	unsigned IncrementSize = NumBits / `8`;
10971
10972	// Divide the stored value in two parts.
10973	SDValue ShiftAmount =
10974	DAG.getShiftAmountConstant(Val: NumBits, VT: Val.getValueType(), DL: dl);
10975	SDValue Lo = Val;
10976	// If Val is a constant, replace the upper bits with 0. The SRL will constant
10977	// fold and not use the upper bits. A smaller constant may be easier to
10978	// materialize.
10979	if (auto *C = dyn_cast<ConstantSDNode>(Val&: Lo); C && !C->isOpaque())
10980	Lo = DAG.getNode(
10981	Opcode: ISD::AND, DL: dl, VT, N1: Lo,
10982	N2: DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VT.getSizeInBits(), loBitsSet: NumBits), DL: dl,
10983	VT));
10984	SDValue Hi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Val, N2: ShiftAmount);
10985
10986	// Store the two parts
10987	SDValue Store1, Store2;
10988	Store1 = DAG.getTruncStore(Chain, dl,
10989	Val: DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
10990	Ptr, PtrInfo: ST->getPointerInfo(), SVT: NewStoredVT, Alignment,
10991	MMOFlags: ST->getMemOperand()->getFlags());
10992
10993	Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
10994	Store2 = DAG.getTruncStore(
10995	Chain, dl, Val: DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
10996	PtrInfo: ST->getPointerInfo().getWithOffset(O: IncrementSize), SVT: NewStoredVT, Alignment,
10997	MMOFlags: ST->getMemOperand()->getFlags(), AAInfo: ST->getAAInfo());
10998
10999	SDValue Result =
11000	DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Store1, N2: Store2);
11001	return Result;
11002	}
11003
11004	SDValue
11005	TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
11006	const SDLoc &DL, EVT DataVT,
11007	SelectionDAG &DAG,
11008	bool IsCompressedMemory) const {
11009	SDValue Increment;
11010	EVT AddrVT = Addr.getValueType();
11011	EVT MaskVT = Mask.getValueType();
11012	assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
11013	"Incompatible types of Data and Mask");
11014	if (IsCompressedMemory) {
11015	// Incrementing the pointer according to number of '1's in the mask.
11016	if (DataVT.isScalableVector()) {
11017	EVT MaskExtVT = MaskVT.changeElementType(Context&: *DAG.getContext(), EltVT: MVT::i32);
11018	SDValue MaskExt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MaskExtVT, Operand: Mask);
11019	Increment = DAG.getNode(Opcode: ISD::VECREDUCE_ADD, DL, VT: MVT::i32, Operand: MaskExt);
11020	} else {
11021	EVT MaskIntVT =
11022	EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: MaskVT.getSizeInBits());
11023	SDValue MaskInIntReg = DAG.getBitcast(VT: MaskIntVT, V: Mask);
11024	if (MaskIntVT.getSizeInBits() < `32`) {
11025	MaskInIntReg =
11026	DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i32, Operand: MaskInIntReg);
11027	MaskIntVT = MVT::i32;
11028	}
11029	Increment = DAG.getNode(Opcode: ISD::CTPOP, DL, VT: MaskIntVT, Operand: MaskInIntReg);
11030	}
11031	// Scale is an element size in bytes.
11032	SDValue Scale = DAG.getConstant(Val: DataVT.getScalarSizeInBits() / `8`, DL,
11033	VT: AddrVT);
11034	Increment = DAG.getZExtOrTrunc(Op: Increment, DL, VT: AddrVT);
11035	Increment = DAG.getNode(Opcode: ISD::MUL, DL, VT: AddrVT, N1: Increment, N2: Scale);
11036	} else
11037	Increment = DAG.getTypeSize(DL, VT: AddrVT, TS: DataVT.getStoreSize());
11038
11039	return DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: Addr, N2: Increment);
11040	}
11041
11042	static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
11043	EVT VecVT, const SDLoc &dl,
11044	ElementCount SubEC) {
11045	assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
11046	"Cannot index a scalable vector within a fixed-width vector");
11047
11048	unsigned NElts = VecVT.getVectorMinNumElements();
11049	unsigned NumSubElts = SubEC.getKnownMinValue();
11050	EVT IdxVT = Idx.getValueType();
11051
11052	if (VecVT.isScalableVector() && !SubEC.isScalable()) {
11053	// If this is a constant index and we know the value plus the number of the
11054	// elements in the subvector minus one is less than the minimum number of
11055	// elements then it's safe to return Idx.
11056	if (auto *IdxCst = dyn_cast<ConstantSDNode>(Val&: Idx))
11057	if (IdxCst->getZExtValue() + (NumSubElts - `1`) < NElts)
11058	return Idx;
11059	SDValue VS =
11060	DAG.getVScale(DL: dl, VT: IdxVT, MulImm: APInt (IdxVT.getFixedSizeInBits(), NElts));
11061	unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
11062	SDValue Sub = DAG.getNode(Opcode: SubOpcode, DL: dl, VT: IdxVT, N1: VS,
11063	N2: DAG.getConstant(Val: NumSubElts, DL: dl, VT: IdxVT));
11064	return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT: IdxVT, N1: Idx, N2: Sub);
11065	}
11066	if (isPowerOf2_32(Value: NElts) && NumSubElts == `1`) {
11067	APInt Imm = APInt::getLowBitsSet(numBits: IdxVT.getSizeInBits(), loBitsSet: Log2_32(Value: NElts));
11068	return DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IdxVT, N1: Idx,
11069	N2: DAG.getConstant(Val: Imm, DL: dl, VT: IdxVT));
11070	}
11071	unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : `0`;
11072	return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT: IdxVT, N1: Idx,
11073	N2: DAG.getConstant(Val: MaxIndex, DL: dl, VT: IdxVT));
11074	}
11075
11076	SDValue
11077	TargetLowering::getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr,
11078	EVT VecVT, SDValue Index,
11079	const SDNodeFlags PtrArithFlags) const {
11080	return getVectorSubVecPointer(
11081	DAG, VecPtr, VecVT,
11082	SubVecVT: EVT::getVectorVT(Context&: *DAG.getContext(), VT: VecVT.getVectorElementType(), NumElements: `1`),
11083	Index, PtrArithFlags);
11084	}
11085
11086	SDValue
11087	TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr,
11088	EVT VecVT, EVT SubVecVT, SDValue Index,
11089	const SDNodeFlags PtrArithFlags) const {
11090	SDLoc dl(Index);
11091	// Make sure the index type is big enough to compute in.
11092	Index = DAG.getZExtOrTrunc(Op: Index, DL: dl, VT: VecPtr.getValueType());
11093
11094	EVT EltVT = VecVT.getVectorElementType();
11095
11096	// Calculate the element offset and add it to the pointer.
11097	unsigned EltSize = EltVT.getFixedSizeInBits() / `8`; // FIXME: should be ABI size.
11098	assert(EltSize * `8` == EltVT.getFixedSizeInBits() &&
11099	"Converting bits to bytes lost precision");
11100	assert(SubVecVT.getVectorElementType() == EltVT &&
11101	"Sub-vector must be a vector with matching element type");
11102	Index = clampDynamicVectorIndex(DAG, Idx: Index, VecVT, dl,
11103	SubEC: SubVecVT.getVectorElementCount());
11104
11105	EVT IdxVT = Index.getValueType();
11106	if (SubVecVT.isScalableVector())
11107	Index =
11108	DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IdxVT, N1: Index,
11109	N2: DAG.getVScale(DL: dl, VT: IdxVT, MulImm: APInt (IdxVT.getSizeInBits(), `1`)));
11110
11111	Index = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IdxVT, N1: Index,
11112	N2: DAG.getConstant(Val: EltSize, DL: dl, VT: IdxVT));
11113	return DAG.getMemBasePlusOffset(Base: VecPtr, Offset: Index, DL: dl, Flags: PtrArithFlags);
11114	}
11115
11116	//===----------------------------------------------------------------------===//
11117	// Implementation of Emulated TLS Model
11118	//===----------------------------------------------------------------------===//
11119
11120	SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
11121	SelectionDAG &DAG) const {
11122	// Access to address of TLS varialbe xyz is lowered to a function call:
11123	// __emutls_get_address( address of global variable named "__emutls_v.xyz" )
11124	EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
11125	PointerType VoidPtrType = PointerType::get(C&: DAG.getContext(), AddressSpace: `0`);
11126	SDLoc dl(GA);
11127
11128	ArgListTy Args;
11129	const GlobalValue *GV =
11130	cast<GlobalValue>(Val: GA->getGlobal()->stripPointerCastsAndAliases());
11131	SmallString<`32`> NameString("__emutls_v.");
11132	NameString += GV->getName();
11133	StringRef EmuTlsVarName(NameString);
11134	const GlobalVariable *EmuTlsVar =
11135	GV->getParent()->getNamedGlobal(Name: EmuTlsVarName);
11136	assert(EmuTlsVar && "Cannot find EmuTlsVar ");
11137	Args.emplace_back(args: DAG.getGlobalAddress(GV: EmuTlsVar, DL: dl, VT: PtrVT), args&: VoidPtrType);
11138
11139	SDValue EmuTlsGetAddr = DAG.getExternalSymbol(Sym: "__emutls_get_address", VT: PtrVT);
11140
11141	TargetLowering::CallLoweringInfo CLI(DAG);
11142	CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
11143	CLI.setLibCallee(CC: CallingConv::C, ResultType: VoidPtrType, Target: EmuTlsGetAddr, ArgsList: std::move(Args));
11144	std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
11145
11146	// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
11147	// At last for X86 targets, maybe good for other targets too?
11148	MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
11149	MFI.setAdjustsStack(true); // Is this only for X86 target?
11150	MFI.setHasCalls(true);
11151
11152	assert((GA->getOffset() == `0`) &&
11153	"Emulated TLS must have zero offset in GlobalAddressSDNode");
11154	return CallResult.first;
11155	}
11156
11157	SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
11158	SelectionDAG &DAG) const {
11159	assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
11160	if (!isCtlzFast())
11161	return SDValue ();
11162	ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: `2`))->get();
11163	SDLoc dl(Op);
11164	if (isNullConstant(V: Op.getOperand(i: `1`)) && CC == ISD::SETEQ) {
11165	EVT VT = Op.getOperand(i: `0`).getValueType();
11166	SDValue Zext = Op.getOperand(i: `0`);
11167	if (VT.bitsLT(VT: MVT::i32)) {
11168	VT = MVT::i32;
11169	Zext = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Op.getOperand(i: `0`));
11170	}
11171	unsigned Log2b = Log2_32(Value: VT.getSizeInBits());
11172	SDValue Clz = DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Zext);
11173	SDValue Scc = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Clz,
11174	N2: DAG.getConstant(Val: Log2b, DL: dl, VT: MVT::i32));
11175	return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32, Operand: Scc);
11176	}
11177	return SDValue ();
11178	}
11179
11180	SDValue TargetLowering::expandIntMINMAX(SDNode Node, SelectionDAG &DAG) const* {
11181	SDValue Op0 = Node->getOperand(Num: `0`);
11182	SDValue Op1 = Node->getOperand(Num: `1`);
11183	EVT VT = Op0.getValueType();
11184	EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
11185	unsigned Opcode = Node->getOpcode();
11186	SDLoc DL(Node);
11187
11188	// If both sign bits are zero, flip UMIN/UMAX <-> SMIN/SMAX if legal.
11189	unsigned AltOpcode = ISD::getOppositeSignednessMinMaxOpcode(MinMaxOpc: Opcode);
11190	if (isOperationLegal(Op: AltOpcode, VT) && DAG.SignBitIsZero(Op: Op0) &&
11191	DAG.SignBitIsZero(Op: Op1))
11192	return DAG.getNode(Opcode: AltOpcode, DL, VT, N1: Op0, N2: Op1);
11193
11194	// umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
11195	if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(V: Op1, AllowUndefs: true) && BoolVT == VT &&
11196	getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
11197	Op0 = DAG.getFreeze(V: Op0);
11198	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT);
11199	return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0,
11200	N2: DAG.getSetCC(DL, VT, LHS: Op0, RHS: Zero, Cond: ISD::SETEQ));
11201	}
11202
11203	// umin(x,y) -> sub(x,usubsat(x,y))
11204	// TODO: Missing freeze(Op0)?
11205	if (Opcode == ISD::UMIN && isOperationLegal(Op: ISD::SUB, VT) &&
11206	isOperationLegal(Op: ISD::USUBSAT, VT)) {
11207	return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0,
11208	N2: DAG.getNode(Opcode: ISD::USUBSAT, DL, VT, N1: Op0, N2: Op1));
11209	}
11210
11211	// umax(x,y) -> add(x,usubsat(y,x))
11212	// TODO: Missing freeze(Op0)?
11213	if (Opcode == ISD::UMAX && isOperationLegal(Op: ISD::ADD, VT) &&
11214	isOperationLegal(Op: ISD::USUBSAT, VT)) {
11215	return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0,
11216	N2: DAG.getNode(Opcode: ISD::USUBSAT, DL, VT, N1: Op1, N2: Op0));
11217	}
11218
11219	// FIXME: Should really try to split the vector in case it's legal on a
11220	// subvector.
11221	if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
11222	return DAG.UnrollVectorOp(N: Node);
11223
11224	// Attempt to find an existing SETCC node that we can reuse.
11225	// TODO: Do we need a generic doesSETCCNodeExist?
11226	// TODO: Missing freeze(Op0)/freeze(Op1)?
11227	auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
11228	ISD::CondCode PrefCommuteCC,
11229	ISD::CondCode AltCommuteCC) {
11230	SDVTList BoolVTList = DAG.getVTList(VT: BoolVT);
11231	for (ISD::CondCode CC : {PrefCC, AltCC}) {
11232	if (DAG.doesNodeExist(Opcode: ISD::SETCC, VTList: BoolVTList,
11233	Ops: {Op0, Op1, DAG.getCondCode(Cond: CC)})) {
11234	SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: CC);
11235	return DAG.getSelect(DL, VT, Cond, LHS: Op0, RHS: Op1);
11236	}
11237	}
11238	for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
11239	if (DAG.doesNodeExist(Opcode: ISD::SETCC, VTList: BoolVTList,
11240	Ops: {Op0, Op1, DAG.getCondCode(Cond: CC)})) {
11241	SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: CC);
11242	return DAG.getSelect(DL, VT, Cond, LHS: Op1, RHS: Op0);
11243	}
11244	}
11245	SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: PrefCC);
11246	return DAG.getSelect(DL, VT, Cond, LHS: Op0, RHS: Op1);
11247	};
11248
11249	// Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
11250	// -> Y = (A < B) ? B : A
11251	// -> Y = (A >= B) ? A : B
11252	// -> Y = (A <= B) ? B : A
11253	switch (Opcode) {
11254	case ISD::SMAX:
11255	return buildMinMax (ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
11256	case ISD::SMIN:
11257	return buildMinMax (ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
11258	case ISD::UMAX:
11259	return buildMinMax (ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
11260	case ISD::UMIN:
11261	return buildMinMax (ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
11262	}
11263
11264	llvm_unreachable("How did we get here?");
11265	}
11266
11267	SDValue TargetLowering::expandAddSubSat(SDNode Node, SelectionDAG &DAG) const* {
11268	unsigned Opcode = Node->getOpcode();
11269	SDValue LHS = Node->getOperand(Num: `0`);
11270	SDValue RHS = Node->getOperand(Num: `1`);
11271	EVT VT = LHS.getValueType();
11272	SDLoc dl(Node);
11273
11274	assert(VT == RHS.getValueType() && "Expected operands to be the same type");
11275	assert(VT.isInteger() && "Expected operands to be integers");
11276
11277	// usub.sat(a, b) -> umax(a, b) - b
11278	if (Opcode == ISD::USUBSAT && isOperationLegal(Op: ISD::UMAX, VT)) {
11279	SDValue Max = DAG.getNode(Opcode: ISD::UMAX, DL: dl, VT, N1: LHS, N2: RHS);
11280	return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Max, N2: RHS);
11281	}
11282
11283	// usub.sat(a, 1) -> sub(a, zext(a != 0))
11284	if (Opcode == ISD::USUBSAT && isOneOrOneSplat(V: RHS)) {
11285	LHS = DAG.getFreeze(V: LHS);
11286	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
11287	EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
11288	SDValue IsNonZero = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: Zero, Cond: ISD::SETNE);
11289	SDValue Subtrahend = DAG.getBoolExtOrTrunc(Op: IsNonZero, SL: dl, VT, OpVT: BoolVT);
11290	Subtrahend =
11291	DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Subtrahend, N2: DAG.getConstant(Val: `1`, DL: dl, VT));
11292	return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: Subtrahend);
11293	}
11294
11295	// uadd.sat(a, b) -> umin(a, ~b) + b
11296	if (Opcode == ISD::UADDSAT && isOperationLegal(Op: ISD::UMIN, VT)) {
11297	SDValue InvRHS = DAG.getNOT(DL: dl, Val: RHS, VT);
11298	SDValue Min = DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT, N1: LHS, N2: InvRHS);
11299	return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Min, N2: RHS);
11300	}
11301
11302	unsigned OverflowOp;
11303	switch (Opcode) {
11304	case ISD::SADDSAT:
11305	OverflowOp = ISD::SADDO;
11306	break;
11307	case ISD::UADDSAT:
11308	OverflowOp = ISD::UADDO;
11309	break;
11310	case ISD::SSUBSAT:
11311	OverflowOp = ISD::SSUBO;
11312	break;
11313	case ISD::USUBSAT:
11314	OverflowOp = ISD::USUBO;
11315	break;
11316	default:
11317	llvm_unreachable("Expected method to receive signed or unsigned saturation "
11318	"addition or subtraction node.");
11319	}
11320
11321	// FIXME: Should really try to split the vector in case it's legal on a
11322	// subvector.
11323	if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
11324	return DAG.UnrollVectorOp(N: Node);
11325
11326	unsigned BitWidth = LHS.getScalarValueSizeInBits();
11327	EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
11328	SDValue Result = DAG.getNode(Opcode: OverflowOp, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
11329	SDValue SumDiff = Result.getValue(R: `0`);
11330	SDValue Overflow = Result.getValue(R: `1`);
11331	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
11332	SDValue AllOnes = DAG.getAllOnesConstant(DL: dl, VT);
11333
11334	if (Opcode == ISD::UADDSAT) {
11335	if (getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
11336	// (LHS + RHS) \| OverflowMask
11337	SDValue OverflowMask = DAG.getSExtOrTrunc(Op: Overflow, DL: dl, VT);
11338	return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: SumDiff, N2: OverflowMask);
11339	}
11340	// Overflow ? 0xffff.... : (LHS + RHS)
11341	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: AllOnes, RHS: SumDiff);
11342	}
11343
11344	if (Opcode == ISD::USUBSAT) {
11345	if (getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
11346	// (LHS - RHS) & ~OverflowMask
11347	SDValue OverflowMask = DAG.getSExtOrTrunc(Op: Overflow, DL: dl, VT);
11348	SDValue Not = DAG.getNOT(DL: dl, Val: OverflowMask, VT);
11349	return DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: SumDiff, N2: Not);
11350	}
11351	// Overflow ? 0 : (LHS - RHS)
11352	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Zero, RHS: SumDiff);
11353	}
11354
11355	if (Opcode == ISD::SADDSAT \|\| Opcode == ISD::SSUBSAT) {
11356	APInt MinVal = APInt::getSignedMinValue(numBits: BitWidth);
11357	APInt MaxVal = APInt::getSignedMaxValue(numBits: BitWidth);
11358
11359	KnownBits KnownLHS = DAG.computeKnownBits(Op: LHS);
11360	KnownBits KnownRHS = DAG.computeKnownBits(Op: RHS);
11361
11362	// If either of the operand signs are known, then they are guaranteed to
11363	// only saturate in one direction. If non-negative they will saturate
11364	// towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
11365	//
11366	// In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
11367	// sign of 'y' has to be flipped.
11368
11369	bool LHSIsNonNegative = KnownLHS.isNonNegative();
11370	bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
11371	: KnownRHS.isNegative();
11372	if (LHSIsNonNegative \|\| RHSIsNonNegative) {
11373	SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
11374	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMax, RHS: SumDiff);
11375	}
11376
11377	bool LHSIsNegative = KnownLHS.isNegative();
11378	bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
11379	: KnownRHS.isNonNegative();
11380	if (LHSIsNegative \|\| RHSIsNegative) {
11381	SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
11382	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMin, RHS: SumDiff);
11383	}
11384	}
11385
11386	// Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
11387	APInt MinVal = APInt::getSignedMinValue(numBits: BitWidth);
11388	SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
11389	SDValue Shift = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: SumDiff,
11390	N2: DAG.getConstant(Val: BitWidth - `1`, DL: dl, VT));
11391	Result = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Shift, N2: SatMin);
11392	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Result, RHS: SumDiff);
11393	}
11394
11395	SDValue TargetLowering::expandCMP(SDNode Node, SelectionDAG &DAG) const* {
11396	unsigned Opcode = Node->getOpcode();
11397	SDValue LHS = Node->getOperand(Num: `0`);
11398	SDValue RHS = Node->getOperand(Num: `1`);
11399	EVT VT = LHS.getValueType();
11400	EVT ResVT = Node->getValueType(ResNo: `0`);
11401	EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
11402	SDLoc dl(Node);
11403
11404	auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
11405	auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
11406	SDValue IsLT = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS, Cond: LTPredicate);
11407	SDValue IsGT = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS, Cond: GTPredicate);
11408
11409	// We can't perform arithmetic on i1 values. Extending them would
11410	// probably result in worse codegen, so let's just use two selects instead.
11411	// Some targets are also just better off using selects rather than subtraction
11412	// because one of the conditions can be merged with one of the selects.
11413	// And finally, if we don't know the contents of high bits of a boolean value
11414	// we can't perform any arithmetic either.
11415	if (preferSelectsOverBooleanArithmetic(VT) \|\|
11416	BoolVT.getScalarSizeInBits() == `1` \|\|
11417	getBooleanContents(Type: BoolVT) == UndefinedBooleanContent) {
11418	SDValue SelectZeroOrOne =
11419	DAG.getSelect(DL: dl, VT: ResVT, Cond: IsGT, LHS: DAG.getConstant(Val: `1`, DL: dl, VT: ResVT),
11420	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: ResVT));
11421	return DAG.getSelect(DL: dl, VT: ResVT, Cond: IsLT, LHS: DAG.getAllOnesConstant(DL: dl, VT: ResVT),
11422	RHS: SelectZeroOrOne);
11423	}
11424
11425	if (getBooleanContents(Type: BoolVT) == ZeroOrNegativeOneBooleanContent)
11426	std::swap(a&: IsGT, b&: IsLT);
11427	return DAG.getSExtOrTrunc(Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: BoolVT, N1: IsGT, N2: IsLT), DL: dl,
11428	VT: ResVT);
11429	}
11430
11431	SDValue TargetLowering::expandShlSat(SDNode Node, SelectionDAG &DAG) const* {
11432	unsigned Opcode = Node->getOpcode();
11433	bool IsSigned = Opcode == ISD::SSHLSAT;
11434	SDValue LHS = Node->getOperand(Num: `0`);
11435	SDValue RHS = Node->getOperand(Num: `1`);
11436	EVT VT = LHS.getValueType();
11437	SDLoc dl(Node);
11438
11439	assert((Node->getOpcode() == ISD::SSHLSAT \|\|
11440	Node->getOpcode() == ISD::USHLSAT) &&
11441	"Expected a SHLSAT opcode");
11442	assert(VT.isInteger() && "Expected operands to be integers");
11443
11444	if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
11445	return DAG.UnrollVectorOp(N: Node);
11446
11447	// If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
11448
11449	unsigned BW = VT.getScalarSizeInBits();
11450	EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
11451	SDValue Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS, N2: RHS);
11452	SDValue Orig =
11453	DAG.getNode(Opcode: IsSigned ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: Result, N2: RHS);
11454
11455	SDValue SatVal;
11456	if (IsSigned) {
11457	SDValue SatMin = DAG.getConstant(Val: APInt::getSignedMinValue(numBits: BW), DL: dl, VT);
11458	SDValue SatMax = DAG.getConstant(Val: APInt::getSignedMaxValue(numBits: BW), DL: dl, VT);
11459	SDValue Cond =
11460	DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: DAG.getConstant(Val: `0`, DL: dl, VT), Cond: ISD::SETLT);
11461	SatVal = DAG.getSelect(DL: dl, VT, Cond, LHS: SatMin, RHS: SatMax);
11462	} else {
11463	SatVal = DAG.getConstant(Val: APInt::getMaxValue(numBits: BW), DL: dl, VT);
11464	}
11465	SDValue Cond = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: Orig, Cond: ISD::SETNE);
11466	return DAG.getSelect(DL: dl, VT, Cond, LHS: SatVal, RHS: Result);
11467	}
11468
11469	void TargetLowering::forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl,
11470	bool Signed, SDValue &Lo, SDValue &Hi,
11471	SDValue LHS, SDValue RHS,
11472	SDValue HiLHS, SDValue HiRHS) const {
11473	EVT VT = LHS.getValueType();
11474	assert(RHS.getValueType() == VT && "Mismatching operand types");
11475
11476	assert((HiLHS && HiRHS) \|\| (!HiLHS && !HiRHS));
11477	assert((!Signed \|\| !HiLHS) &&
11478	"Signed flag should only be set when HiLHS and RiRHS are null");
11479
11480	// We'll expand the multiplication by brute force because we have no other
11481	// options. This is a trivially-generalized version of the code from
11482	// Hacker's Delight (itself derived from Knuth's Algorithm M from section
11483	// 4.3.1). If Signed is set, we can use arithmetic right shifts to propagate
11484	// sign bits while calculating the Hi half.
11485	unsigned Bits = VT.getSizeInBits();
11486	unsigned HalfBits = Bits / `2`;
11487	SDValue Mask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: Bits, loBitsSet: HalfBits), DL: dl, VT);
11488	SDValue LL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: LHS, N2: Mask);
11489	SDValue RL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: RHS, N2: Mask);
11490
11491	SDValue T = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LL, N2: RL);
11492	SDValue TL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: T, N2: Mask);
11493
11494	SDValue Shift = DAG.getShiftAmountConstant(Val: HalfBits, VT, DL: dl);
11495	// This is always an unsigned shift.
11496	SDValue TH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: T, N2: Shift);
11497
11498	unsigned ShiftOpc = Signed ? ISD::SRA : ISD::SRL;
11499	SDValue LH = DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: LHS, N2: Shift);
11500	SDValue RH = DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: RHS, N2: Shift);
11501
11502	SDValue U =
11503	DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LH, N2: RL), N2: TH);
11504	SDValue UL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: U, N2: Mask);
11505	SDValue UH = DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: U, N2: Shift);
11506
11507	SDValue V =
11508	DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LL, N2: RH), N2: UL);
11509	SDValue VH = DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: V, N2: Shift);
11510
11511	Lo = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: TL,
11512	N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: V, N2: Shift));
11513
11514	Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LH, N2: RH),
11515	N2: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: UH, N2: VH));
11516
11517	// If HiLHS and HiRHS are set, multiply them by the opposite low part and add
11518	// the products to Hi.
11519	if (HiLHS) {
11520	Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Hi,
11521	N2: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT,
11522	N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: HiRHS, N2: LHS),
11523	N2: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: RHS, N2: HiLHS)));
11524	}
11525	}
11526
11527	void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
11528	bool Signed, const SDValue LHS,
11529	const SDValue RHS, SDValue &Lo,
11530	SDValue &Hi) const {
11531	EVT VT = LHS.getValueType();
11532	assert(RHS.getValueType() == VT && "Mismatching operand types");
11533	EVT WideVT = EVT::getIntegerVT(Context&: DAG.getContext(), BitWidth: VT.getSizeInBits() `2`);
11534	// We can fall back to a libcall with an illegal type for the MUL if we
11535	// have a libcall big enough.
11536	RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
11537	if (WideVT == MVT::i16)
11538	LC = RTLIB::MUL_I16;
11539	else if (WideVT == MVT::i32)
11540	LC = RTLIB::MUL_I32;
11541	else if (WideVT == MVT::i64)
11542	LC = RTLIB::MUL_I64;
11543	else if (WideVT == MVT::i128)
11544	LC = RTLIB::MUL_I128;
11545
11546	RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(Call: LC);
11547	if (LibcallImpl == RTLIB::Unsupported) {
11548	forceExpandMultiply(DAG, dl, Signed, Lo, Hi, LHS, RHS);
11549	return;
11550	}
11551
11552	SDValue HiLHS, HiRHS;
11553	if (Signed) {
11554	// The high part is obtained by SRA'ing all but one of the bits of low
11555	// part.
11556	unsigned LoSize = VT.getFixedSizeInBits();
11557	SDValue Shift = DAG.getShiftAmountConstant(Val: LoSize - `1`, VT, DL: dl);
11558	HiLHS = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: LHS, N2: Shift);
11559	HiRHS = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: RHS, N2: Shift);
11560	} else {
11561	HiLHS = DAG.getConstant(Val: `0`, DL: dl, VT);
11562	HiRHS = DAG.getConstant(Val: `0`, DL: dl, VT);
11563	}
11564
11565	// Attempt a libcall.
11566	SDValue Ret;
11567	TargetLowering::MakeLibCallOptions CallOptions;
11568	CallOptions.setIsSigned(Signed);
11569	CallOptions.setIsPostTypeLegalization(true);
11570	if (shouldSplitFunctionArgumentsAsLittleEndian(DL: DAG.getDataLayout())) {
11571	// Halves of WideVT are packed into registers in different order
11572	// depending on platform endianness. This is usually handled by
11573	// the C calling convention, but we can't defer to it in
11574	// the legalizer.
11575	SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
11576	Ret = makeLibCall(DAG, LC, RetVT: WideVT, Ops: Args, CallOptions, dl).first;
11577	} else {
11578	SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
11579	Ret = makeLibCall(DAG, LC, RetVT: WideVT, Ops: Args, CallOptions, dl).first;
11580	}
11581	assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
11582	"Ret value is a collection of constituent nodes holding result.");
11583	if (DAG.getDataLayout().isLittleEndian()) {
11584	// Same as above.
11585	Lo = Ret.getOperand(i: `0`);
11586	Hi = Ret.getOperand(i: `1`);
11587	} else {
11588	Lo = Ret.getOperand(i: `1`);
11589	Hi = Ret.getOperand(i: `0`);
11590	}
11591	}
11592
11593	SDValue
11594	TargetLowering::expandFixedPointMul(SDNode Node, SelectionDAG &DAG) const* {
11595	assert((Node->getOpcode() == ISD::SMULFIX \|\|
11596	Node->getOpcode() == ISD::UMULFIX \|\|
11597	Node->getOpcode() == ISD::SMULFIXSAT \|\|
11598	Node->getOpcode() == ISD::UMULFIXSAT) &&
11599	"Expected a fixed point multiplication opcode");
11600
11601	SDLoc dl(Node);
11602	SDValue LHS = Node->getOperand(Num: `0`);
11603	SDValue RHS = Node->getOperand(Num: `1`);
11604	EVT VT = LHS.getValueType();
11605	unsigned Scale = Node->getConstantOperandVal(Num: `2`);
11606	bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT \|\|
11607	Node->getOpcode() == ISD::UMULFIXSAT);
11608	bool Signed = (Node->getOpcode() == ISD::SMULFIX \|\|
11609	Node->getOpcode() == ISD::SMULFIXSAT);
11610	EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
11611	unsigned VTSize = VT.getScalarSizeInBits();
11612
11613	if (!Scale) {
11614	// [us]mul.fix(a, b, 0) -> mul(a, b)
11615	if (!Saturating) {
11616	if (isOperationLegalOrCustom(Op: ISD::MUL, VT))
11617	return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
11618	} else if (Signed && isOperationLegalOrCustom(Op: ISD::SMULO, VT)) {
11619	SDValue Result =
11620	DAG.getNode(Opcode: ISD::SMULO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
11621	SDValue Product = Result.getValue(R: `0`);
11622	SDValue Overflow = Result.getValue(R: `1`);
11623	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
11624
11625	APInt MinVal = APInt::getSignedMinValue(numBits: VTSize);
11626	APInt MaxVal = APInt::getSignedMaxValue(numBits: VTSize);
11627	SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
11628	SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
11629	// Xor the inputs, if resulting sign bit is 0 the product will be
11630	// positive, else negative.
11631	SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: LHS, N2: RHS);
11632	SDValue ProdNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Xor, RHS: Zero, Cond: ISD::SETLT);
11633	Result = DAG.getSelect(DL: dl, VT, Cond: ProdNeg, LHS: SatMin, RHS: SatMax);
11634	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Result, RHS: Product);
11635	} else if (!Signed && isOperationLegalOrCustom(Op: ISD::UMULO, VT)) {
11636	SDValue Result =
11637	DAG.getNode(Opcode: ISD::UMULO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
11638	SDValue Product = Result.getValue(R: `0`);
11639	SDValue Overflow = Result.getValue(R: `1`);
11640
11641	APInt MaxVal = APInt::getMaxValue(numBits: VTSize);
11642	SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
11643	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMax, RHS: Product);
11644	}
11645	}
11646
11647	assert(((Signed && Scale < VTSize) \|\| (!Signed && Scale <= VTSize)) &&
11648	"Expected scale to be less than the number of bits if signed or at "
11649	"most the number of bits if unsigned.");
11650	assert(LHS.getValueType() == RHS.getValueType() &&
11651	"Expected both operands to be the same type");
11652
11653	// Get the upper and lower bits of the result.
11654	SDValue Lo, Hi;
11655	unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
11656	unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
11657	EVT WideVT = EVT::getIntegerVT(Context&: DAG.getContext(), BitWidth: VTSize `2`);
11658	if (VT.isVector())
11659	WideVT =
11660	EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT, EC: VT.getVectorElementCount());
11661	if (isOperationLegalOrCustom(Op: LoHiOp, VT)) {
11662	SDValue Result = DAG.getNode(Opcode: LoHiOp, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: LHS, N2: RHS);
11663	Lo = Result.getValue(R: `0`);
11664	Hi = Result.getValue(R: `1`);
11665	} else if (isOperationLegalOrCustom(Op: HiOp, VT)) {
11666	Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
11667	Hi = DAG.getNode(Opcode: HiOp, DL: dl, VT, N1: LHS, N2: RHS);
11668	} else if (isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT)) {
11669	// Try for a multiplication using a wider type.
11670	unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
11671	SDValue LHSExt = DAG.getNode(Opcode: Ext, DL: dl, VT: WideVT, Operand: LHS);
11672	SDValue RHSExt = DAG.getNode(Opcode: Ext, DL: dl, VT: WideVT, Operand: RHS);
11673	SDValue Res = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: LHSExt, N2: RHSExt);
11674	Lo = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Res);
11675	SDValue Shifted =
11676	DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: WideVT, N1: Res,
11677	N2: DAG.getShiftAmountConstant(Val: VTSize, VT: WideVT, DL: dl));
11678	Hi = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Shifted);
11679	} else if (VT.isVector()) {
11680	return SDValue ();
11681	} else {
11682	forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
11683	}
11684
11685	if (Scale == VTSize)
11686	// Result is just the top half since we'd be shifting by the width of the
11687	// operand. Overflow impossible so this works for both UMULFIX and
11688	// UMULFIXSAT.
11689	return Hi;
11690
11691	// The result will need to be shifted right by the scale since both operands
11692	// are scaled. The result is given to us in 2 halves, so we only want part of
11693	// both in the result.
11694	SDValue Result = DAG.getNode(Opcode: ISD::FSHR, DL: dl, VT, N1: Hi, N2: Lo,
11695	N3: DAG.getShiftAmountConstant(Val: Scale, VT, DL: dl));
11696	if (!Saturating)
11697	return Result;
11698
11699	if (!Signed) {
11700	// Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
11701	// widened multiplication) aren't all zeroes.
11702
11703	// Saturate to max if ((Hi >> Scale) != 0),
11704	// which is the same as if (Hi > ((1 << Scale) - 1))
11705	APInt MaxVal = APInt::getMaxValue(numBits: VTSize);
11706	SDValue LowMask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VTSize, loBitsSet: Scale),
11707	DL: dl, VT);
11708	Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: LowMask,
11709	True: DAG.getConstant(Val: MaxVal, DL: dl, VT), False: Result,
11710	Cond: ISD::SETUGT);
11711
11712	return Result;
11713	}
11714
11715	// Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
11716	// widened multiplication) aren't all ones or all zeroes.
11717
11718	SDValue SatMin = DAG.getConstant(Val: APInt::getSignedMinValue(numBits: VTSize), DL: dl, VT);
11719	SDValue SatMax = DAG.getConstant(Val: APInt::getSignedMaxValue(numBits: VTSize), DL: dl, VT);
11720
11721	if (Scale == `0`) {
11722	SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Lo,
11723	N2: DAG.getShiftAmountConstant(Val: VTSize - `1`, VT, DL: dl));
11724	SDValue Overflow = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Hi, RHS: Sign, Cond: ISD::SETNE);
11725	// Saturated to SatMin if wide product is negative, and SatMax if wide
11726	// product is positive ...
11727	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
11728	SDValue ResultIfOverflow = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: Zero, True: SatMin, False: SatMax,
11729	Cond: ISD::SETLT);
11730	// ... but only if we overflowed.
11731	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: ResultIfOverflow, RHS: Result);
11732	}
11733
11734	// We handled Scale==0 above so all the bits to examine is in Hi.
11735
11736	// Saturate to max if ((Hi >> (Scale - 1)) > 0),
11737	// which is the same as if (Hi > (1 << (Scale - 1)) - 1)
11738	SDValue LowMask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VTSize, loBitsSet: Scale - `1`),
11739	DL: dl, VT);
11740	Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: LowMask, True: SatMax, False: Result, Cond: ISD::SETGT);
11741	// Saturate to min if (Hi >> (Scale - 1)) < -1),
11742	// which is the same as if (HI < (-1 << (Scale - 1))
11743	SDValue HighMask =
11744	DAG.getConstant(Val: APInt::getHighBitsSet(numBits: VTSize, hiBitsSet: VTSize - Scale + `1`),
11745	DL: dl, VT);
11746	Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: HighMask, True: SatMin, False: Result, Cond: ISD::SETLT);
11747	return Result;
11748	}
11749
11750	SDValue
11751	TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
11752	SDValue LHS, SDValue RHS,
11753	unsigned Scale, SelectionDAG &DAG) const {
11754	assert((Opcode == ISD::SDIVFIX \|\| Opcode == ISD::SDIVFIXSAT \|\|
11755	Opcode == ISD::UDIVFIX \|\| Opcode == ISD::UDIVFIXSAT) &&
11756	"Expected a fixed point division opcode");
11757
11758	EVT VT = LHS.getValueType();
11759	bool Signed = Opcode == ISD::SDIVFIX \|\| Opcode == ISD::SDIVFIXSAT;
11760	bool Saturating = Opcode == ISD::SDIVFIXSAT \|\| Opcode == ISD::UDIVFIXSAT;
11761	EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
11762
11763	// If there is enough room in the type to upscale the LHS or downscale the
11764	// RHS before the division, we can perform it in this type without having to
11765	// resize. For signed operations, the LHS headroom is the number of
11766	// redundant sign bits, and for unsigned ones it is the number of zeroes.
11767	// The headroom for the RHS is the number of trailing zeroes.
11768	unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(Op: LHS) - `1`
11769	: DAG.computeKnownBits(Op: LHS).countMinLeadingZeros();
11770	unsigned RHSTrail = DAG.computeKnownBits(Op: RHS).countMinTrailingZeros();
11771
11772	// For signed saturating operations, we need to be able to detect true integer
11773	// division overflow; that is, when you have MIN / -EPS. However, this
11774	// is undefined behavior and if we emit divisions that could take such
11775	// values it may cause undesired behavior (arithmetic exceptions on x86, for
11776	// example).
11777	// Avoid this by requiring an extra bit so that we never get this case.
11778	// FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
11779	// signed saturating division, we need to emit a whopping 32-bit division.
11780	if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
11781	return SDValue ();
11782
11783	unsigned LHSShift = std::min(a: LHSLead, b: Scale);
11784	unsigned RHSShift = Scale - LHSShift;
11785
11786	// At this point, we know that if we shift the LHS up by LHSShift and the
11787	// RHS down by RHSShift, we can emit a regular division with a final scaling
11788	// factor of Scale.
11789
11790	if (LHSShift)
11791	LHS = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS,
11792	N2: DAG.getShiftAmountConstant(Val: LHSShift, VT, DL: dl));
11793	if (RHSShift)
11794	RHS = DAG.getNode(Opcode: Signed ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: RHS,
11795	N2: DAG.getShiftAmountConstant(Val: RHSShift, VT, DL: dl));
11796
11797	SDValue Quot;
11798	if (Signed) {
11799	// For signed operations, if the resulting quotient is negative and the
11800	// remainder is nonzero, subtract 1 from the quotient to round towards
11801	// negative infinity.
11802	SDValue Rem;
11803	// FIXME: Ideally we would always produce an SDIVREM here, but if the
11804	// type isn't legal, SDIVREM cannot be expanded. There is no reason why
11805	// we couldn't just form a libcall, but the type legalizer doesn't do it.
11806	if (isTypeLegal(VT) &&
11807	isOperationLegalOrCustom(Op: ISD::SDIVREM, VT)) {
11808	Quot = DAG.getNode(Opcode: ISD::SDIVREM, DL: dl,
11809	VTList: DAG.getVTList(VT1: VT, VT2: VT),
11810	N1: LHS, N2: RHS);
11811	Rem = Quot.getValue(R: `1`);
11812	Quot = Quot.getValue(R: `0`);
11813	} else {
11814	Quot = DAG.getNode(Opcode: ISD::SDIV, DL: dl, VT,
11815	N1: LHS, N2: RHS);
11816	Rem = DAG.getNode(Opcode: ISD::SREM, DL: dl, VT,
11817	N1: LHS, N2: RHS);
11818	}
11819	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
11820	SDValue RemNonZero = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Rem, RHS: Zero, Cond: ISD::SETNE);
11821	SDValue LHSNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: Zero, Cond: ISD::SETLT);
11822	SDValue RHSNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: RHS, RHS: Zero, Cond: ISD::SETLT);
11823	SDValue QuotNeg = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: BoolVT, N1: LHSNeg, N2: RHSNeg);
11824	SDValue Sub1 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Quot,
11825	N2: DAG.getConstant(Val: `1`, DL: dl, VT));
11826	Quot = DAG.getSelect(DL: dl, VT,
11827	Cond: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: BoolVT, N1: RemNonZero, N2: QuotNeg),
11828	LHS: Sub1, RHS: Quot);
11829	} else
11830	Quot = DAG.getNode(Opcode: ISD::UDIV, DL: dl, VT,
11831	N1: LHS, N2: RHS);
11832
11833	return Quot;
11834	}
11835
11836	void TargetLowering::expandUADDSUBO(
11837	SDNode Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const* {
11838	SDLoc dl(Node);
11839	SDValue LHS = Node->getOperand(Num: `0`);
11840	SDValue RHS = Node->getOperand(Num: `1`);
11841	bool IsAdd = Node->getOpcode() == ISD::UADDO;
11842
11843	// If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
11844	unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
11845	if (isOperationLegalOrCustom(Op: OpcCarry, VT: Node->getValueType(ResNo: `0`))) {
11846	SDValue CarryIn = DAG.getConstant(Val: `0`, DL: dl, VT: Node->getValueType(ResNo: `1`));
11847	SDValue NodeCarry = DAG.getNode(Opcode: OpcCarry, DL: dl, VTList: Node->getVTList(),
11848	Ops: { LHS, RHS, CarryIn });
11849	Result = SDValue (NodeCarry.getNode(), `0`);
11850	Overflow = SDValue (NodeCarry.getNode(), `1`);
11851	return;
11852	}
11853
11854	Result = DAG.getNode(Opcode: IsAdd ? ISD::ADD : ISD::SUB, DL: dl,
11855	VT: LHS.getValueType(), N1: LHS, N2: RHS);
11856
11857	EVT ResultType = Node->getValueType(ResNo: `1`);
11858	EVT SetCCType = getSetCCResultType(
11859	DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: `0`));
11860	SDValue SetCC;
11861	if (IsAdd && isOneConstant(V: RHS)) {
11862	// Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
11863	// the live range of X. We assume comparing with 0 is cheap.
11864	// The general case (X + C) < C is not necessarily beneficial. Although we
11865	// reduce the live range of X, we may introduce the materialization of
11866	// constant C.
11867	SetCC =
11868	DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Result,
11869	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: Node->getValueType(ResNo: `0`)), Cond: ISD::SETEQ);
11870	} else if (IsAdd && isAllOnesConstant(V: RHS)) {
11871	// Special case: uaddo X, -1 overflows if X != 0.
11872	SetCC =
11873	DAG.getSetCC(DL: dl, VT: SetCCType, LHS,
11874	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: Node->getValueType(ResNo: `0`)), Cond: ISD::SETNE);
11875	} else {
11876	ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
11877	SetCC = DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Result, RHS: LHS, Cond: CC);
11878	}
11879	Overflow = DAG.getBoolExtOrTrunc(Op: SetCC, SL: dl, VT: ResultType, OpVT: ResultType);
11880	}
11881
11882	void TargetLowering::expandSADDSUBO(
11883	SDNode Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const* {
11884	SDLoc dl(Node);
11885	SDValue LHS = Node->getOperand(Num: `0`);
11886	SDValue RHS = Node->getOperand(Num: `1`);
11887	bool IsAdd = Node->getOpcode() == ISD::SADDO;
11888
11889	Result = DAG.getNode(Opcode: IsAdd ? ISD::ADD : ISD::SUB, DL: dl,
11890	VT: LHS.getValueType(), N1: LHS, N2: RHS);
11891
11892	EVT ResultType = Node->getValueType(ResNo: `1`);
11893	EVT OType = getSetCCResultType(
11894	DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: `0`));
11895
11896	// If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
11897	unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
11898	if (isOperationLegal(Op: OpcSat, VT: LHS.getValueType())) {
11899	SDValue Sat = DAG.getNode(Opcode: OpcSat, DL: dl, VT: LHS.getValueType(), N1: LHS, N2: RHS);
11900	SDValue SetCC = DAG.getSetCC(DL: dl, VT: OType, LHS: Result, RHS: Sat, Cond: ISD::SETNE);
11901	Overflow = DAG.getBoolExtOrTrunc(Op: SetCC, SL: dl, VT: ResultType, OpVT: ResultType);
11902	return;
11903	}
11904
11905	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT: LHS.getValueType());
11906
11907	// For an addition, the result should be less than one of the operands (LHS)
11908	// if and only if the other operand (RHS) is negative, otherwise there will
11909	// be overflow.
11910	// For a subtraction, the result should be less than one of the operands
11911	// (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11912	// otherwise there will be overflow.
11913	SDValue ResultLowerThanLHS = DAG.getSetCC(DL: dl, VT: OType, LHS: Result, RHS: LHS, Cond: ISD::SETLT);
11914	SDValue ConditionRHS =
11915	DAG.getSetCC(DL: dl, VT: OType, LHS: RHS, RHS: Zero, Cond: IsAdd ? ISD::SETLT : ISD::SETGT);
11916
11917	Overflow = DAG.getBoolExtOrTrunc(
11918	Op: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OType, N1: ConditionRHS, N2: ResultLowerThanLHS), SL: dl,
11919	VT: ResultType, OpVT: ResultType);
11920	}
11921
11922	bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
11923	SDValue &Overflow, SelectionDAG &DAG) const {
11924	SDLoc dl(Node);
11925	EVT VT = Node->getValueType(ResNo: `0`);
11926	EVT SetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
11927	SDValue LHS = Node->getOperand(Num: `0`);
11928	SDValue RHS = Node->getOperand(Num: `1`);
11929	bool isSigned = Node->getOpcode() == ISD::SMULO;
11930
11931	// For power-of-two multiplications we can use a simpler shift expansion.
11932	if (ConstantSDNode *RHSC = isConstOrConstSplat(N: RHS)) {
11933	const APInt &C = RHSC->getAPIntValue();
11934	// mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
11935	if (C.isPowerOf2()) {
11936	// smulo(x, signed_min) is same as umulo(x, signed_min).
11937	bool UseArithShift = isSigned && !C.isMinSignedValue();
11938	SDValue ShiftAmt = DAG.getShiftAmountConstant(Val: C.logBase2(), VT, DL: dl);
11939	Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS, N2: ShiftAmt);
11940	Overflow = DAG.getSetCC(DL: dl, VT: SetCCVT,
11941	LHS: DAG.getNode(Opcode: UseArithShift ? ISD::SRA : ISD::SRL,
11942	DL: dl, VT, N1: Result, N2: ShiftAmt),
11943	RHS: LHS, Cond: ISD::SETNE);
11944	return true;
11945	}
11946	}
11947
11948	EVT WideVT = EVT::getIntegerVT(Context&: DAG.getContext(), BitWidth: VT.getScalarSizeInBits() `2`);
11949	if (VT.isVector())
11950	WideVT =
11951	EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT, EC: VT.getVectorElementCount());
11952
11953	SDValue BottomHalf;
11954	SDValue TopHalf;
11955	static const unsigned Ops[`2`][`3`] =
11956	{ { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
11957	{ ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
11958	if (isOperationLegalOrCustom(Op: Ops[isSigned][`0`], VT)) {
11959	BottomHalf = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
11960	TopHalf = DAG.getNode(Opcode: Ops[isSigned][`0`], DL: dl, VT, N1: LHS, N2: RHS);
11961	} else if (isOperationLegalOrCustom(Op: Ops[isSigned][`1`], VT)) {
11962	BottomHalf = DAG.getNode(Opcode: Ops[isSigned][`1`], DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: LHS,
11963	N2: RHS);
11964	TopHalf = BottomHalf.getValue(R: `1`);
11965	} else if (isTypeLegal(VT: WideVT)) {
11966	LHS = DAG.getNode(Opcode: Ops[isSigned][`2`], DL: dl, VT: WideVT, Operand: LHS);
11967	RHS = DAG.getNode(Opcode: Ops[isSigned][`2`], DL: dl, VT: WideVT, Operand: RHS);
11968	SDValue Mul = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: LHS, N2: RHS);
11969	BottomHalf = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Mul);
11970	SDValue ShiftAmt =
11971	DAG.getShiftAmountConstant(Val: VT.getScalarSizeInBits(), VT: WideVT, DL: dl);
11972	TopHalf = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT,
11973	Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideVT, N1: Mul, N2: ShiftAmt));
11974	} else {
11975	if (VT.isVector())
11976	return false;
11977
11978	forceExpandWideMUL(DAG, dl, Signed: isSigned, LHS, RHS, Lo&: BottomHalf, Hi&: TopHalf);
11979	}
11980
11981	Result = BottomHalf;
11982	if (isSigned) {
11983	SDValue ShiftAmt = DAG.getShiftAmountConstant(
11984	Val: VT.getScalarSizeInBits() - `1`, VT: BottomHalf.getValueType(), DL: dl);
11985	SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: BottomHalf, N2: ShiftAmt);
11986	Overflow = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: TopHalf, RHS: Sign, Cond: ISD::SETNE);
11987	} else {
11988	Overflow = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: TopHalf,
11989	RHS: DAG.getConstant(Val: `0`, DL: dl, VT), Cond: ISD::SETNE);
11990	}
11991
11992	// Truncate the result if SetCC returns a larger type than needed.
11993	EVT RType = Node->getValueType(ResNo: `1`);
11994	if (RType.bitsLT(VT: Overflow.getValueType()))
11995	Overflow = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: RType, Operand: Overflow);
11996
11997	assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
11998	"Unexpected result type for S/UMULO legalization");
11999	return true;
12000	}
12001
12002	SDValue TargetLowering::expandVecReduce(SDNode Node, SelectionDAG &DAG) const* {
12003	SDLoc dl(Node);
12004	unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(VecReduceOpcode: Node->getOpcode());
12005	SDValue Op = Node->getOperand(Num: `0`);
12006	EVT VT = Op.getValueType();
12007
12008	// Try to use a shuffle reduction for power of two vectors.
12009	if (VT.isPow2VectorType()) {
12010	while (VT.getVectorElementCount().isKnownMultipleOf(RHS: `2`)) {
12011	EVT HalfVT = VT.getHalfNumVectorElementsVT(Context&: *DAG.getContext());
12012	if (!isOperationLegalOrCustom(Op: BaseOpcode, VT: HalfVT))
12013	break;
12014
12015	SDValue Lo, Hi;
12016	std::tie(args&: Lo, args&: Hi) = DAG.SplitVector(N: Op, DL: dl);
12017	Op = DAG.getNode(Opcode: BaseOpcode, DL: dl, VT: HalfVT, N1: Lo, N2: Hi, Flags: Node->getFlags());
12018	VT = HalfVT;
12019
12020	// Stop if splitting is enough to make the reduction legal.
12021	if (isOperationLegalOrCustom(Op: Node->getOpcode(), VT: HalfVT))
12022	return DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VT: Node->getValueType(ResNo: `0`), Operand: Op,
12023	Flags: Node->getFlags());
12024	}
12025	}
12026
12027	if (VT.isScalableVector())
12028	reportFatalInternalError(
12029	reason: "Expanding reductions for scalable vectors is undefined.");
12030
12031	EVT EltVT = VT.getVectorElementType();
12032	unsigned NumElts = VT.getVectorNumElements();
12033
12034	SmallVector<SDValue, `8`> Ops;
12035	DAG.ExtractVectorElements(Op, Args&: Ops, Start: `0`, Count: NumElts);
12036
12037	SDValue Res = Ops [`0`];
12038	for (unsigned i = `1`; i < NumElts; i++)
12039	Res = DAG.getNode(Opcode: BaseOpcode, DL: dl, VT: EltVT, N1: Res, N2: Ops [i], Flags: Node->getFlags());
12040
12041	// Result type may be wider than element type.
12042	if (EltVT != Node->getValueType(ResNo: `0`))
12043	Res = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: Node->getValueType(ResNo: `0`), Operand: Res);
12044	return Res;
12045	}
12046
12047	SDValue TargetLowering::expandVecReduceSeq(SDNode Node, SelectionDAG &DAG) const* {
12048	SDLoc dl(Node);
12049	SDValue AccOp = Node->getOperand(Num: `0`);
12050	SDValue VecOp = Node->getOperand(Num: `1`);
12051	SDNodeFlags Flags = Node->getFlags();
12052
12053	EVT VT = VecOp.getValueType();
12054	EVT EltVT = VT.getVectorElementType();
12055
12056	if (VT.isScalableVector())
12057	report_fatal_error(
12058	reason: "Expanding reductions for scalable vectors is undefined.");
12059
12060	unsigned NumElts = VT.getVectorNumElements();
12061
12062	SmallVector<SDValue, `8`> Ops;
12063	DAG.ExtractVectorElements(Op: VecOp, Args&: Ops, Start: `0`, Count: NumElts);
12064
12065	unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(VecReduceOpcode: Node->getOpcode());
12066
12067	SDValue Res = AccOp;
12068	for (unsigned i = `0`; i < NumElts; i++)
12069	Res = DAG.getNode(Opcode: BaseOpcode, DL: dl, VT: EltVT, N1: Res, N2: Ops [i], Flags);
12070
12071	return Res;
12072	}
12073
12074	bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
12075	SelectionDAG &DAG) const {
12076	EVT VT = Node->getValueType(ResNo: `0`);
12077	SDLoc dl(Node);
12078	bool isSigned = Node->getOpcode() == ISD::SREM;
12079	unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
12080	unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
12081	SDValue Dividend = Node->getOperand(Num: `0`);
12082	SDValue Divisor = Node->getOperand(Num: `1`);
12083	if (isOperationLegalOrCustom(Op: DivRemOpc, VT)) {
12084	SDVTList VTs = DAG.getVTList(VT1: VT, VT2: VT);
12085	Result = DAG.getNode(Opcode: DivRemOpc, DL: dl, VTList: VTs, N1: Dividend, N2: Divisor).getValue(R: `1`);
12086	return true;
12087	}
12088	if (isOperationLegalOrCustom(Op: DivOpc, VT)) {
12089	// X % Y -> X-X/YY*
12090	SDValue Divide = DAG.getNode(Opcode: DivOpc, DL: dl, VT, N1: Dividend, N2: Divisor);
12091	SDValue Mul = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Divide, N2: Divisor);
12092	Result = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Dividend, N2: Mul);
12093	return true;
12094	}
12095	return false;
12096	}
12097
12098	SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
12099	SelectionDAG &DAG) const {
12100	bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
12101	SDLoc dl(SDValue (Node, `0`));
12102	SDValue Src = Node->getOperand(Num: `0`);
12103
12104	// DstVT is the result type, while SatVT is the size to which we saturate
12105	EVT SrcVT = Src.getValueType();
12106	EVT DstVT = Node->getValueType(ResNo: `0`);
12107
12108	EVT SatVT = cast<VTSDNode>(Val: Node->getOperand(Num: `1`))->getVT();
12109	unsigned SatWidth = SatVT.getScalarSizeInBits();
12110	unsigned DstWidth = DstVT.getScalarSizeInBits();
12111	assert(SatWidth <= DstWidth &&
12112	"Expected saturation width smaller than result width");
12113
12114	// Determine minimum and maximum integer values and their corresponding
12115	// floating-point values.
12116	APInt MinInt, MaxInt;
12117	if (IsSigned) {
12118	MinInt = APInt::getSignedMinValue(numBits: SatWidth).sext(width: DstWidth);
12119	MaxInt = APInt::getSignedMaxValue(numBits: SatWidth).sext(width: DstWidth);
12120	} else {
12121	MinInt = APInt::getMinValue(numBits: SatWidth).zext(width: DstWidth);
12122	MaxInt = APInt::getMaxValue(numBits: SatWidth).zext(width: DstWidth);
12123	}
12124
12125	// We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
12126	// libcall emission cannot handle this. Large result types will fail.
12127	if (SrcVT == MVT::f16 \|\| SrcVT == MVT::bf16) {
12128	Src = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f32, Operand: Src);
12129	SrcVT = Src.getValueType();
12130	}
12131
12132	const fltSemantics &Sem = SrcVT.getFltSemantics();
12133	APFloat MinFloat(Sem);
12134	APFloat MaxFloat(Sem);
12135
12136	APFloat::opStatus MinStatus =
12137	MinFloat.convertFromAPInt(Input: MinInt, IsSigned, RM: APFloat::rmTowardZero);
12138	APFloat::opStatus MaxStatus =
12139	MaxFloat.convertFromAPInt(Input: MaxInt, IsSigned, RM: APFloat::rmTowardZero);
12140	bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
12141	!(MaxStatus & APFloat::opStatus::opInexact);
12142
12143	SDValue MinFloatNode = DAG.getConstantFP(Val: MinFloat, DL: dl, VT: SrcVT);
12144	SDValue MaxFloatNode = DAG.getConstantFP(Val: MaxFloat, DL: dl, VT: SrcVT);
12145
12146	// If the integer bounds are exactly representable as floats and min/max are
12147	// legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
12148	// of comparisons and selects.
12149	auto EmitMinMax = [&](unsigned MinOpcode, unsigned MaxOpcode,
12150	bool MayPropagateNaN) {
12151	bool MinMaxLegal = isOperationLegalOrCustom(Op: MinOpcode, VT: SrcVT) &&
12152	isOperationLegalOrCustom(Op: MaxOpcode, VT: SrcVT);
12153	if (!MinMaxLegal)
12154	return SDValue ();
12155
12156	SDValue Clamped = Src;
12157
12158	// Clamp Src by MinFloat from below. If !MayPropagateNaN and Src is NaN
12159	// then the result is MinFloat.
12160	Clamped = DAG.getNode(Opcode: MaxOpcode, DL: dl, VT: SrcVT, N1: Clamped, N2: MinFloatNode);
12161	// Clamp by MaxFloat from above. If !MayPropagateNaN then NaN cannot occur.
12162	Clamped = DAG.getNode(Opcode: MinOpcode, DL: dl, VT: SrcVT, N1: Clamped, N2: MaxFloatNode);
12163	// Convert clamped value to integer.
12164	SDValue FpToInt = DAG.getNode(Opcode: IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
12165	DL: dl, VT: DstVT, Operand: Clamped);
12166
12167	// If !MayPropagateNan and the conversion is unsigned case we're done,
12168	// because we mapped NaN to MinFloat, which will cast to zero.
12169	if (!MayPropagateNaN && !IsSigned)
12170	return FpToInt;
12171
12172	// Otherwise, select 0 if Src is NaN.
12173	SDValue ZeroInt = DAG.getConstant(Val: `0`, DL: dl, VT: DstVT);
12174	EVT SetCCVT =
12175	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: SrcVT);
12176	SDValue IsNan = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Src, Cond: ISD::CondCode::SETUO);
12177	return DAG.getSelect(DL: dl, VT: DstVT, Cond: IsNan, LHS: ZeroInt, RHS: FpToInt);
12178	};
12179	if (AreExactFloatBounds) {
12180	if (SDValue Res = EmitMinMax (ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM,
12181	/MayPropagateNaN=/false))
12182	return Res;
12183	// These may propagate NaN for sNaN operands.
12184	if (SDValue Res =
12185	EmitMinMax (ISD::FMINNUM, ISD::FMAXNUM, /MayPropagateNaN=/true))
12186	return Res;
12187	// These always propagate NaN.
12188	if (SDValue Res =
12189	EmitMinMax (ISD::FMINIMUM, ISD::FMAXIMUM, /MayPropagateNaN=/true))
12190	return Res;
12191	}
12192
12193	SDValue MinIntNode = DAG.getConstant(Val: MinInt, DL: dl, VT: DstVT);
12194	SDValue MaxIntNode = DAG.getConstant(Val: MaxInt, DL: dl, VT: DstVT);
12195
12196	// Result of direct conversion. The assumption here is that the operation is
12197	// non-trapping and it's fine to apply it to an out-of-range value if we
12198	// select it away later.
12199	SDValue FpToInt =
12200	DAG.getNode(Opcode: IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, DL: dl, VT: DstVT, Operand: Src);
12201
12202	SDValue Select = FpToInt;
12203
12204	EVT SetCCVT =
12205	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: SrcVT);
12206
12207	// If Src ULT MinFloat, select MinInt. In particular, this also selects
12208	// MinInt if Src is NaN.
12209	SDValue ULT = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: MinFloatNode, Cond: ISD::SETULT);
12210	Select = DAG.getSelect(DL: dl, VT: DstVT, Cond: ULT, LHS: MinIntNode, RHS: Select);
12211	// If Src OGT MaxFloat, select MaxInt.
12212	SDValue OGT = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: MaxFloatNode, Cond: ISD::SETOGT);
12213	Select = DAG.getSelect(DL: dl, VT: DstVT, Cond: OGT, LHS: MaxIntNode, RHS: Select);
12214
12215	// In the unsigned case we are done, because we mapped NaN to MinInt, which
12216	// is already zero.
12217	if (!IsSigned)
12218	return Select;
12219
12220	// Otherwise, select 0 if Src is NaN.
12221	SDValue ZeroInt = DAG.getConstant(Val: `0`, DL: dl, VT: DstVT);
12222	SDValue IsNan = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Src, Cond: ISD::CondCode::SETUO);
12223	return DAG.getSelect(DL: dl, VT: DstVT, Cond: IsNan, LHS: ZeroInt, RHS: Select);
12224	}
12225
12226	SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue Op,
12227	const SDLoc &dl,
12228	SelectionDAG &DAG) const {
12229	EVT OperandVT = Op.getValueType();
12230	if (OperandVT.getScalarType() == ResultVT.getScalarType())
12231	return Op;
12232	EVT ResultIntVT = ResultVT.changeTypeToInteger();
12233	// We are rounding binary64/binary128 -> binary32 -> bfloat16. This
12234	// can induce double-rounding which may alter the results. We can
12235	// correct for this using a trick explained in: Boldo, Sylvie, and
12236	// Guillaume Melquiond. "When double rounding is odd." 17th IMACS
12237	// World Congress. 2005.
12238	SDValue Narrow = DAG.getFPExtendOrRound(Op, DL: dl, VT: ResultVT);
12239	SDValue NarrowAsWide = DAG.getFPExtendOrRound(Op: Narrow, DL: dl, VT: OperandVT);
12240
12241	// We can keep the narrow value as-is if narrowing was exact (no
12242	// rounding error), the wide value was NaN (the narrow value is also
12243	// NaN and should be preserved) or if we rounded to the odd value.
12244	SDValue NarrowBits = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: ResultIntVT, Operand: Narrow);
12245	SDValue One = DAG.getConstant(Val: `1`, DL: dl, VT: ResultIntVT);
12246	SDValue NegativeOne = DAG.getAllOnesConstant(DL: dl, VT: ResultIntVT);
12247	SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ResultIntVT, N1: NarrowBits, N2: One);
12248	EVT ResultIntVTCCVT = getSetCCResultType(
12249	DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: And.getValueType());
12250	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT: ResultIntVT);
12251	// The result is already odd so we don't need to do anything.
12252	SDValue AlreadyOdd = DAG.getSetCC(DL: dl, VT: ResultIntVTCCVT, LHS: And, RHS: Zero, Cond: ISD::SETNE);
12253
12254	EVT WideSetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(),
12255	VT: Op.getValueType());
12256	// We keep results which are exact, odd or NaN.
12257	SDValue KeepNarrow =
12258	DAG.getSetCC(DL: dl, VT: WideSetCCVT, LHS: Op, RHS: NarrowAsWide, Cond: ISD::SETUEQ);
12259	KeepNarrow = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: WideSetCCVT, N1: KeepNarrow, N2: AlreadyOdd);
12260	// We morally performed a round-down if AbsNarrow is smaller than
12261	// AbsWide.
12262	SDValue AbsWide = DAG.getNode(Opcode: ISD::FABS, DL: dl, VT: OperandVT, Operand: Op);
12263	SDValue AbsNarrowAsWide = DAG.getNode(Opcode: ISD::FABS, DL: dl, VT: OperandVT, Operand: NarrowAsWide);
12264	SDValue NarrowIsRd =
12265	DAG.getSetCC(DL: dl, VT: WideSetCCVT, LHS: AbsWide, RHS: AbsNarrowAsWide, Cond: ISD::SETOGT);
12266	// If the narrow value is odd or exact, pick it.
12267	// Otherwise, narrow is even and corresponds to either the rounded-up
12268	// or rounded-down value. If narrow is the rounded-down value, we want
12269	// the rounded-up value as it will be odd.
12270	SDValue Adjust = DAG.getSelect(DL: dl, VT: ResultIntVT, Cond: NarrowIsRd, LHS: One, RHS: NegativeOne);
12271	SDValue Adjusted = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ResultIntVT, N1: NarrowBits, N2: Adjust);
12272	Op = DAG.getSelect(DL: dl, VT: ResultIntVT, Cond: KeepNarrow, LHS: NarrowBits, RHS: Adjusted);
12273	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: ResultVT, Operand: Op);
12274	}
12275
12276	SDValue TargetLowering::expandFP_ROUND(SDNode Node, SelectionDAG &DAG) const* {
12277	assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
12278	SDValue Op = Node->getOperand(Num: `0`);
12279	EVT VT = Node->getValueType(ResNo: `0`);
12280	SDLoc dl(Node);
12281	if (VT.getScalarType() == MVT::bf16) {
12282	if (Node->getConstantOperandVal(Num: `1`) == `1`) {
12283	return DAG.getNode(Opcode: ISD::FP_TO_BF16, DL: dl, VT, Operand: Node->getOperand(Num: `0`));
12284	}
12285	EVT OperandVT = Op.getValueType();
12286	SDValue IsNaN = DAG.getSetCC(
12287	DL: dl,
12288	VT: getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: OperandVT),
12289	LHS: Op, RHS: Op, Cond: ISD::SETUO);
12290
12291	// We are rounding binary64/binary128 -> binary32 -> bfloat16. This
12292	// can induce double-rounding which may alter the results. We can
12293	// correct for this using a trick explained in: Boldo, Sylvie, and
12294	// Guillaume Melquiond. "When double rounding is odd." 17th IMACS
12295	// World Congress. 2005.
12296	EVT F32 = VT.changeElementType(Context&: *DAG.getContext(), EltVT: MVT::f32);
12297	EVT I32 = F32.changeTypeToInteger();
12298	Op = expandRoundInexactToOdd(ResultVT: F32, Op, dl, DAG);
12299	Op = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: I32, Operand: Op);
12300
12301	// Conversions should set NaN's quiet bit. This also prevents NaNs from
12302	// turning into infinities.
12303	SDValue NaN =
12304	DAG.getNode(Opcode: ISD::OR, DL: dl, VT: I32, N1: Op, N2: DAG.getConstant(Val: `0x400000`, DL: dl, VT: I32));
12305
12306	// Factor in the contribution of the low 16 bits.
12307	SDValue One = DAG.getConstant(Val: `1`, DL: dl, VT: I32);
12308	SDValue Lsb = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: I32, N1: Op,
12309	N2: DAG.getShiftAmountConstant(Val: `16`, VT: I32, DL: dl));
12310	Lsb = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: I32, N1: Lsb, N2: One);
12311	SDValue RoundingBias =
12312	DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: I32, N1: DAG.getConstant(Val: `0x7fff`, DL: dl, VT: I32), N2: Lsb);
12313	SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: I32, N1: Op, N2: RoundingBias);
12314
12315	// Don't round if we had a NaN, we don't want to turn 0x7fffffff into
12316	// 0x80000000.
12317	Op = DAG.getSelect(DL: dl, VT: I32, Cond: IsNaN, LHS: NaN, RHS: Add);
12318
12319	// Now that we have rounded, shift the bits into position.
12320	Op = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: I32, N1: Op,
12321	N2: DAG.getShiftAmountConstant(Val: `16`, VT: I32, DL: dl));
12322	Op = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: I32, Operand: Op);
12323	EVT I16 = I32.changeElementType(Context&: *DAG.getContext(), EltVT: MVT::i16);
12324	Op = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: I16, Operand: Op);
12325	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: Op);
12326	}
12327	return SDValue ();
12328	}
12329
12330	SDValue TargetLowering::expandVectorSplice(SDNode *Node,
12331	SelectionDAG &DAG) const {
12332	assert((Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT \|\|
12333	Node->getOpcode() == ISD::VECTOR_SPLICE_RIGHT) &&
12334	"Unexpected opcode!");
12335	assert((Node->getValueType(`0`).isScalableVector() \|\|
12336	!isa<ConstantSDNode>(Node->getOperand(`2`))) &&
12337	"Fixed length vector types with constant offsets expected to use "
12338	"SHUFFLE_VECTOR!");
12339
12340	EVT VT = Node->getValueType(ResNo: `0`);
12341	SDValue V1 = Node->getOperand(Num: `0`);
12342	SDValue V2 = Node->getOperand(Num: `1`);
12343	SDValue Offset = Node->getOperand(Num: `2`);
12344	SDLoc DL(Node);
12345
12346	// Expand through memory thusly:
12347	// Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
12348	// Store V1, Ptr
12349	// Store V2, Ptr + sizeof(V1)
12350	// if (VECTOR_SPLICE_LEFT)
12351	// Ptr = Ptr + (Offset sizeof(VT.Elt))*
12352	// else
12353	// Ptr = Ptr + sizeof(V1) - (Offset size(VT.Elt))*
12354	// Res = Load Ptr
12355
12356	Align Alignment = DAG.getReducedAlign(VT, /UseABI=/false);
12357
12358	EVT MemVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: VT.getVectorElementType(),
12359	EC: VT.getVectorElementCount() * `2`);
12360	SDValue StackPtr = DAG.CreateStackTemporary(Bytes: MemVT.getStoreSize(), Alignment);
12361	EVT PtrVT = StackPtr.getValueType();
12362	auto &MF = DAG.getMachineFunction();
12363	auto FrameIndex = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
12364	auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FI: FrameIndex);
12365
12366	// Store the lo part of CONCAT_VECTORS(V1, V2)
12367	SDValue StoreV1 = DAG.getStore(Chain: DAG.getEntryNode(), dl: DL, Val: V1, Ptr: StackPtr, PtrInfo);
12368	// Store the hi part of CONCAT_VECTORS(V1, V2)
12369	SDValue VTBytes = DAG.getTypeSize(DL, VT: PtrVT, TS: VT.getStoreSize());
12370	SDValue StackPtr2 = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, N2: VTBytes);
12371	SDValue StoreV2 = DAG.getStore(Chain: StoreV1, dl: DL, Val: V2, Ptr: StackPtr2, PtrInfo);
12372
12373	// NOTE: TrailingBytes must be clamped so as not to read outside of V1:V2.
12374	SDValue EltByteSize =
12375	DAG.getTypeSize(DL, VT: PtrVT, TS: VT.getVectorElementType().getStoreSize());
12376	Offset = DAG.getZExtOrTrunc(Op: Offset, DL, VT: PtrVT);
12377	SDValue TrailingBytes = DAG.getNode(Opcode: ISD::MUL, DL, VT: PtrVT, N1: Offset, N2: EltByteSize);
12378
12379	TrailingBytes = DAG.getNode(Opcode: ISD::UMIN, DL, VT: PtrVT, N1: TrailingBytes, N2: VTBytes);
12380
12381	if (Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT)
12382	StackPtr = DAG.getMemBasePlusOffset(Base: StackPtr, Offset: TrailingBytes, DL);
12383	else
12384	StackPtr = DAG.getNode(Opcode: ISD::SUB, DL, VT: PtrVT, N1: StackPtr2, N2: TrailingBytes);
12385
12386	// Load the spliced result
12387	return DAG.getLoad(VT, dl: DL, Chain: StoreV2, Ptr: StackPtr,
12388	PtrInfo: MachinePointerInfo::getUnknownStack(MF));
12389	}
12390
12391	SDValue TargetLowering::expandVECTOR_COMPRESS(SDNode *Node,
12392	SelectionDAG &DAG) const {
12393	SDLoc DL(Node);
12394	SDValue Vec = Node->getOperand(Num: `0`);
12395	SDValue Mask = Node->getOperand(Num: `1`);
12396	SDValue Passthru = Node->getOperand(Num: `2`);
12397
12398	EVT VecVT = Vec.getValueType();
12399	EVT ScalarVT = VecVT.getScalarType();
12400	EVT MaskVT = Mask.getValueType();
12401	EVT MaskScalarVT = MaskVT.getScalarType();
12402
12403	// Needs to be handled by targets that have scalable vector types.
12404	if (VecVT.isScalableVector())
12405	report_fatal_error(reason: "Cannot expand masked_compress for scalable vectors.");
12406
12407	SDValue StackPtr = DAG.CreateStackTemporary(
12408	Bytes: VecVT.getStoreSize(), Alignment: DAG.getReducedAlign(VT: VecVT, /UseABI=/false));
12409	int FI = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
12410	MachinePointerInfo PtrInfo =
12411	MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI);
12412
12413	MVT PositionVT = getVectorIdxTy(DL: DAG.getDataLayout());
12414	SDValue Chain = DAG.getEntryNode();
12415	SDValue OutPos = DAG.getConstant(Val: `0`, DL, VT: PositionVT);
12416
12417	bool HasPassthru = !Passthru.isUndef();
12418
12419	// If we have a passthru vector, store it on the stack, overwrite the matching
12420	// positions and then re-write the last element that was potentially
12421	// overwritten even though mask[i] = false.
12422	if (HasPassthru)
12423	Chain = DAG.getStore(Chain, dl: DL, Val: Passthru, Ptr: StackPtr, PtrInfo);
12424
12425	SDValue LastWriteVal;
12426	APInt PassthruSplatVal;
12427	bool IsSplatPassthru =
12428	ISD::isConstantSplatVector(N: Passthru.getNode(), SplatValue&: PassthruSplatVal);
12429
12430	if (IsSplatPassthru) {
12431	// As we do not know which position we wrote to last, we cannot simply
12432	// access that index from the passthru vector. So we first check if passthru
12433	// is a splat vector, to use any element ...
12434	LastWriteVal = DAG.getConstant(Val: PassthruSplatVal, DL, VT: ScalarVT);
12435	} else if (HasPassthru) {
12436	// ... if it is not a splat vector, we need to get the passthru value at
12437	// position = popcount(mask) and re-load it from the stack before it is
12438	// overwritten in the loop below.
12439	EVT PopcountVT = ScalarVT.changeTypeToInteger();
12440	SDValue Popcount = DAG.getNode(
12441	Opcode: ISD::TRUNCATE, DL,
12442	VT: MaskVT.changeVectorElementType(Context&: *DAG.getContext(), EltVT: MVT::i1), Operand: Mask);
12443	Popcount = DAG.getNode(
12444	Opcode: ISD::ZERO_EXTEND, DL,
12445	VT: MaskVT.changeVectorElementType(Context&: *DAG.getContext(), EltVT: PopcountVT),
12446	Operand: Popcount);
12447	Popcount = DAG.getNode(Opcode: ISD::VECREDUCE_ADD, DL, VT: PopcountVT, Operand: Popcount);
12448	SDValue LastElmtPtr =
12449	getVectorElementPointer(DAG, VecPtr: StackPtr, VecVT, Index: Popcount);
12450	LastWriteVal = DAG.getLoad(
12451	VT: ScalarVT, dl: DL, Chain, Ptr: LastElmtPtr,
12452	PtrInfo: MachinePointerInfo::getUnknownStack(MF&: DAG.getMachineFunction()));
12453	Chain = LastWriteVal.getValue(R: `1`);
12454	}
12455
12456	unsigned NumElms = VecVT.getVectorNumElements();
12457	for (unsigned I = `0`; I < NumElms; I++) {
12458	SDValue ValI = DAG.getExtractVectorElt(DL, VT: ScalarVT, Vec, Idx: I);
12459	SDValue OutPtr = getVectorElementPointer(DAG, VecPtr: StackPtr, VecVT, Index: OutPos);
12460	Chain = DAG.getStore(
12461	Chain, dl: DL, Val: ValI, Ptr: OutPtr,
12462	PtrInfo: MachinePointerInfo::getUnknownStack(MF&: DAG.getMachineFunction()));
12463
12464	// Get the mask value and add it to the current output position. This
12465	// either increments by 1 if MaskI is true or adds 0 otherwise.
12466	// Freeze in case we have poison/undef mask entries.
12467	SDValue MaskI = DAG.getExtractVectorElt(DL, VT: MaskScalarVT, Vec: Mask, Idx: I);
12468	MaskI = DAG.getFreeze(V: MaskI);
12469	MaskI = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i1, Operand: MaskI);
12470	MaskI = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: PositionVT, Operand: MaskI);
12471	OutPos = DAG.getNode(Opcode: ISD::ADD, DL, VT: PositionVT, N1: OutPos, N2: MaskI);
12472
12473	if (HasPassthru && I == NumElms - `1`) {
12474	SDValue EndOfVector =
12475	DAG.getConstant(Val: VecVT.getVectorNumElements() - `1`, DL, VT: PositionVT);
12476	SDValue AllLanesSelected =
12477	DAG.getSetCC(DL, VT: MVT::i1, LHS: OutPos, RHS: EndOfVector, Cond: ISD::CondCode::SETUGT);
12478	OutPos = DAG.getNode(Opcode: ISD::UMIN, DL, VT: PositionVT, N1: OutPos, N2: EndOfVector);
12479	OutPtr = getVectorElementPointer(DAG, VecPtr: StackPtr, VecVT, Index: OutPos);
12480
12481	// Re-write the last ValI if all lanes were selected. Otherwise,
12482	// overwrite the last write it with the passthru value.
12483	LastWriteVal = DAG.getSelect(DL, VT: ScalarVT, Cond: AllLanesSelected, LHS: ValI,
12484	RHS: LastWriteVal, Flags: SDNodeFlags::Unpredictable);
12485	Chain = DAG.getStore(
12486	Chain, dl: DL, Val: LastWriteVal, Ptr: OutPtr,
12487	PtrInfo: MachinePointerInfo::getUnknownStack(MF&: DAG.getMachineFunction()));
12488	}
12489	}
12490
12491	return DAG.getLoad(VT: VecVT, dl: DL, Chain, Ptr: StackPtr, PtrInfo);
12492	}
12493
12494	SDValue TargetLowering::expandPartialReduceMLA(SDNode *N,
12495	SelectionDAG &DAG) const {
12496	SDLoc DL(N);
12497	SDValue Acc = N->getOperand(Num: `0`);
12498	SDValue MulLHS = N->getOperand(Num: `1`);
12499	SDValue MulRHS = N->getOperand(Num: `2`);
12500	EVT AccVT = Acc.getValueType();
12501	EVT MulOpVT = MulLHS.getValueType();
12502
12503	EVT ExtMulOpVT =
12504	EVT::getVectorVT(Context&: *DAG.getContext(), VT: AccVT.getVectorElementType(),
12505	EC: MulOpVT.getVectorElementCount());
12506
12507	unsigned ExtOpcLHS, ExtOpcRHS;
12508	switch (N->getOpcode()) {
12509	default:
12510	llvm_unreachable("Unexpected opcode");
12511	case ISD::PARTIAL_REDUCE_UMLA:
12512	ExtOpcLHS = ExtOpcRHS = ISD::ZERO_EXTEND;
12513	break;
12514	case ISD::PARTIAL_REDUCE_SMLA:
12515	ExtOpcLHS = ExtOpcRHS = ISD::SIGN_EXTEND;
12516	break;
12517	case ISD::PARTIAL_REDUCE_FMLA:
12518	ExtOpcLHS = ExtOpcRHS = ISD::FP_EXTEND;
12519	break;
12520	}
12521
12522	if (ExtMulOpVT != MulOpVT) {
12523	MulLHS = DAG.getNode(Opcode: ExtOpcLHS, DL, VT: ExtMulOpVT, Operand: MulLHS);
12524	MulRHS = DAG.getNode(Opcode: ExtOpcRHS, DL, VT: ExtMulOpVT, Operand: MulRHS);
12525	}
12526	SDValue Input = MulLHS;
12527	if (N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA) {
12528	if (!llvm::isOneOrOneSplatFP(V: MulRHS))
12529	Input = DAG.getNode(Opcode: ISD::FMUL, DL, VT: ExtMulOpVT, N1: MulLHS, N2: MulRHS);
12530	} else if (!llvm::isOneOrOneSplat(V: MulRHS)) {
12531	Input = DAG.getNode(Opcode: ISD::MUL, DL, VT: ExtMulOpVT, N1: MulLHS, N2: MulRHS);
12532	}
12533
12534	unsigned Stride = AccVT.getVectorMinNumElements();
12535	unsigned ScaleFactor = MulOpVT.getVectorMinNumElements() / Stride;
12536
12537	// Collect all of the subvectors
12538	std::deque<SDValue> Subvectors = {Acc};
12539	for (unsigned I = `0`; I < ScaleFactor; I++)
12540	Subvectors.push_back(x: DAG.getExtractSubvector(DL, VT: AccVT, Vec: Input, Idx: I * Stride));
12541
12542	unsigned FlatNode =
12543	N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA ? ISD::FADD : ISD::ADD;
12544
12545	// Flatten the subvector tree
12546	while (Subvectors.size() > `1`) {
12547	Subvectors.push_back(
12548	x: DAG.getNode(Opcode: FlatNode, DL, VT: AccVT, Ops: {Subvectors [`0`], Subvectors [`1`]}));
12549	Subvectors.pop_front();
12550	Subvectors.pop_front();
12551	}
12552
12553	assert(Subvectors.size() == `1` &&
12554	"There should only be one subvector after tree flattening");
12555
12556	return Subvectors [`0`];
12557	}
12558
12559	/// Given a store node \p StoreNode, return true if it is safe to fold that node
12560	/// into \p FPNode, which expands to a library call with output pointers.
12561	static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode,
12562	SDNode *FPNode) {
12563	SmallVector<const SDNode *, `8`> Worklist;
12564	SmallVector<const SDNode *, `8`> DeferredNodes;
12565	SmallPtrSet<const SDNode *, `16`> Visited;
12566
12567	// Skip FPNode use by StoreNode (that's the use we want to fold into FPNode).
12568	for (SDValue Op : StoreNode->ops())
12569	if (Op.getNode() != FPNode)
12570	Worklist.push_back(Elt: Op.getNode());
12571
12572	unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps();
12573	while (!Worklist.empty()) {
12574	const SDNode *Node = Worklist.pop_back_val();
12575	auto [_, Inserted] = Visited.insert(Ptr: Node);
12576	if (!Inserted)
12577	continue;
12578
12579	if (MaxSteps > `0` && Visited.size() >= MaxSteps)
12580	return false;
12581
12582	// Reached the FPNode (would result in a cycle).
12583	// OR Reached CALLSEQ_START (would result in nested call sequences).
12584	if (Node == FPNode \|\| Node->getOpcode() == ISD::CALLSEQ_START)
12585	return false;
12586
12587	if (Node->getOpcode() == ISD::CALLSEQ_END) {
12588	// Defer looking into call sequences (so we can check we're outside one).
12589	// We still need to look through these for the predecessor check.
12590	DeferredNodes.push_back(Elt: Node);
12591	continue;
12592	}
12593
12594	for (SDValue Op : Node->ops())
12595	Worklist.push_back(Elt: Op.getNode());
12596	}
12597
12598	// True if we're outside a call sequence and don't have the FPNode as a
12599	// predecessor. No cycles or nested call sequences possible.
12600	return !SDNode::hasPredecessorHelper(N: FPNode, Visited, Worklist&: DeferredNodes,
12601	MaxSteps);
12602	}
12603
12604	bool TargetLowering::expandMultipleResultFPLibCall(
12605	SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node,
12606	SmallVectorImpl<SDValue> &Results,
12607	std::optional<unsigned> CallRetResNo) const {
12608	if (LC == RTLIB::UNKNOWN_LIBCALL)
12609	return false;
12610
12611	RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(Call: LC);
12612	if (LibcallImpl == RTLIB::Unsupported)
12613	return false;
12614
12615	LLVMContext &Ctx = *DAG.getContext();
12616	EVT VT = Node->getValueType(ResNo: `0`);
12617	unsigned NumResults = Node->getNumValues();
12618
12619	// Find users of the node that store the results (and share input chains). The
12620	// destination pointers can be used instead of creating stack allocations.
12621	SDValue StoresInChain;
12622	SmallVector<StoreSDNode *, `2`> ResultStores(NumResults);
12623	for (SDNode *User : Node->users()) {
12624	if (!ISD::isNormalStore(N: User))
12625	continue;
12626	auto *ST = cast<StoreSDNode>(Val: User);
12627	SDValue StoreValue = ST->getValue();
12628	unsigned ResNo = StoreValue.getResNo();
12629	// Ensure the store corresponds to an output pointer.
12630	if (CallRetResNo == ResNo)
12631	continue;
12632	// Ensure the store to the default address space and not atomic or volatile.
12633	if (!ST->isSimple() \|\| ST->getAddressSpace() != `0`)
12634	continue;
12635	// Ensure all store chains are the same (so they don't alias).
12636	if (StoresInChain && ST->getChain() != StoresInChain)
12637	continue;
12638	// Ensure the store is properly aligned.
12639	Type *StoreType = StoreValue.getValueType().getTypeForEVT(Context&: Ctx);
12640	if (ST->getAlign() <
12641	DAG.getDataLayout().getABITypeAlign(Ty: StoreType->getScalarType()))
12642	continue;
12643	// Avoid:
12644	// 1. Creating cyclic dependencies.
12645	// 2. Expanding the node to a call within a call sequence.
12646	if (!canFoldStoreIntoLibCallOutputPointers(StoreNode: ST, FPNode: Node))
12647	continue;
12648	ResultStores [ResNo] = ST;
12649	StoresInChain = ST->getChain();
12650	}
12651
12652	ArgListTy Args;
12653
12654	// Pass the arguments.
12655	for (const SDValue &Op : Node->op_values()) {
12656	EVT ArgVT = Op.getValueType();
12657	Type *ArgTy = ArgVT.getTypeForEVT(Context&: Ctx);
12658	Args.emplace_back(args: Op, args&: ArgTy);
12659	}
12660
12661	// Pass the output pointers.
12662	SmallVector<SDValue, `2`> ResultPtrs(NumResults);
12663	Type *PointerTy = PointerType::getUnqual(C&: Ctx);
12664	for (auto [ResNo, ST] : llvm::enumerate(First&: ResultStores)) {
12665	if (ResNo == CallRetResNo)
12666	continue;
12667	EVT ResVT = Node->getValueType(ResNo);
12668	SDValue ResultPtr = ST ? ST->getBasePtr() : DAG.CreateStackTemporary(VT: ResVT);
12669	ResultPtrs [ResNo] = ResultPtr;
12670	Args.emplace_back(args&: ResultPtr, args&: PointerTy);
12671	}
12672
12673	SDLoc DL(Node);
12674
12675	if (RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(Impl: LibcallImpl)) {
12676	// Pass the vector mask (if required).
12677	EVT MaskVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: Ctx, VT);
12678	SDValue Mask = DAG.getBoolConstant(V: true, DL, VT: MaskVT, OpVT: VT);
12679	Args.emplace_back(args&: Mask, args: MaskVT.getTypeForEVT(Context&: Ctx));
12680	}
12681
12682	Type *RetType = CallRetResNo.has_value()
12683	? Node->getValueType(ResNo: *CallRetResNo).getTypeForEVT(Context&: Ctx)
12684	: Type::getVoidTy(C&: Ctx);
12685	SDValue InChain = StoresInChain ? StoresInChain : DAG.getEntryNode();
12686	SDValue Callee =
12687	DAG.getExternalSymbol(LCImpl: LibcallImpl, VT: getPointerTy(DL: DAG.getDataLayout()));
12688	TargetLowering::CallLoweringInfo CLI(DAG);
12689	CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(
12690	CC: getLibcallImplCallingConv(Call: LibcallImpl), ResultType: RetType, Target: Callee, ArgsList: std::move(Args));
12691
12692	auto [Call, CallChain] = LowerCallTo(CLI);
12693
12694	for (auto [ResNo, ResultPtr] : llvm::enumerate(First&: ResultPtrs)) {
12695	if (ResNo == CallRetResNo) {
12696	Results.push_back(Elt: Call);
12697	continue;
12698	}
12699	MachinePointerInfo PtrInfo;
12700	SDValue LoadResult = DAG.getLoad(VT: Node->getValueType(ResNo), dl: DL, Chain: CallChain,
12701	Ptr: ResultPtr, PtrInfo);
12702	SDValue OutChain = LoadResult.getValue(R: `1`);
12703
12704	if (StoreSDNode *ST = ResultStores [ResNo]) {
12705	// Replace store with the library call.
12706	DAG.ReplaceAllUsesOfValueWith(From: SDValue (ST, `0`), To: OutChain);
12707	PtrInfo = ST->getPointerInfo();
12708	} else {
12709	PtrInfo = MachinePointerInfo::getFixedStack(
12710	MF&: DAG.getMachineFunction(),
12711	FI: cast<FrameIndexSDNode>(Val&: ResultPtr)->getIndex());
12712	}
12713
12714	Results.push_back(Elt: LoadResult);
12715	}
12716
12717	return true;
12718	}
12719
12720	bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
12721	SDValue &LHS, SDValue &RHS,
12722	SDValue &CC, SDValue Mask,
12723	SDValue EVL, bool &NeedInvert,
12724	const SDLoc &dl, SDValue &Chain,
12725	bool IsSignaling) const {
12726	MVT OpVT = LHS.getSimpleValueType();
12727	ISD::CondCode CCCode = cast<CondCodeSDNode>(Val&: CC)->get();
12728	NeedInvert = false;
12729	assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
12730	bool IsNonVP = !EVL;
12731	switch (getCondCodeAction(CC: CCCode, VT: OpVT)) {
12732	default:
12733	llvm_unreachable("Unknown condition code action!");
12734	case TargetLowering::Legal:
12735	// Nothing to do.
12736	break;
12737	case TargetLowering::Expand: {
12738	ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(Operation: CCCode);
12739	if (isCondCodeLegalOrCustom(CC: InvCC, VT: OpVT)) {
12740	std::swap(a&: LHS, b&: RHS);
12741	CC = DAG.getCondCode(Cond: InvCC);
12742	return true;
12743	}
12744	// Swapping operands didn't work. Try inverting the condition.
12745	bool NeedSwap = false;
12746	InvCC = getSetCCInverse(Operation: CCCode, Type: OpVT);
12747	if (!isCondCodeLegalOrCustom(CC: InvCC, VT: OpVT)) {
12748	// If inverting the condition is not enough, try swapping operands
12749	// on top of it.
12750	InvCC = ISD::getSetCCSwappedOperands(Operation: InvCC);
12751	NeedSwap = true;
12752	}
12753	if (isCondCodeLegalOrCustom(CC: InvCC, VT: OpVT)) {
12754	CC = DAG.getCondCode(Cond: InvCC);
12755	NeedInvert = true;
12756	if (NeedSwap)
12757	std::swap(a&: LHS, b&: RHS);
12758	return true;
12759	}
12760
12761	// Special case: expand i1 comparisons using logical operations.
12762	if (OpVT == MVT::i1) {
12763	SDValue Ret;
12764	switch (CCCode) {
12765	default:
12766	llvm_unreachable("Unknown integer setcc!");
12767	case ISD::SETEQ: // X == Y --> ~(X ^ Y)
12768	Ret = DAG.getNOT(DL: dl, Val: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i1, N1: LHS, N2: RHS),
12769	VT: MVT::i1);
12770	break;
12771	case ISD::SETNE: // X != Y --> (X ^ Y)
12772	Ret = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i1, N1: LHS, N2: RHS);
12773	break;
12774	case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
12775	case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
12776	Ret = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i1, N1: RHS,
12777	N2: DAG.getNOT(DL: dl, Val: LHS, VT: MVT::i1));
12778	break;
12779	case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
12780	case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
12781	Ret = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i1, N1: LHS,
12782	N2: DAG.getNOT(DL: dl, Val: RHS, VT: MVT::i1));
12783	break;
12784	case ISD::SETULE: // X <=u Y --> X == 0 \| Y == 1 --> ~X \| Y
12785	case ISD::SETGE: // X >=s Y --> X == 0 \| Y == 1 --> ~X \| Y
12786	Ret = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: MVT::i1, N1: RHS,
12787	N2: DAG.getNOT(DL: dl, Val: LHS, VT: MVT::i1));
12788	break;
12789	case ISD::SETUGE: // X >=u Y --> X == 1 \| Y == 0 --> ~Y \| X
12790	case ISD::SETLE: // X <=s Y --> X == 1 \| Y == 0 --> ~Y \| X
12791	Ret = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: MVT::i1, N1: LHS,
12792	N2: DAG.getNOT(DL: dl, Val: RHS, VT: MVT::i1));
12793	break;
12794	}
12795
12796	LHS = DAG.getZExtOrTrunc(Op: Ret, DL: dl, VT);
12797	RHS = SDValue ();
12798	CC = SDValue ();
12799	return true;
12800	}
12801
12802	ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
12803	unsigned Opc = `0`;
12804	switch (CCCode) {
12805	default:
12806	llvm_unreachable("Don't know how to expand this condition!");
12807	case ISD::SETUO:
12808	if (isCondCodeLegal(CC: ISD::SETUNE, VT: OpVT)) {
12809	CC1 = ISD::SETUNE;
12810	CC2 = ISD::SETUNE;
12811	Opc = ISD::OR;
12812	break;
12813	}
12814	assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
12815	"If SETUE is expanded, SETOEQ or SETUNE must be legal!");
12816	NeedInvert = true;
12817	[[fallthrough]];
12818	case ISD::SETO:
12819	assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
12820	"If SETO is expanded, SETOEQ must be legal!");
12821	CC1 = ISD::SETOEQ;
12822	CC2 = ISD::SETOEQ;
12823	Opc = ISD::AND;
12824	break;
12825	case ISD::SETONE:
12826	case ISD::SETUEQ:
12827	// If the SETUO or SETO CC isn't legal, we might be able to use
12828	// SETOGT \|\| SETOLT, inverting the result for SETUEQ. We only need one
12829	// of SETOGT/SETOLT to be legal, the other can be emulated by swapping
12830	// the operands.
12831	CC2 = ((unsigned)CCCode & `0x8U`) ? ISD::SETUO : ISD::SETO;
12832	if (!isCondCodeLegal(CC: CC2, VT: OpVT) && (isCondCodeLegal(CC: ISD::SETOGT, VT: OpVT) \|\|
12833	isCondCodeLegal(CC: ISD::SETOLT, VT: OpVT))) {
12834	CC1 = ISD::SETOGT;
12835	CC2 = ISD::SETOLT;
12836	Opc = ISD::OR;
12837	NeedInvert = ((unsigned)CCCode & `0x8U`);
12838	break;
12839	}
12840	[[fallthrough]];
12841	case ISD::SETOEQ:
12842	case ISD::SETOGT:
12843	case ISD::SETOGE:
12844	case ISD::SETOLT:
12845	case ISD::SETOLE:
12846	case ISD::SETUNE:
12847	case ISD::SETUGT:
12848	case ISD::SETUGE:
12849	case ISD::SETULT:
12850	case ISD::SETULE:
12851	// If we are floating point, assign and break, otherwise fall through.
12852	if (!OpVT.isInteger()) {
12853	// We can use the 4th bit to tell if we are the unordered
12854	// or ordered version of the opcode.
12855	CC2 = ((unsigned)CCCode & `0x8U`) ? ISD::SETUO : ISD::SETO;
12856	Opc = ((unsigned)CCCode & `0x8U`) ? ISD::OR : ISD::AND;
12857	CC1 = (ISD::CondCode)(((int)CCCode & `0x7`) \| `0x10`);
12858	break;
12859	}
12860	// Fallthrough if we are unsigned integer.
12861	[[fallthrough]];
12862	case ISD::SETLE:
12863	case ISD::SETGT:
12864	case ISD::SETGE:
12865	case ISD::SETLT:
12866	case ISD::SETNE:
12867	case ISD::SETEQ:
12868	// If all combinations of inverting the condition and swapping operands
12869	// didn't work then we have no means to expand the condition.
12870	llvm_unreachable("Don't know how to expand this condition!");
12871	}
12872
12873	SDValue SetCC1, SetCC2;
12874	if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
12875	// If we aren't the ordered or unorder operation,
12876	// then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
12877	if (IsNonVP) {
12878	SetCC1 = DAG.getSetCC(DL: dl, VT, LHS, RHS, Cond: CC1, Chain, IsSignaling);
12879	SetCC2 = DAG.getSetCC(DL: dl, VT, LHS, RHS, Cond: CC2, Chain, IsSignaling);
12880	} else {
12881	SetCC1 = DAG.getSetCCVP(DL: dl, VT, LHS, RHS, Cond: CC1, Mask, EVL);
12882	SetCC2 = DAG.getSetCCVP(DL: dl, VT, LHS, RHS, Cond: CC2, Mask, EVL);
12883	}
12884	} else {
12885	// Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
12886	if (IsNonVP) {
12887	SetCC1 = DAG.getSetCC(DL: dl, VT, LHS, RHS: LHS, Cond: CC1, Chain, IsSignaling);
12888	SetCC2 = DAG.getSetCC(DL: dl, VT, LHS: RHS, RHS, Cond: CC2, Chain, IsSignaling);
12889	} else {
12890	SetCC1 = DAG.getSetCCVP(DL: dl, VT, LHS, RHS: LHS, Cond: CC1, Mask, EVL);
12891	SetCC2 = DAG.getSetCCVP(DL: dl, VT, LHS: RHS, RHS, Cond: CC2, Mask, EVL);
12892	}
12893	}
12894	if (Chain)
12895	Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: SetCC1.getValue(R: `1`),
12896	N2: SetCC2.getValue(R: `1`));
12897	if (IsNonVP)
12898	LHS = DAG.getNode(Opcode: Opc, DL: dl, VT, N1: SetCC1, N2: SetCC2);
12899	else {
12900	// Transform the binary opcode to the VP equivalent.
12901	assert((Opc == ISD::OR \|\| Opc == ISD::AND) && "Unexpected opcode");
12902	Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
12903	LHS = DAG.getNode(Opcode: Opc, DL: dl, VT, N1: SetCC1, N2: SetCC2, N3: Mask, N4: EVL);
12904	}
12905	RHS = SDValue ();
12906	CC = SDValue ();
12907	return true;
12908	}
12909	}
12910	return false;
12911	}
12912
12913	SDValue TargetLowering::expandVectorNaryOpBySplitting(SDNode *Node,
12914	SelectionDAG &DAG) const {
12915	EVT VT = Node->getValueType(ResNo: `0`);
12916	// Despite its documentation, GetSplitDestVTs will assert if VT cannot be
12917	// split into two equal parts.
12918	if (!VT.isVector() \|\| !VT.getVectorElementCount().isKnownMultipleOf(RHS: `2`))
12919	return SDValue ();
12920
12921	// Restrict expansion to cases where both parts can be concatenated.
12922	auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
12923	if (LoVT != HiVT \|\| !isTypeLegal(VT: LoVT))
12924	return SDValue ();
12925
12926	SDLoc DL(Node);
12927	unsigned Opcode = Node->getOpcode();
12928
12929	// Don't expand if the result is likely to be unrolled anyway.
12930	if (!isOperationLegalOrCustomOrPromote(Op: Opcode, VT: LoVT))
12931	return SDValue ();
12932
12933	SmallVector<SDValue, `4`> LoOps, HiOps;
12934	for (const SDValue &V : Node->op_values()) {
12935	auto [Lo, Hi] = DAG.SplitVector(N: V, DL, LoVT, HiVT);
12936	LoOps.push_back(Elt: Lo);
12937	HiOps.push_back(Elt: Hi);
12938	}
12939
12940	SDValue SplitOpLo = DAG.getNode(Opcode, DL, VT: LoVT, Ops: LoOps);
12941	SDValue SplitOpHi = DAG.getNode(Opcode, DL, VT: HiVT, Ops: HiOps);
12942	return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, N1: SplitOpLo, N2: SplitOpHi);
12943	}
12944
12945	SDValue TargetLowering::scalarizeExtractedVectorLoad(EVT ResultVT,
12946	const SDLoc &DL,
12947	EVT InVecVT, SDValue EltNo,
12948	LoadSDNode *OriginalLoad,
12949	SelectionDAG &DAG) const {
12950	assert(OriginalLoad->isSimple());
12951
12952	EVT VecEltVT = InVecVT.getVectorElementType();
12953
12954	// If the vector element type is not a multiple of a byte then we are unable
12955	// to correctly compute an address to load only the extracted element as a
12956	// scalar.
12957	if (!VecEltVT.isByteSized())
12958	return SDValue ();
12959
12960	ISD::LoadExtType ExtTy =
12961	ResultVT.bitsGT(VT: VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
12962	if (!isOperationLegalOrCustom(Op: ISD::LOAD, VT: VecEltVT))
12963	return SDValue ();
12964
12965	std::optional<unsigned> ByteOffset;
12966	Align Alignment = OriginalLoad->getAlign();
12967	MachinePointerInfo MPI;
12968	if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(Val&: EltNo)) {
12969	int Elt = ConstEltNo->getZExtValue();
12970	ByteOffset = VecEltVT.getSizeInBits() * Elt / `8`;
12971	MPI = OriginalLoad->getPointerInfo().getWithOffset(O: *ByteOffset);
12972	Alignment = commonAlignment(A: Alignment, Offset: *ByteOffset);
12973	} else {
12974	// Discard the pointer info except the address space because the memory
12975	// operand can't represent this new access since the offset is variable.
12976	MPI = MachinePointerInfo (OriginalLoad->getPointerInfo().getAddrSpace());
12977	Alignment = commonAlignment(A: Alignment, Offset: VecEltVT.getSizeInBits() / `8`);
12978	}
12979
12980	if (!shouldReduceLoadWidth(Load: OriginalLoad, ExtTy, NewVT: VecEltVT, ByteOffset))
12981	return SDValue ();
12982
12983	unsigned IsFast = `0`;
12984	if (!allowsMemoryAccess(Context&: *DAG.getContext(), DL: DAG.getDataLayout(), VT: VecEltVT,
12985	AddrSpace: OriginalLoad->getAddressSpace(), Alignment,
12986	Flags: OriginalLoad->getMemOperand()->getFlags(), Fast: &IsFast) \|\|
12987	!IsFast)
12988	return SDValue ();
12989
12990	// The original DAG loaded the entire vector from memory, so arithmetic
12991	// within it must be inbounds.
12992	SDValue NewPtr = getInboundsVectorElementPointer(
12993	DAG, VecPtr: OriginalLoad->getBasePtr(), VecVT: InVecVT, Index: EltNo);
12994
12995	// We are replacing a vector load with a scalar load. The new load must have
12996	// identical memory op ordering to the original.
12997	SDValue Load;
12998	if (ResultVT.bitsGT(VT: VecEltVT)) {
12999	// If the result type of vextract is wider than the load, then issue an
13000	// extending load instead.
13001	ISD::LoadExtType ExtType = isLoadExtLegal(ExtType: ISD::ZEXTLOAD, ValVT: ResultVT, MemVT: VecEltVT)
13002	? ISD::ZEXTLOAD
13003	: ISD::EXTLOAD;
13004	Load = DAG.getExtLoad(ExtType, dl: DL, VT: ResultVT, Chain: OriginalLoad->getChain(),
13005	Ptr: NewPtr, PtrInfo: MPI, MemVT: VecEltVT, Alignment,
13006	MMOFlags: OriginalLoad->getMemOperand()->getFlags(),
13007	AAInfo: OriginalLoad->getAAInfo());
13008	DAG.makeEquivalentMemoryOrdering(OldLoad: OriginalLoad, NewMemOp: Load);
13009	} else {
13010	// The result type is narrower or the same width as the vector element
13011	Load = DAG.getLoad(VT: VecEltVT, dl: DL, Chain: OriginalLoad->getChain(), Ptr: NewPtr, PtrInfo: MPI,
13012	Alignment, MMOFlags: OriginalLoad->getMemOperand()->getFlags(),
13013	AAInfo: OriginalLoad->getAAInfo());
13014	DAG.makeEquivalentMemoryOrdering(OldLoad: OriginalLoad, NewMemOp: Load);
13015	if (ResultVT.bitsLT(VT: VecEltVT))
13016	Load = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: ResultVT, Operand: Load);
13017	else
13018	Load = DAG.getBitcast(VT: ResultVT, V: Load);
13019	}
13020
13021	return Load;
13022	}
13023
13024	// Set type id for call site info and metadata 'call_target'.
13025	// We are filtering for:
13026	// a) The call-graph-section use case that wants to know about indirect
13027	// calls, or
13028	// b) We want to annotate indirect calls.
13029	void TargetLowering::setTypeIdForCallsiteInfo(
13030	const CallBase *CB, MachineFunction &MF,
13031	MachineFunction::CallSiteInfo &CSInfo) const {
13032	if (CB && CB->isIndirectCall() &&
13033	(MF.getTarget().Options.EmitCallGraphSection \|\|
13034	MF.getTarget().Options.EmitCallSiteInfo))
13035	CSInfo = MachineFunction::CallSiteInfo (*CB);
13036	}
13037

Browse the source code of llvm_projects/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp