TargetLowering.cpp source code [llvm_projects/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp]

1	//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This implements the TargetLowering class.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "llvm/CodeGen/TargetLowering.h"
14	#include "llvm/ADT/STLExtras.h"
15	#include "llvm/Analysis/ValueTracking.h"
16	#include "llvm/Analysis/VectorUtils.h"
17	#include "llvm/CodeGen/Analysis.h"
18	#include "llvm/CodeGen/CallingConvLower.h"
19	#include "llvm/CodeGen/CodeGenCommonISel.h"
20	#include "llvm/CodeGen/MachineFrameInfo.h"
21	#include "llvm/CodeGen/MachineFunction.h"
22	#include "llvm/CodeGen/MachineJumpTableInfo.h"
23	#include "llvm/CodeGen/MachineRegisterInfo.h"
24	#include "llvm/CodeGen/SDPatternMatch.h"
25	#include "llvm/CodeGen/SelectionDAG.h"
26	#include "llvm/CodeGen/TargetRegisterInfo.h"
27	#include "llvm/IR/DataLayout.h"
28	#include "llvm/IR/DerivedTypes.h"
29	#include "llvm/IR/GlobalVariable.h"
30	#include "llvm/IR/LLVMContext.h"
31	#include "llvm/MC/MCAsmInfo.h"
32	#include "llvm/MC/MCExpr.h"
33	#include "llvm/Support/DivisionByConstantInfo.h"
34	#include "llvm/Support/ErrorHandling.h"
35	#include "llvm/Support/KnownBits.h"
36	#include "llvm/Support/MathExtras.h"
37	#include "llvm/Target/TargetMachine.h"
38	#include <cctype>
39	#include <deque>
40	using namespace llvm;
41	using namespace llvm::SDPatternMatch;
42
43	/// NOTE: The TargetMachine owns TLOF.
44	TargetLowering::TargetLowering(const TargetMachine &tm)
45	: TargetLoweringBase (tm) {}
46
47	// Define the virtual destructor out-of-line for build efficiency.
48	TargetLowering::~TargetLowering() = default;
49
50	const char TargetLowering::getTargetNodeName(unsigned* Opcode) const {
51	return nullptr;
52	}
53
54	bool TargetLowering::isPositionIndependent() const {
55	return getTargetMachine().isPositionIndependent();
56	}
57
58	/// Check whether a given call node is in tail position within its function. If
59	/// so, it sets Chain to the input chain of the tail call.
60	bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
61	SDValue &Chain) const {
62	const Function &F = DAG.getMachineFunction().getFunction();
63
64	// First, check if tail calls have been disabled in this function.
65	if (F.getFnAttribute(Kind: "disable-tail-calls").getValueAsBool())
66	return false;
67
68	// Conservatively require the attributes of the call to match those of
69	// the return. Ignore following attributes because they don't affect the
70	// call sequence.
71	AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
72	for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
73	Attribute::DereferenceableOrNull, Attribute::NoAlias,
74	Attribute::NonNull, Attribute::NoUndef,
75	Attribute::Range, Attribute::NoFPClass})
76	CallerAttrs.removeAttribute(Val: Attr);
77
78	if (CallerAttrs.hasAttributes())
79	return false;
80
81	// It's not safe to eliminate the sign / zero extension of the return value.
82	if (CallerAttrs.contains(A: Attribute::ZExt) \|\|
83	CallerAttrs.contains(A: Attribute::SExt))
84	return false;
85
86	// Check if the only use is a function return node.
87	return isUsedByReturnOnly(Node, Chain);
88	}
89
90	bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
91	const uint32_t *CallerPreservedMask,
92	const SmallVectorImpl<CCValAssign> &ArgLocs,
93	const SmallVectorImpl<SDValue> &OutVals) const {
94	for (unsigned I = `0`, E = ArgLocs.size(); I != E; ++I) {
95	const CCValAssign &ArgLoc = ArgLocs [I];
96	if (!ArgLoc.isRegLoc())
97	continue;
98	MCRegister Reg = ArgLoc.getLocReg();
99	// Only look at callee saved registers.
100	if (MachineOperand::clobbersPhysReg(RegMask: CallerPreservedMask, PhysReg: Reg))
101	continue;
102	// Check that we pass the value used for the caller.
103	// (We look for a CopyFromReg reading a virtual register that is used
104	// for the function live-in value of register Reg)
105	SDValue Value = OutVals [I];
106	if (Value ->getOpcode() == ISD::AssertZext)
107	Value = Value.getOperand(i: `0`);
108	if (Value ->getOpcode() != ISD::CopyFromReg)
109	return false;
110	Register ArgReg = cast<RegisterSDNode>(Val: Value ->getOperand(Num: `1`))->getReg();
111	if (MRI.getLiveInPhysReg(VReg: ArgReg) != Reg)
112	return false;
113	}
114	return true;
115	}
116
117	/// Set CallLoweringInfo attribute flags based on a call instruction
118	/// and called function attributes.
119	void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
120	unsigned ArgIdx) {
121	IsSExt = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SExt);
122	IsZExt = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::ZExt);
123	IsNoExt = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::NoExt);
124	IsInReg = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::InReg);
125	IsSRet = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::StructRet);
126	IsNest = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::Nest);
127	IsByVal = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::ByVal);
128	IsPreallocated = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::Preallocated);
129	IsInAlloca = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::InAlloca);
130	IsReturned = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::Returned);
131	IsSwiftSelf = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SwiftSelf);
132	IsSwiftAsync = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SwiftAsync);
133	IsSwiftError = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SwiftError);
134	Alignment = Call->getParamStackAlign(ArgNo: ArgIdx);
135	IndirectType = nullptr;
136	assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= `1` &&
137	"multiple ABI attributes?");
138	if (IsByVal) {
139	IndirectType = Call->getParamByValType(ArgNo: ArgIdx);
140	if (!Alignment)
141	Alignment = Call->getParamAlign(ArgNo: ArgIdx);
142	}
143	if (IsPreallocated)
144	IndirectType = Call->getParamPreallocatedType(ArgNo: ArgIdx);
145	if (IsInAlloca)
146	IndirectType = Call->getParamInAllocaType(ArgNo: ArgIdx);
147	if (IsSRet)
148	IndirectType = Call->getParamStructRetType(ArgNo: ArgIdx);
149	}
150
151	/// Generate a libcall taking the given operands as arguments and returning a
152	/// result of type RetVT.
153	std::pair<SDValue, SDValue>
154	TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
155	ArrayRef<SDValue> Ops,
156	MakeLibCallOptions CallOptions,
157	const SDLoc &dl,
158	SDValue InChain) const {
159	if (!InChain)
160	InChain = DAG.getEntryNode();
161
162	TargetLowering::ArgListTy Args;
163	Args.reserve(n: Ops.size());
164
165	TargetLowering::ArgListEntry Entry;
166	ArrayRef<Type *> OpsTypeOverrides = CallOptions.OpsTypeOverrides;
167	for (unsigned i = `0`; i < Ops.size(); ++i) {
168	SDValue NewOp = Ops [i];
169	Entry.Node = NewOp;
170	Entry.Ty = i < OpsTypeOverrides.size() && OpsTypeOverrides [i]
171	? OpsTypeOverrides [i]
172	: Entry.Node.getValueType().getTypeForEVT(Context&: *DAG.getContext());
173	Entry.IsSExt =
174	shouldSignExtendTypeInLibCall(Ty: Entry.Ty, IsSigned: CallOptions.IsSigned);
175	Entry.IsZExt = !Entry.IsSExt;
176
177	if (CallOptions.IsSoften &&
178	!shouldExtendTypeInLibCall(Type: CallOptions.OpsVTBeforeSoften [i])) {
179	Entry.IsSExt = Entry.IsZExt = false;
180	}
181	Args.push_back(x: Entry);
182	}
183
184	const char *LibcallName = getLibcallName(Call: LC);
185	if (LC == RTLIB::UNKNOWN_LIBCALL \|\| !LibcallName)
186	reportFatalInternalError(reason: "unsupported library call operation");
187
188	SDValue Callee =
189	DAG.getExternalSymbol(Sym: LibcallName, VT: getPointerTy(DL: DAG.getDataLayout()));
190
191	Type RetTy = RetVT.getTypeForEVT(Context&: DAG.getContext());
192	TargetLowering::CallLoweringInfo CLI(DAG);
193	bool signExtend = shouldSignExtendTypeInLibCall(Ty: RetTy, IsSigned: CallOptions.IsSigned);
194	bool zeroExtend = !signExtend;
195
196	if (CallOptions.IsSoften &&
197	!shouldExtendTypeInLibCall(Type: CallOptions.RetVTBeforeSoften)) {
198	signExtend = zeroExtend = false;
199	}
200
201	CLI.setDebugLoc(dl)
202	.setChain(InChain)
203	.setLibCallee(CC: getLibcallCallingConv(Call: LC), ResultType: RetTy, Target: Callee, ArgsList: std::move(Args))
204	.setNoReturn(CallOptions.DoesNotReturn)
205	.setDiscardResult(!CallOptions.IsReturnValueUsed)
206	.setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
207	.setSExtResult(signExtend)
208	.setZExtResult(zeroExtend);
209	return LowerCallTo(CLI);
210	}
211
212	bool TargetLowering::findOptimalMemOpLowering(
213	std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
214	unsigned SrcAS, const AttributeList &FuncAttributes) const {
215	if (Limit != ~unsigned(`0`) && Op.isMemcpyWithFixedDstAlign() &&
216	Op.getSrcAlign() < Op.getDstAlign())
217	return false;
218
219	EVT VT = getOptimalMemOpType(Op, FuncAttributes);
220
221	if (VT == MVT::Other) {
222	// Use the largest integer type whose alignment constraints are satisfied.
223	// We only need to check DstAlign here as SrcAlign is always greater or
224	// equal to DstAlign (or zero).
225	VT = MVT::LAST_INTEGER_VALUETYPE;
226	if (Op.isFixedDstAlign())
227	while (Op.getDstAlign() < (VT.getSizeInBits() / `8`) &&
228	!allowsMisalignedMemoryAccesses(VT, AddrSpace: DstAS, Alignment: Op.getDstAlign()))
229	VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - `1`);
230	assert(VT.isInteger());
231
232	// Find the largest legal integer type.
233	MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
234	while (!isTypeLegal(VT: LVT))
235	LVT = (MVT::SimpleValueType)(LVT.SimpleTy - `1`);
236	assert(LVT.isInteger());
237
238	// If the type we've chosen is larger than the largest legal integer type
239	// then use that instead.
240	if (VT.bitsGT(VT: LVT))
241	VT = LVT;
242	}
243
244	unsigned NumMemOps = `0`;
245	uint64_t Size = Op.size();
246	while (Size) {
247	unsigned VTSize = VT.getSizeInBits() / `8`;
248	while (VTSize > Size) {
249	// For now, only use non-vector load / store's for the left-over pieces.
250	EVT NewVT = VT;
251	unsigned NewVTSize;
252
253	bool Found = false;
254	if (VT.isVector() \|\| VT.isFloatingPoint()) {
255	NewVT = (VT.getSizeInBits() > `64`) ? MVT::i64 : MVT::i32;
256	if (isOperationLegalOrCustom(Op: ISD::STORE, VT: NewVT) &&
257	isSafeMemOpType(NewVT.getSimpleVT()))
258	Found = true;
259	else if (NewVT == MVT::i64 &&
260	isOperationLegalOrCustom(Op: ISD::STORE, VT: MVT::f64) &&
261	isSafeMemOpType(MVT::f64)) {
262	// i64 is usually not legal on 32-bit targets, but f64 may be.
263	NewVT = MVT::f64;
264	Found = true;
265	}
266	}
267
268	if (!Found) {
269	do {
270	NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - `1`);
271	if (NewVT == MVT::i8)
272	break;
273	} while (!isSafeMemOpType(NewVT.getSimpleVT()));
274	}
275	NewVTSize = NewVT.getSizeInBits() / `8`;
276
277	// If the new VT cannot cover all of the remaining bits, then consider
278	// issuing a (or a pair of) unaligned and overlapping load / store.
279	unsigned Fast;
280	if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
281	allowsMisalignedMemoryAccesses(
282	VT, AddrSpace: DstAS, Alignment: Op.isFixedDstAlign() ? Op.getDstAlign() : Align (`1`),
283	Flags: MachineMemOperand::MONone, &Fast) &&
284	Fast)
285	VTSize = Size;
286	else {
287	VT = NewVT;
288	VTSize = NewVTSize;
289	}
290	}
291
292	if (++NumMemOps > Limit)
293	return false;
294
295	MemOps.push_back(x: VT);
296	Size -= VTSize;
297	}
298
299	return true;
300	}
301
302	/// Soften the operands of a comparison. This code is shared among BR_CC,
303	/// SELECT_CC, and SETCC handlers.
304	void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
305	SDValue &NewLHS, SDValue &NewRHS,
306	ISD::CondCode &CCCode,
307	const SDLoc &dl, const SDValue OldLHS,
308	const SDValue OldRHS) const {
309	SDValue Chain;
310	return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, DL: dl, OldLHS,
311	OldRHS, Chain);
312	}
313
314	void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
315	SDValue &NewLHS, SDValue &NewRHS,
316	ISD::CondCode &CCCode,
317	const SDLoc &dl, const SDValue OldLHS,
318	const SDValue OldRHS,
319	SDValue &Chain,
320	bool IsSignaling) const {
321	// FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
322	// not supporting it. We can update this code when libgcc provides such
323	// functions.
324
325	assert((VT == MVT::f32 \|\| VT == MVT::f64 \|\| VT == MVT::f128 \|\| VT == MVT::ppcf128)
326	&& "Unsupported setcc type!");
327
328	// Expand into one or more soft-fp libcall(s).
329	RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
330	bool ShouldInvertCC = false;
331	switch (CCCode) {
332	case ISD::SETEQ:
333	case ISD::SETOEQ:
334	LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
335	(VT == MVT::f64) ? RTLIB::OEQ_F64 :
336	(VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
337	break;
338	case ISD::SETNE:
339	case ISD::SETUNE:
340	LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
341	(VT == MVT::f64) ? RTLIB::UNE_F64 :
342	(VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
343	break;
344	case ISD::SETGE:
345	case ISD::SETOGE:
346	LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
347	(VT == MVT::f64) ? RTLIB::OGE_F64 :
348	(VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
349	break;
350	case ISD::SETLT:
351	case ISD::SETOLT:
352	LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
353	(VT == MVT::f64) ? RTLIB::OLT_F64 :
354	(VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
355	break;
356	case ISD::SETLE:
357	case ISD::SETOLE:
358	LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
359	(VT == MVT::f64) ? RTLIB::OLE_F64 :
360	(VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
361	break;
362	case ISD::SETGT:
363	case ISD::SETOGT:
364	LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
365	(VT == MVT::f64) ? RTLIB::OGT_F64 :
366	(VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
367	break;
368	case ISD::SETO:
369	ShouldInvertCC = true;
370	[[fallthrough]];
371	case ISD::SETUO:
372	LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
373	(VT == MVT::f64) ? RTLIB::UO_F64 :
374	(VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
375	break;
376	case ISD::SETONE:
377	// SETONE = O && UNE
378	ShouldInvertCC = true;
379	[[fallthrough]];
380	case ISD::SETUEQ:
381	LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
382	(VT == MVT::f64) ? RTLIB::UO_F64 :
383	(VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
384	LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
385	(VT == MVT::f64) ? RTLIB::OEQ_F64 :
386	(VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
387	break;
388	default:
389	// Invert CC for unordered comparisons
390	ShouldInvertCC = true;
391	switch (CCCode) {
392	case ISD::SETULT:
393	LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
394	(VT == MVT::f64) ? RTLIB::OGE_F64 :
395	(VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
396	break;
397	case ISD::SETULE:
398	LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
399	(VT == MVT::f64) ? RTLIB::OGT_F64 :
400	(VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
401	break;
402	case ISD::SETUGT:
403	LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
404	(VT == MVT::f64) ? RTLIB::OLE_F64 :
405	(VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
406	break;
407	case ISD::SETUGE:
408	LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
409	(VT == MVT::f64) ? RTLIB::OLT_F64 :
410	(VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
411	break;
412	default: llvm_unreachable("Do not know how to soften this setcc!");
413	}
414	}
415
416	// Use the target specific return value for comparison lib calls.
417	EVT RetVT = getCmpLibcallReturnType();
418	SDValue Ops[`2`] = {NewLHS, NewRHS};
419	TargetLowering::MakeLibCallOptions CallOptions;
420	EVT OpsVT[`2`] = { OldLHS.getValueType(),
421	OldRHS.getValueType() };
422	CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, Value: true);
423	auto Call = makeLibCall(DAG, LC: LC1, RetVT, Ops, CallOptions, dl, InChain: Chain);
424	NewLHS = Call.first;
425	NewRHS = DAG.getConstant(Val: `0`, DL: dl, VT: RetVT);
426
427	CCCode = getICmpCondCode(Pred: getSoftFloatCmpLibcallPredicate(Call: LC1));
428	if (ShouldInvertCC) {
429	assert(RetVT.isInteger());
430	CCCode = getSetCCInverse(Operation: CCCode, Type: RetVT);
431	}
432
433	if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
434	// Update Chain.
435	Chain = Call.second;
436	} else {
437	assert(CCCode == (ShouldInvertCC ? ISD::SETEQ : ISD::SETNE) &&
438	"unordered call should be simple boolean");
439
440	EVT SetCCVT =
441	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: RetVT);
442	if (getBooleanContents(Type: RetVT) == ZeroOrOneBooleanContent) {
443	NewLHS = DAG.getNode(Opcode: ISD::AssertZext, DL: dl, VT: RetVT, N1: Call.first,
444	N2: DAG.getValueType(MVT::i1));
445	}
446
447	SDValue Tmp = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: NewLHS, RHS: NewRHS, Cond: CCCode);
448	auto Call2 = makeLibCall(DAG, LC: LC2, RetVT, Ops, CallOptions, dl, InChain: Chain);
449	CCCode = getICmpCondCode(Pred: getSoftFloatCmpLibcallPredicate(Call: LC2));
450	if (ShouldInvertCC)
451	CCCode = getSetCCInverse(Operation: CCCode, Type: RetVT);
452	NewLHS = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Call2.first, RHS: NewRHS, Cond: CCCode);
453	if (Chain)
454	Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Call.second,
455	N2: Call2.second);
456	NewLHS = DAG.getNode(Opcode: ShouldInvertCC ? ISD::AND : ISD::OR, DL: dl,
457	VT: Tmp.getValueType(), N1: Tmp, N2: NewLHS);
458	NewRHS = SDValue ();
459	}
460	}
461
462	/// Return the entry encoding for a jump table in the current function. The
463	/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
464	unsigned TargetLowering::getJumpTableEncoding() const {
465	// In non-pic modes, just use the address of a block.
466	if (!isPositionIndependent())
467	return MachineJumpTableInfo::EK_BlockAddress;
468
469	// Otherwise, use a label difference.
470	return MachineJumpTableInfo::EK_LabelDifference32;
471	}
472
473	SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
474	SelectionDAG &DAG) const {
475	return Table;
476	}
477
478	/// This returns the relocation base for the given PIC jumptable, the same as
479	/// getPICJumpTableRelocBase, but as an MCExpr.
480	const MCExpr *
481	TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
482	unsigned JTI,MCContext &Ctx) const{
483	// The normal PIC reloc base is the label at the start of the jump table.
484	return MCSymbolRefExpr::create(Symbol: MF->getJTISymbol(JTI, Ctx), Ctx);
485	}
486
487	SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
488	SDValue Addr, int JTI,
489	SelectionDAG &DAG) const {
490	SDValue Chain = Value;
491	// Jump table debug info is only needed if CodeView is enabled.
492	if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
493	Chain = DAG.getJumpTableDebugInfo(JTI, Chain, DL: dl);
494	}
495	return DAG.getNode(Opcode: ISD::BRIND, DL: dl, VT: MVT::Other, N1: Chain, N2: Addr);
496	}
497
498	bool
499	TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode GA) const* {
500	const TargetMachine &TM = getTargetMachine();
501	const GlobalValue *GV = GA->getGlobal();
502
503	// If the address is not even local to this DSO we will have to load it from
504	// a got and then add the offset.
505	if (!TM.shouldAssumeDSOLocal(GV))
506	return false;
507
508	// If the code is position independent we will have to add a base register.
509	if (isPositionIndependent())
510	return false;
511
512	// Otherwise we can do it.
513	return true;
514	}
515
516	//===----------------------------------------------------------------------===//
517	// Optimization Methods
518	//===----------------------------------------------------------------------===//
519
520	/// If the specified instruction has a constant integer operand and there are
521	/// bits set in that constant that are not demanded, then clear those bits and
522	/// return true.
523	bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
524	const APInt &DemandedBits,
525	const APInt &DemandedElts,
526	TargetLoweringOpt &TLO) const {
527	SDLoc DL(Op);
528	unsigned Opcode = Op.getOpcode();
529
530	// Early-out if we've ended up calling an undemanded node, leave this to
531	// constant folding.
532	if (DemandedBits.isZero() \|\| DemandedElts.isZero())
533	return false;
534
535	// Do target-specific constant optimization.
536	if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
537	return TLO.New.getNode();
538
539	// FIXME: ISD::SELECT, ISD::SELECT_CC
540	switch (Opcode) {
541	default:
542	break;
543	case ISD::XOR:
544	case ISD::AND:
545	case ISD::OR: {
546	auto *Op1C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `1`));
547	if (!Op1C \|\| Op1C->isOpaque())
548	return false;
549
550	// If this is a 'not' op, don't touch it because that's a canonical form.
551	const APInt &C = Op1C->getAPIntValue();
552	if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(RHS: C))
553	return false;
554
555	if (!C.isSubsetOf(RHS: DemandedBits)) {
556	EVT VT = Op.getValueType();
557	SDValue NewC = TLO.DAG.getConstant(Val: DemandedBits & C, DL, VT);
558	SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, N1: Op.getOperand(i: `0`), N2: NewC,
559	Flags: Op ->getFlags());
560	return TLO.CombineTo(O: Op, N: NewOp);
561	}
562
563	break;
564	}
565	}
566
567	return false;
568	}
569
570	bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
571	const APInt &DemandedBits,
572	TargetLoweringOpt &TLO) const {
573	EVT VT = Op.getValueType();
574	APInt DemandedElts = VT.isVector()
575	? APInt::getAllOnes(numBits: VT.getVectorNumElements())
576	: APInt (`1`, `1`);
577	return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
578	}
579
580	/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
581	/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
582	/// but it could be generalized for targets with other types of implicit
583	/// widening casts.
584	bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
585	const APInt &DemandedBits,
586	TargetLoweringOpt &TLO) const {
587	assert(Op.getNumOperands() == `2` &&
588	"ShrinkDemandedOp only supports binary operators!");
589	assert(Op.getNode()->getNumValues() == `1` &&
590	"ShrinkDemandedOp only supports nodes with one result!");
591
592	EVT VT = Op.getValueType();
593	SelectionDAG &DAG = TLO.DAG;
594	SDLoc dl(Op);
595
596	// Early return, as this function cannot handle vector types.
597	if (VT.isVector())
598	return false;
599
600	assert(Op.getOperand(`0`).getValueType().getScalarSizeInBits() == BitWidth &&
601	Op.getOperand(`1`).getValueType().getScalarSizeInBits() == BitWidth &&
602	"ShrinkDemandedOp only supports operands that have the same size!");
603
604	// Don't do this if the node has another user, which may require the
605	// full value.
606	if (!Op.getNode()->hasOneUse())
607	return false;
608
609	// Search for the smallest integer type with free casts to and from
610	// Op's type. For expedience, just check power-of-2 integer types.
611	unsigned DemandedSize = DemandedBits.getActiveBits();
612	for (unsigned SmallVTBits = llvm::bit_ceil(Value: DemandedSize);
613	SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(A: SmallVTBits)) {
614	EVT SmallVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: SmallVTBits);
615	if (isTruncateFree(FromVT: VT, ToVT: SmallVT) && isZExtFree(FromTy: SmallVT, ToTy: VT)) {
616	// We found a type with free casts.
617
618	// If the operation has the 'disjoint' flag, then the
619	// operands on the new node are also disjoint.
620	SDNodeFlags Flags(Op ->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
621	: SDNodeFlags::None);
622	SDValue X = DAG.getNode(
623	Opcode: Op.getOpcode(), DL: dl, VT: SmallVT,
624	N1: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SmallVT, Operand: Op.getOperand(i: `0`)),
625	N2: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SmallVT, Operand: Op.getOperand(i: `1`)), Flags);
626	assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
627	SDValue Z = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: X);
628	return TLO.CombineTo(O: Op, N: Z);
629	}
630	}
631	return false;
632	}
633
634	bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
635	DAGCombinerInfo &DCI) const {
636	SelectionDAG &DAG = DCI.DAG;
637	TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
638	!DCI.isBeforeLegalizeOps());
639	KnownBits Known;
640
641	bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
642	if (Simplified) {
643	DCI.AddToWorklist(N: Op.getNode());
644	DCI.CommitTargetLoweringOpt(TLO);
645	}
646	return Simplified;
647	}
648
649	bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
650	const APInt &DemandedElts,
651	DAGCombinerInfo &DCI) const {
652	SelectionDAG &DAG = DCI.DAG;
653	TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
654	!DCI.isBeforeLegalizeOps());
655	KnownBits Known;
656
657	bool Simplified =
658	SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
659	if (Simplified) {
660	DCI.AddToWorklist(N: Op.getNode());
661	DCI.CommitTargetLoweringOpt(TLO);
662	}
663	return Simplified;
664	}
665
666	bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
667	KnownBits &Known,
668	TargetLoweringOpt &TLO,
669	unsigned Depth,
670	bool AssumeSingleUse) const {
671	EVT VT = Op.getValueType();
672
673	// Since the number of lanes in a scalable vector is unknown at compile time,
674	// we track one bit which is implicitly broadcast to all lanes. This means
675	// that all lanes in a scalable vector are considered demanded.
676	APInt DemandedElts = VT.isFixedLengthVector()
677	? APInt::getAllOnes(numBits: VT.getVectorNumElements())
678	: APInt (`1`, `1`);
679	return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
680	AssumeSingleUse);
681	}
682
683	// TODO: Under what circumstances can we create nodes? Constant folding?
684	SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
685	SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
686	SelectionDAG &DAG, unsigned Depth) const {
687	EVT VT = Op.getValueType();
688
689	// Limit search depth.
690	if (Depth >= SelectionDAG::MaxRecursionDepth)
691	return SDValue ();
692
693	// Ignore UNDEFs.
694	if (Op.isUndef())
695	return SDValue ();
696
697	// Not demanding any bits/elts from Op.
698	if (DemandedBits == `0` \|\| DemandedElts == `0`)
699	return DAG.getUNDEF(VT);
700
701	bool IsLE = DAG.getDataLayout().isLittleEndian();
702	unsigned NumElts = DemandedElts.getBitWidth();
703	unsigned BitWidth = DemandedBits.getBitWidth();
704	KnownBits LHSKnown, RHSKnown;
705	switch (Op.getOpcode()) {
706	case ISD::BITCAST: {
707	if (VT.isScalableVector())
708	return SDValue ();
709
710	SDValue Src = peekThroughBitcasts(V: Op.getOperand(i: `0`));
711	EVT SrcVT = Src.getValueType();
712	EVT DstVT = Op.getValueType();
713	if (SrcVT == DstVT)
714	return Src;
715
716	unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
717	unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
718	if (NumSrcEltBits == NumDstEltBits)
719	if (SDValue V = SimplifyMultipleUseDemandedBits(
720	Op: Src, DemandedBits, DemandedElts, DAG, Depth: Depth + `1`))
721	return DAG.getBitcast(VT: DstVT, V);
722
723	if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == `0`) {
724	unsigned Scale = NumDstEltBits / NumSrcEltBits;
725	unsigned NumSrcElts = SrcVT.getVectorNumElements();
726	APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
727	APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
728	for (unsigned i = `0`; i != Scale; ++i) {
729	unsigned EltOffset = IsLE ? i : (Scale - `1` - i);
730	unsigned BitOffset = EltOffset * NumSrcEltBits;
731	APInt Sub = DemandedBits.extractBits(numBits: NumSrcEltBits, bitPosition: BitOffset);
732	if (!Sub.isZero()) {
733	DemandedSrcBits \|= Sub;
734	for (unsigned j = `0`; j != NumElts; ++j)
735	if (DemandedElts [j])
736	DemandedSrcElts.setBit((j * Scale) + i);
737	}
738	}
739
740	if (SDValue V = SimplifyMultipleUseDemandedBits(
741	Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG, Depth: Depth + `1`))
742	return DAG.getBitcast(VT: DstVT, V);
743	}
744
745	// TODO - bigendian once we have test coverage.
746	if (IsLE && (NumSrcEltBits % NumDstEltBits) == `0`) {
747	unsigned Scale = NumSrcEltBits / NumDstEltBits;
748	unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : `1`;
749	APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
750	APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
751	for (unsigned i = `0`; i != NumElts; ++i)
752	if (DemandedElts [i]) {
753	unsigned Offset = (i % Scale) * NumDstEltBits;
754	DemandedSrcBits.insertBits(SubBits: DemandedBits, bitPosition: Offset);
755	DemandedSrcElts.setBit(i / Scale);
756	}
757
758	if (SDValue V = SimplifyMultipleUseDemandedBits(
759	Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG, Depth: Depth + `1`))
760	return DAG.getBitcast(VT: DstVT, V);
761	}
762
763	break;
764	}
765	case ISD::FREEZE: {
766	SDValue N0 = Op.getOperand(i: `0`);
767	if (DAG.isGuaranteedNotToBeUndefOrPoison(Op: N0, DemandedElts,
768	/PoisonOnly=/false))
769	return N0;
770	break;
771	}
772	case ISD::AND: {
773	LHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: `0`), DemandedElts, Depth: Depth + `1`);
774	RHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: `1`), DemandedElts, Depth: Depth + `1`);
775
776	// If all of the demanded bits are known 1 on one side, return the other.
777	// These bits cannot contribute to the result of the 'and' in this
778	// context.
779	if (DemandedBits.isSubsetOf(RHS: LHSKnown.Zero \| RHSKnown.One))
780	return Op.getOperand(i: `0`);
781	if (DemandedBits.isSubsetOf(RHS: RHSKnown.Zero \| LHSKnown.One))
782	return Op.getOperand(i: `1`);
783	break;
784	}
785	case ISD::OR: {
786	LHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: `0`), DemandedElts, Depth: Depth + `1`);
787	RHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: `1`), DemandedElts, Depth: Depth + `1`);
788
789	// If all of the demanded bits are known zero on one side, return the
790	// other. These bits cannot contribute to the result of the 'or' in this
791	// context.
792	if (DemandedBits.isSubsetOf(RHS: LHSKnown.One \| RHSKnown.Zero))
793	return Op.getOperand(i: `0`);
794	if (DemandedBits.isSubsetOf(RHS: RHSKnown.One \| LHSKnown.Zero))
795	return Op.getOperand(i: `1`);
796	break;
797	}
798	case ISD::XOR: {
799	LHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: `0`), DemandedElts, Depth: Depth + `1`);
800	RHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: `1`), DemandedElts, Depth: Depth + `1`);
801
802	// If all of the demanded bits are known zero on one side, return the
803	// other.
804	if (DemandedBits.isSubsetOf(RHS: RHSKnown.Zero))
805	return Op.getOperand(i: `0`);
806	if (DemandedBits.isSubsetOf(RHS: LHSKnown.Zero))
807	return Op.getOperand(i: `1`);
808	break;
809	}
810	case ISD::ADD: {
811	RHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: `1`), DemandedElts, Depth: Depth + `1`);
812	if (RHSKnown.isZero())
813	return Op.getOperand(i: `0`);
814
815	LHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: `0`), DemandedElts, Depth: Depth + `1`);
816	if (LHSKnown.isZero())
817	return Op.getOperand(i: `1`);
818	break;
819	}
820	case ISD::SHL: {
821	// If we are only demanding sign bits then we can use the shift source
822	// directly.
823	if (std::optional<uint64_t> MaxSA =
824	DAG.getValidMaximumShiftAmount(V: Op, DemandedElts, Depth: Depth + `1`)) {
825	SDValue Op0 = Op.getOperand(i: `0`);
826	unsigned ShAmt = *MaxSA;
827	unsigned NumSignBits =
828	DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + `1`);
829	unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
830	if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
831	return Op0;
832	}
833	break;
834	}
835	case ISD::SRL: {
836	// If we are only demanding sign bits then we can use the shift source
837	// directly.
838	if (std::optional<uint64_t> MaxSA =
839	DAG.getValidMaximumShiftAmount(V: Op, DemandedElts, Depth: Depth + `1`)) {
840	SDValue Op0 = Op.getOperand(i: `0`);
841	unsigned ShAmt = *MaxSA;
842	// Must already be signbits in DemandedBits bounds, and can't demand any
843	// shifted in zeroes.
844	if (DemandedBits.countl_zero() >= ShAmt) {
845	unsigned NumSignBits =
846	DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + `1`);
847	if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
848	return Op0;
849	}
850	}
851	break;
852	}
853	case ISD::SETCC: {
854	SDValue Op0 = Op.getOperand(i: `0`);
855	SDValue Op1 = Op.getOperand(i: `1`);
856	ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: `2`))->get();
857	// If (1) we only need the sign-bit, (2) the setcc operands are the same
858	// width as the setcc result, and (3) the result of a setcc conforms to 0 or
859	// -1, we may be able to bypass the setcc.
860	if (DemandedBits.isSignMask() &&
861	Op0.getScalarValueSizeInBits() == BitWidth &&
862	getBooleanContents(Type: Op0.getValueType()) ==
863	BooleanContent::ZeroOrNegativeOneBooleanContent) {
864	// If we're testing X < 0, then this compare isn't needed - just use X!
865	// FIXME: We're limiting to integer types here, but this should also work
866	// if we don't care about FP signed-zero. The use of SETLT with FP means
867	// that we don't care about NaNs.
868	if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
869	(isNullConstant(V: Op1) \|\| ISD::isBuildVectorAllZeros(N: Op1.getNode())))
870	return Op0;
871	}
872	break;
873	}
874	case ISD::SIGN_EXTEND_INREG: {
875	// If none of the extended bits are demanded, eliminate the sextinreg.
876	SDValue Op0 = Op.getOperand(i: `0`);
877	EVT ExVT = cast<VTSDNode>(Val: Op.getOperand(i: `1`))->getVT();
878	unsigned ExBits = ExVT.getScalarSizeInBits();
879	if (DemandedBits.getActiveBits() <= ExBits &&
880	shouldRemoveRedundantExtend(Op))
881	return Op0;
882	// If the input is already sign extended, just drop the extension.
883	unsigned NumSignBits = DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + `1`);
884	if (NumSignBits >= (BitWidth - ExBits + `1`))
885	return Op0;
886	break;
887	}
888	case ISD::ANY_EXTEND_VECTOR_INREG:
889	case ISD::SIGN_EXTEND_VECTOR_INREG:
890	case ISD::ZERO_EXTEND_VECTOR_INREG: {
891	if (VT.isScalableVector())
892	return SDValue ();
893
894	// If we only want the lowest element and none of extended bits, then we can
895	// return the bitcasted source vector.
896	SDValue Src = Op.getOperand(i: `0`);
897	EVT SrcVT = Src.getValueType();
898	EVT DstVT = Op.getValueType();
899	if (IsLE && DemandedElts == `1` &&
900	DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
901	DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
902	return DAG.getBitcast(VT: DstVT, V: Src);
903	}
904	break;
905	}
906	case ISD::INSERT_VECTOR_ELT: {
907	if (VT.isScalableVector())
908	return SDValue ();
909
910	// If we don't demand the inserted element, return the base vector.
911	SDValue Vec = Op.getOperand(i: `0`);
912	auto *CIdx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `2`));
913	EVT VecVT = Vec.getValueType();
914	if (CIdx && CIdx->getAPIntValue().ult(RHS: VecVT.getVectorNumElements()) &&
915	!DemandedElts [CIdx->getZExtValue()])
916	return Vec;
917	break;
918	}
919	case ISD::INSERT_SUBVECTOR: {
920	if (VT.isScalableVector())
921	return SDValue ();
922
923	SDValue Vec = Op.getOperand(i: `0`);
924	SDValue Sub = Op.getOperand(i: `1`);
925	uint64_t Idx = Op.getConstantOperandVal(i: `2`);
926	unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
927	APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx);
928	// If we don't demand the inserted subvector, return the base vector.
929	if (DemandedSubElts == `0`)
930	return Vec;
931	break;
932	}
933	case ISD::VECTOR_SHUFFLE: {
934	assert(!VT.isScalableVector());
935	ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Val&: Op)->getMask();
936
937	// If all the demanded elts are from one operand and are inline,
938	// then we can use the operand directly.
939	bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
940	for (unsigned i = `0`; i != NumElts; ++i) {
941	int M = ShuffleMask [i];
942	if (M < `0` \|\| !DemandedElts [i])
943	continue;
944	AllUndef = false;
945	IdentityLHS &= (M == (int)i);
946	IdentityRHS &= ((M - NumElts) == i);
947	}
948
949	if (AllUndef)
950	return DAG.getUNDEF(VT: Op.getValueType());
951	if (IdentityLHS)
952	return Op.getOperand(i: `0`);
953	if (IdentityRHS)
954	return Op.getOperand(i: `1`);
955	break;
956	}
957	default:
958	// TODO: Probably okay to remove after audit; here to reduce change size
959	// in initial enablement patch for scalable vectors
960	if (VT.isScalableVector())
961	return SDValue ();
962
963	if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
964	if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
965	Op, DemandedBits, DemandedElts, DAG, Depth))
966	return V;
967	break;
968	}
969	return SDValue ();
970	}
971
972	SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
973	SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
974	unsigned Depth) const {
975	EVT VT = Op.getValueType();
976	// Since the number of lanes in a scalable vector is unknown at compile time,
977	// we track one bit which is implicitly broadcast to all lanes. This means
978	// that all lanes in a scalable vector are considered demanded.
979	APInt DemandedElts = VT.isFixedLengthVector()
980	? APInt::getAllOnes(numBits: VT.getVectorNumElements())
981	: APInt (`1`, `1`);
982	return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
983	Depth);
984	}
985
986	SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
987	SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
988	unsigned Depth) const {
989	APInt DemandedBits = APInt::getAllOnes(numBits: Op.getScalarValueSizeInBits());
990	return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
991	Depth);
992	}
993
994	// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
995	// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
996	static SDValue combineShiftToAVG(SDValue Op,
997	TargetLowering::TargetLoweringOpt &TLO,
998	const TargetLowering &TLI,
999	const APInt &DemandedBits,
1000	const APInt &DemandedElts, unsigned Depth) {
1001	assert((Op.getOpcode() == ISD::SRL \|\| Op.getOpcode() == ISD::SRA) &&
1002	"SRL or SRA node is required here!");
1003	// Is the right shift using an immediate value of 1?
1004	ConstantSDNode *N1C = isConstOrConstSplat(N: Op.getOperand(i: `1`), DemandedElts);
1005	if (!N1C \|\| !N1C->isOne())
1006	return SDValue ();
1007
1008	// We are looking for an avgfloor
1009	// add(ext, ext)
1010	// or one of these as a avgceil
1011	// add(add(ext, ext), 1)
1012	// add(add(ext, 1), ext)
1013	// add(ext, add(ext, 1))
1014	SDValue Add = Op.getOperand(i: `0`);
1015	if (Add.getOpcode() != ISD::ADD)
1016	return SDValue ();
1017
1018	SDValue ExtOpA = Add.getOperand(i: `0`);
1019	SDValue ExtOpB = Add.getOperand(i: `1`);
1020	SDValue Add2;
1021	auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
1022	ConstantSDNode *ConstOp;
1023	if ((ConstOp = isConstOrConstSplat(N: Op2, DemandedElts)) &&
1024	ConstOp->isOne()) {
1025	ExtOpA = Op1;
1026	ExtOpB = Op3;
1027	Add2 = A;
1028	return true;
1029	}
1030	if ((ConstOp = isConstOrConstSplat(N: Op3, DemandedElts)) &&
1031	ConstOp->isOne()) {
1032	ExtOpA = Op1;
1033	ExtOpB = Op2;
1034	Add2 = A;
1035	return true;
1036	}
1037	return false;
1038	};
1039	bool IsCeil =
1040	(ExtOpA.getOpcode() == ISD::ADD &&
1041	MatchOperands (ExtOpA.getOperand(i: `0`), ExtOpA.getOperand(i: `1`), ExtOpB, ExtOpA)) \|\|
1042	(ExtOpB.getOpcode() == ISD::ADD &&
1043	MatchOperands (ExtOpB.getOperand(i: `0`), ExtOpB.getOperand(i: `1`), ExtOpA, ExtOpB));
1044
1045	// If the shift is signed (sra):
1046	// - Needs >= 2 sign bit for both operands.
1047	// - Needs >= 2 zero bits.
1048	// If the shift is unsigned (srl):
1049	// - Needs >= 1 zero bit for both operands.
1050	// - Needs 1 demanded bit zero and >= 2 sign bits.
1051	SelectionDAG &DAG = TLO.DAG;
1052	unsigned ShiftOpc = Op.getOpcode();
1053	bool IsSigned = false;
1054	unsigned KnownBits;
1055	unsigned NumSignedA = DAG.ComputeNumSignBits(Op: ExtOpA, DemandedElts, Depth);
1056	unsigned NumSignedB = DAG.ComputeNumSignBits(Op: ExtOpB, DemandedElts, Depth);
1057	unsigned NumSigned = std::min(a: NumSignedA, b: NumSignedB) - `1`;
1058	unsigned NumZeroA =
1059	DAG.computeKnownBits(Op: ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
1060	unsigned NumZeroB =
1061	DAG.computeKnownBits(Op: ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
1062	unsigned NumZero = std::min(a: NumZeroA, b: NumZeroB);
1063
1064	switch (ShiftOpc) {
1065	default:
1066	llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
1067	case ISD::SRA: {
1068	if (NumZero >= `2` && NumSigned < NumZero) {
1069	IsSigned = false;
1070	KnownBits = NumZero;
1071	break;
1072	}
1073	if (NumSigned >= `1`) {
1074	IsSigned = true;
1075	KnownBits = NumSigned;
1076	break;
1077	}
1078	return SDValue ();
1079	}
1080	case ISD::SRL: {
1081	if (NumZero >= `1` && NumSigned < NumZero) {
1082	IsSigned = false;
1083	KnownBits = NumZero;
1084	break;
1085	}
1086	if (NumSigned >= `1` && DemandedBits.isSignBitClear()) {
1087	IsSigned = true;
1088	KnownBits = NumSigned;
1089	break;
1090	}
1091	return SDValue ();
1092	}
1093	}
1094
1095	unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
1096	: (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
1097
1098	// Find the smallest power-2 type that is legal for this vector size and
1099	// operation, given the original type size and the number of known sign/zero
1100	// bits.
1101	EVT VT = Op.getValueType();
1102	unsigned MinWidth =
1103	std::max<unsigned>(a: VT.getScalarSizeInBits() - KnownBits, b: `8`);
1104	EVT NVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: llvm::bit_ceil(Value: MinWidth));
1105	if (NVT.getScalarSizeInBits() > VT.getScalarSizeInBits())
1106	return SDValue ();
1107	if (VT.isVector())
1108	NVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: NVT, EC: VT.getVectorElementCount());
1109	if (TLO.LegalTypes() && !TLI.isOperationLegal(Op: AVGOpc, VT: NVT)) {
1110	// If we could not transform, and (both) adds are nuw/nsw, we can use the
1111	// larger type size to do the transform.
1112	if (TLO.LegalOperations() && !TLI.isOperationLegal(Op: AVGOpc, VT))
1113	return SDValue ();
1114	if (DAG.willNotOverflowAdd(IsSigned, N0: Add.getOperand(i: `0`),
1115	N1: Add.getOperand(i: `1`)) &&
1116	(!Add2 \|\| DAG.willNotOverflowAdd(IsSigned, N0: Add2.getOperand(i: `0`),
1117	N1: Add2.getOperand(i: `1`))))
1118	NVT = VT;
1119	else
1120	return SDValue ();
1121	}
1122
1123	// Don't create a AVGFLOOR node with a scalar constant unless its legal as
1124	// this is likely to stop other folds (reassociation, value tracking etc.)
1125	if (!IsCeil && !TLI.isOperationLegal(Op: AVGOpc, VT: NVT) &&
1126	(isa<ConstantSDNode>(Val: ExtOpA) \|\| isa<ConstantSDNode>(Val: ExtOpB)))
1127	return SDValue ();
1128
1129	SDLoc DL(Op);
1130	SDValue ResultAVG =
1131	DAG.getNode(Opcode: AVGOpc, DL, VT: NVT, N1: DAG.getExtOrTrunc(IsSigned, Op: ExtOpA, DL, VT: NVT),
1132	N2: DAG.getExtOrTrunc(IsSigned, Op: ExtOpB, DL, VT: NVT));
1133	return DAG.getExtOrTrunc(IsSigned, Op: ResultAVG, DL, VT);
1134	}
1135
1136	/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1137	/// result of Op are ever used downstream. If we can use this information to
1138	/// simplify Op, create a new simplified DAG node and return true, returning the
1139	/// original and new nodes in Old and New. Otherwise, analyze the expression and
1140	/// return a mask of Known bits for the expression (used to simplify the
1141	/// caller). The Known bits may only be accurate for those bits in the
1142	/// OriginalDemandedBits and OriginalDemandedElts.
1143	bool TargetLowering::SimplifyDemandedBits(
1144	SDValue Op, const APInt &OriginalDemandedBits,
1145	const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1146	unsigned Depth, bool AssumeSingleUse) const {
1147	unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1148	assert(Op.getScalarValueSizeInBits() == BitWidth &&
1149	"Mask size mismatches value type size!");
1150
1151	// Don't know anything.
1152	Known = KnownBits (BitWidth);
1153
1154	EVT VT = Op.getValueType();
1155	bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1156	unsigned NumElts = OriginalDemandedElts.getBitWidth();
1157	assert((!VT.isFixedLengthVector() \|\| NumElts == VT.getVectorNumElements()) &&
1158	"Unexpected vector size");
1159
1160	APInt DemandedBits = OriginalDemandedBits;
1161	APInt DemandedElts = OriginalDemandedElts;
1162	SDLoc dl(Op);
1163
1164	// Undef operand.
1165	if (Op.isUndef())
1166	return false;
1167
1168	// We can't simplify target constants.
1169	if (Op.getOpcode() == ISD::TargetConstant)
1170	return false;
1171
1172	if (Op.getOpcode() == ISD::Constant) {
1173	// We know all of the bits for a constant!
1174	Known = KnownBits::makeConstant(C: Op ->getAsAPIntVal());
1175	return false;
1176	}
1177
1178	if (Op.getOpcode() == ISD::ConstantFP) {
1179	// We know all of the bits for a floating point constant!
1180	Known = KnownBits::makeConstant(
1181	C: cast<ConstantFPSDNode>(Val&: Op)->getValueAPF().bitcastToAPInt());
1182	return false;
1183	}
1184
1185	// Other users may use these bits.
1186	bool HasMultiUse = false;
1187	if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1188	if (Depth >= SelectionDAG::MaxRecursionDepth) {
1189	// Limit search depth.
1190	return false;
1191	}
1192	// Allow multiple uses, just set the DemandedBits/Elts to all bits.
1193	DemandedBits = APInt::getAllOnes(numBits: BitWidth);
1194	DemandedElts = APInt::getAllOnes(numBits: NumElts);
1195	HasMultiUse = true;
1196	} else if (OriginalDemandedBits == `0` \|\| OriginalDemandedElts == `0`) {
1197	// Not demanding any bits/elts from Op.
1198	return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
1199	} else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1200	// Limit search depth.
1201	return false;
1202	}
1203
1204	KnownBits Known2;
1205	switch (Op.getOpcode()) {
1206	case ISD::SCALAR_TO_VECTOR: {
1207	if (VT.isScalableVector())
1208	return false;
1209	if (!DemandedElts [`0`])
1210	return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
1211
1212	KnownBits SrcKnown;
1213	SDValue Src = Op.getOperand(i: `0`);
1214	unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1215	APInt SrcDemandedBits = DemandedBits.zext(width: SrcBitWidth);
1216	if (SimplifyDemandedBits(Op: Src, DemandedBits: SrcDemandedBits, Known&: SrcKnown, TLO, Depth: Depth + `1`))
1217	return true;
1218
1219	// Upper elements are undef, so only get the knownbits if we just demand
1220	// the bottom element.
1221	if (DemandedElts == `1`)
1222	Known = SrcKnown.anyextOrTrunc(BitWidth);
1223	break;
1224	}
1225	case ISD::BUILD_VECTOR:
1226	// Collect the known bits that are shared by every demanded element.
1227	// TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1228	Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1229	return false; // Don't fall through, will infinitely loop.
1230	case ISD::SPLAT_VECTOR: {
1231	SDValue Scl = Op.getOperand(i: `0`);
1232	APInt DemandedSclBits = DemandedBits.zextOrTrunc(width: Scl.getValueSizeInBits());
1233	KnownBits KnownScl;
1234	if (SimplifyDemandedBits(Op: Scl, DemandedBits: DemandedSclBits, Known&: KnownScl, TLO, Depth: Depth + `1`))
1235	return true;
1236
1237	// Implicitly truncate the bits to match the official semantics of
1238	// SPLAT_VECTOR.
1239	Known = KnownScl.trunc(BitWidth);
1240	break;
1241	}
1242	case ISD::LOAD: {
1243	auto *LD = cast<LoadSDNode>(Val&: Op);
1244	if (getTargetConstantFromLoad(LD)) {
1245	Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1246	return false; // Don't fall through, will infinitely loop.
1247	}
1248	if (ISD::isZEXTLoad(N: Op.getNode()) && Op.getResNo() == `0`) {
1249	// If this is a ZEXTLoad and we are looking at the loaded value.
1250	EVT MemVT = LD->getMemoryVT();
1251	unsigned MemBits = MemVT.getScalarSizeInBits();
1252	Known.Zero.setBitsFrom(MemBits);
1253	return false; // Don't fall through, will infinitely loop.
1254	}
1255	break;
1256	}
1257	case ISD::INSERT_VECTOR_ELT: {
1258	if (VT.isScalableVector())
1259	return false;
1260	SDValue Vec = Op.getOperand(i: `0`);
1261	SDValue Scl = Op.getOperand(i: `1`);
1262	auto *CIdx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `2`));
1263	EVT VecVT = Vec.getValueType();
1264
1265	// If index isn't constant, assume we need all vector elements AND the
1266	// inserted element.
1267	APInt DemandedVecElts(DemandedElts);
1268	if (CIdx && CIdx->getAPIntValue().ult(RHS: VecVT.getVectorNumElements())) {
1269	unsigned Idx = CIdx->getZExtValue();
1270	DemandedVecElts.clearBit(BitPosition: Idx);
1271
1272	// Inserted element is not required.
1273	if (!DemandedElts [Idx])
1274	return TLO.CombineTo(O: Op, N: Vec);
1275	}
1276
1277	KnownBits KnownScl;
1278	unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1279	APInt DemandedSclBits = DemandedBits.zextOrTrunc(width: NumSclBits);
1280	if (SimplifyDemandedBits(Op: Scl, DemandedBits: DemandedSclBits, Known&: KnownScl, TLO, Depth: Depth + `1`))
1281	return true;
1282
1283	Known = KnownScl.anyextOrTrunc(BitWidth);
1284
1285	KnownBits KnownVec;
1286	if (SimplifyDemandedBits(Op: Vec, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedVecElts, Known&: KnownVec, TLO,
1287	Depth: Depth + `1`))
1288	return true;
1289
1290	if (!!DemandedVecElts)
1291	Known = Known.intersectWith(RHS: KnownVec);
1292
1293	return false;
1294	}
1295	case ISD::INSERT_SUBVECTOR: {
1296	if (VT.isScalableVector())
1297	return false;
1298	// Demand any elements from the subvector and the remainder from the src it's
1299	// inserted into.
1300	SDValue Src = Op.getOperand(i: `0`);
1301	SDValue Sub = Op.getOperand(i: `1`);
1302	uint64_t Idx = Op.getConstantOperandVal(i: `2`);
1303	unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1304	APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx);
1305	APInt DemandedSrcElts = DemandedElts;
1306	DemandedSrcElts.clearBits(LoBit: Idx, HiBit: Idx + NumSubElts);
1307
1308	KnownBits KnownSub, KnownSrc;
1309	if (SimplifyDemandedBits(Op: Sub, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSubElts, Known&: KnownSub, TLO,
1310	Depth: Depth + `1`))
1311	return true;
1312	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSrcElts, Known&: KnownSrc, TLO,
1313	Depth: Depth + `1`))
1314	return true;
1315
1316	Known.Zero.setAllBits();
1317	Known.One.setAllBits();
1318	if (!!DemandedSubElts)
1319	Known = Known.intersectWith(RHS: KnownSub);
1320	if (!!DemandedSrcElts)
1321	Known = Known.intersectWith(RHS: KnownSrc);
1322
1323	// Attempt to avoid multi-use src if we don't need anything from it.
1324	if (!DemandedBits.isAllOnes() \|\| !DemandedSubElts.isAllOnes() \|\|
1325	!DemandedSrcElts.isAllOnes()) {
1326	SDValue NewSub = SimplifyMultipleUseDemandedBits(
1327	Op: Sub, DemandedBits, DemandedElts: DemandedSubElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1328	SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1329	Op: Src, DemandedBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1330	if (NewSub \|\| NewSrc) {
1331	NewSub = NewSub ? NewSub : Sub;
1332	NewSrc = NewSrc ? NewSrc : Src;
1333	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: NewSrc, N2: NewSub,
1334	N3: Op.getOperand(i: `2`));
1335	return TLO.CombineTo(O: Op, N: NewOp);
1336	}
1337	}
1338	break;
1339	}
1340	case ISD::EXTRACT_SUBVECTOR: {
1341	if (VT.isScalableVector())
1342	return false;
1343	// Offset the demanded elts by the subvector index.
1344	SDValue Src = Op.getOperand(i: `0`);
1345	if (Src.getValueType().isScalableVector())
1346	break;
1347	uint64_t Idx = Op.getConstantOperandVal(i: `1`);
1348	unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1349	APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts).shl(shiftAmt: Idx);
1350
1351	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSrcElts, Known, TLO,
1352	Depth: Depth + `1`))
1353	return true;
1354
1355	// Attempt to avoid multi-use src if we don't need anything from it.
1356	if (!DemandedBits.isAllOnes() \|\| !DemandedSrcElts.isAllOnes()) {
1357	SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1358	Op: Src, DemandedBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1359	if (DemandedSrc) {
1360	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedSrc,
1361	N2: Op.getOperand(i: `1`));
1362	return TLO.CombineTo(O: Op, N: NewOp);
1363	}
1364	}
1365	break;
1366	}
1367	case ISD::CONCAT_VECTORS: {
1368	if (VT.isScalableVector())
1369	return false;
1370	Known.Zero.setAllBits();
1371	Known.One.setAllBits();
1372	EVT SubVT = Op.getOperand(i: `0`).getValueType();
1373	unsigned NumSubVecs = Op.getNumOperands();
1374	unsigned NumSubElts = SubVT.getVectorNumElements();
1375	for (unsigned i = `0`; i != NumSubVecs; ++i) {
1376	APInt DemandedSubElts =
1377	DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
1378	if (SimplifyDemandedBits(Op: Op.getOperand(i), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSubElts,
1379	Known&: Known2, TLO, Depth: Depth + `1`))
1380	return true;
1381	// Known bits are shared by every demanded subvector element.
1382	if (!!DemandedSubElts)
1383	Known = Known.intersectWith(RHS: Known2);
1384	}
1385	break;
1386	}
1387	case ISD::VECTOR_SHUFFLE: {
1388	assert(!VT.isScalableVector());
1389	ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Val&: Op)->getMask();
1390
1391	// Collect demanded elements from shuffle operands..
1392	APInt DemandedLHS, DemandedRHS;
1393	if (!getShuffleDemandedElts(SrcWidth: NumElts, Mask: ShuffleMask, DemandedElts, DemandedLHS,
1394	DemandedRHS))
1395	break;
1396
1397	if (!!DemandedLHS \|\| !!DemandedRHS) {
1398	SDValue Op0 = Op.getOperand(i: `0`);
1399	SDValue Op1 = Op.getOperand(i: `1`);
1400
1401	Known.Zero.setAllBits();
1402	Known.One.setAllBits();
1403	if (!!DemandedLHS) {
1404	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedLHS, Known&: Known2, TLO,
1405	Depth: Depth + `1`))
1406	return true;
1407	Known = Known.intersectWith(RHS: Known2);
1408	}
1409	if (!!DemandedRHS) {
1410	if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedRHS, Known&: Known2, TLO,
1411	Depth: Depth + `1`))
1412	return true;
1413	Known = Known.intersectWith(RHS: Known2);
1414	}
1415
1416	// Attempt to avoid multi-use ops if we don't need anything from them.
1417	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1418	Op: Op0, DemandedBits, DemandedElts: DemandedLHS, DAG&: TLO.DAG, Depth: Depth + `1`);
1419	SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1420	Op: Op1, DemandedBits, DemandedElts: DemandedRHS, DAG&: TLO.DAG, Depth: Depth + `1`);
1421	if (DemandedOp0 \|\| DemandedOp1) {
1422	Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1423	Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1424	SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, N1: Op0, N2: Op1, Mask: ShuffleMask);
1425	return TLO.CombineTo(O: Op, N: NewOp);
1426	}
1427	}
1428	break;
1429	}
1430	case ISD::AND: {
1431	SDValue Op0 = Op.getOperand(i: `0`);
1432	SDValue Op1 = Op.getOperand(i: `1`);
1433
1434	// If the RHS is a constant, check to see if the LHS would be zero without
1435	// using the bits from the RHS. Below, we use knowledge about the RHS to
1436	// simplify the LHS, here we're using information from the LHS to simplify
1437	// the RHS.
1438	if (ConstantSDNode *RHSC = isConstOrConstSplat(N: Op1, DemandedElts)) {
1439	// Do not increment Depth here; that can cause an infinite loop.
1440	KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op: Op0, DemandedElts, Depth);
1441	// If the LHS already has zeros where RHSC does, this 'and' is dead.
1442	if ((LHSKnown.Zero & DemandedBits) ==
1443	(~RHSC->getAPIntValue() & DemandedBits))
1444	return TLO.CombineTo(O: Op, N: Op0);
1445
1446	// If any of the set bits in the RHS are known zero on the LHS, shrink
1447	// the constant.
1448	if (ShrinkDemandedConstant(Op, DemandedBits: ~LHSKnown.Zero & DemandedBits,
1449	DemandedElts, TLO))
1450	return true;
1451
1452	// Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1453	// constant, but if this 'and' is only clearing bits that were just set by
1454	// the xor, then this 'and' can be eliminated by shrinking the mask of
1455	// the xor. For example, for a 32-bit X:
1456	// and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1457	if (isBitwiseNot(V: Op0) && Op0.hasOneUse() &&
1458	LHSKnown.One == ~RHSC->getAPIntValue()) {
1459	SDValue Xor = TLO.DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Op0.getOperand(i: `0`), N2: Op1);
1460	return TLO.CombineTo(O: Op, N: Xor);
1461	}
1462	}
1463
1464	// AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1465	// iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1466	if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1467	(Op0.getOperand(i: `0`).isUndef() \|\|
1468	ISD::isBuildVectorOfConstantSDNodes(N: Op0.getOperand(i: `0`).getNode())) &&
1469	Op0 ->hasOneUse()) {
1470	unsigned NumSubElts =
1471	Op0.getOperand(i: `1`).getValueType().getVectorNumElements();
1472	unsigned SubIdx = Op0.getConstantOperandVal(i: `2`);
1473	APInt DemandedSub =
1474	APInt::getBitsSet(numBits: NumElts, loBit: SubIdx, hiBit: SubIdx + NumSubElts);
1475	KnownBits KnownSubMask =
1476	TLO.DAG.computeKnownBits(Op: Op1, DemandedElts: DemandedSub & DemandedElts, Depth: Depth + `1`);
1477	if (DemandedBits.isSubsetOf(RHS: KnownSubMask.One)) {
1478	SDValue NewAnd =
1479	TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op0.getOperand(i: `0`), N2: Op1);
1480	SDValue NewInsert =
1481	TLO.DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL: dl, VT, N1: NewAnd,
1482	N2: Op0.getOperand(i: `1`), N3: Op0.getOperand(i: `2`));
1483	return TLO.CombineTo(O: Op, N: NewInsert);
1484	}
1485	}
1486
1487	if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1488	Depth: Depth + `1`))
1489	return true;
1490	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: ~Known.Zero & DemandedBits, OriginalDemandedElts: DemandedElts,
1491	Known&: Known2, TLO, Depth: Depth + `1`))
1492	return true;
1493
1494	// If all of the demanded bits are known one on one side, return the other.
1495	// These bits cannot contribute to the result of the 'and'.
1496	if (DemandedBits.isSubsetOf(RHS: Known2.Zero \| Known.One))
1497	return TLO.CombineTo(O: Op, N: Op0);
1498	if (DemandedBits.isSubsetOf(RHS: Known.Zero \| Known2.One))
1499	return TLO.CombineTo(O: Op, N: Op1);
1500	// If all of the demanded bits in the inputs are known zeros, return zero.
1501	if (DemandedBits.isSubsetOf(RHS: Known.Zero \| Known2.Zero))
1502	return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: `0`, DL: dl, VT));
1503	// If the RHS is a constant, see if we can simplify it.
1504	if (ShrinkDemandedConstant(Op, DemandedBits: ~Known2.Zero & DemandedBits, DemandedElts,
1505	TLO))
1506	return true;
1507	// If the operation can be done in a smaller type, do so.
1508	if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1509	return true;
1510
1511	// Attempt to avoid multi-use ops if we don't need anything from them.
1512	if (!DemandedBits.isAllOnes() \|\| !DemandedElts.isAllOnes()) {
1513	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1514	Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1515	SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1516	Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1517	if (DemandedOp0 \|\| DemandedOp1) {
1518	Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1519	Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1520	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1521	return TLO.CombineTo(O: Op, N: NewOp);
1522	}
1523	}
1524
1525	Known &= Known2;
1526	break;
1527	}
1528	case ISD::OR: {
1529	SDValue Op0 = Op.getOperand(i: `0`);
1530	SDValue Op1 = Op.getOperand(i: `1`);
1531	if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1532	Depth: Depth + `1`)) {
1533	Op ->dropFlags(Mask: SDNodeFlags::Disjoint);
1534	return true;
1535	}
1536
1537	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: ~Known.One & DemandedBits, OriginalDemandedElts: DemandedElts,
1538	Known&: Known2, TLO, Depth: Depth + `1`)) {
1539	Op ->dropFlags(Mask: SDNodeFlags::Disjoint);
1540	return true;
1541	}
1542
1543	// If all of the demanded bits are known zero on one side, return the other.
1544	// These bits cannot contribute to the result of the 'or'.
1545	if (DemandedBits.isSubsetOf(RHS: Known2.One \| Known.Zero))
1546	return TLO.CombineTo(O: Op, N: Op0);
1547	if (DemandedBits.isSubsetOf(RHS: Known.One \| Known2.Zero))
1548	return TLO.CombineTo(O: Op, N: Op1);
1549	// If the RHS is a constant, see if we can simplify it.
1550	if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1551	return true;
1552	// If the operation can be done in a smaller type, do so.
1553	if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1554	return true;
1555
1556	// Attempt to avoid multi-use ops if we don't need anything from them.
1557	if (!DemandedBits.isAllOnes() \|\| !DemandedElts.isAllOnes()) {
1558	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1559	Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1560	SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1561	Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1562	if (DemandedOp0 \|\| DemandedOp1) {
1563	Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1564	Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1565	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1566	return TLO.CombineTo(O: Op, N: NewOp);
1567	}
1568	}
1569
1570	// (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1\|C2), (and Y, C2))
1571	// TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1572	if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1573	Op0 ->hasOneUse() && Op1 ->hasOneUse()) {
1574	// Attempt to match all commutations - m_c_Or would've been useful!
1575	for (int I = `0`; I != `2`; ++I) {
1576	SDValue X = Op.getOperand(i: I).getOperand(i: `0`);
1577	SDValue C1 = Op.getOperand(i: I).getOperand(i: `1`);
1578	SDValue Alt = Op.getOperand(i: `1` - I).getOperand(i: `0`);
1579	SDValue C2 = Op.getOperand(i: `1` - I).getOperand(i: `1`);
1580	if (Alt.getOpcode() == ISD::OR) {
1581	for (int J = `0`; J != `2`; ++J) {
1582	if (X == Alt.getOperand(i: J)) {
1583	SDValue Y = Alt.getOperand(i: `1` - J);
1584	if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(Opcode: ISD::OR, DL: dl, VT,
1585	Ops: {C1, C2})) {
1586	SDValue MaskX = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: X, N2: C12);
1587	SDValue MaskY = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Y, N2: C2);
1588	return TLO.CombineTo(
1589	O: Op, N: TLO.DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: MaskX, N2: MaskY));
1590	}
1591	}
1592	}
1593	}
1594	}
1595	}
1596
1597	Known \|= Known2;
1598	break;
1599	}
1600	case ISD::XOR: {
1601	SDValue Op0 = Op.getOperand(i: `0`);
1602	SDValue Op1 = Op.getOperand(i: `1`);
1603
1604	if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1605	Depth: Depth + `1`))
1606	return true;
1607	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
1608	Depth: Depth + `1`))
1609	return true;
1610
1611	// If all of the demanded bits are known zero on one side, return the other.
1612	// These bits cannot contribute to the result of the 'xor'.
1613	if (DemandedBits.isSubsetOf(RHS: Known.Zero))
1614	return TLO.CombineTo(O: Op, N: Op0);
1615	if (DemandedBits.isSubsetOf(RHS: Known2.Zero))
1616	return TLO.CombineTo(O: Op, N: Op1);
1617	// If the operation can be done in a smaller type, do so.
1618	if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1619	return true;
1620
1621	// If all of the demanded bits are known to be zero on one side or the other,
1622	// turn this into an *inclusive* or.
1623	// e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1624	if (DemandedBits.isSubsetOf(RHS: Known.Zero \| Known2.Zero))
1625	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Op0, N2: Op1));
1626
1627	ConstantSDNode *C = isConstOrConstSplat(N: Op1, DemandedElts);
1628	if (C) {
1629	// If one side is a constant, and all of the set bits in the constant are
1630	// also known set on the other side, turn this into an AND, as we know
1631	// the bits will be cleared.
1632	// e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1633	// NB: it is okay if more bits are known than are requested
1634	if (C->getAPIntValue() == Known2.One) {
1635	SDValue ANDC =
1636	TLO.DAG.getConstant(Val: ~C->getAPIntValue() & DemandedBits, DL: dl, VT);
1637	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op0, N2: ANDC));
1638	}
1639
1640	// If the RHS is a constant, see if we can change it. Don't alter a -1
1641	// constant because that's a 'not' op, and that is better for combining
1642	// and codegen.
1643	if (!C->isAllOnes() && DemandedBits.isSubsetOf(RHS: C->getAPIntValue())) {
1644	// We're flipping all demanded bits. Flip the undemanded bits too.
1645	SDValue New = TLO.DAG.getNOT(DL: dl, Val: Op0, VT);
1646	return TLO.CombineTo(O: Op, N: New);
1647	}
1648
1649	unsigned Op0Opcode = Op0.getOpcode();
1650	if ((Op0Opcode == ISD::SRL \|\| Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1651	if (ConstantSDNode *ShiftC =
1652	isConstOrConstSplat(N: Op0.getOperand(i: `1`), DemandedElts)) {
1653	// Don't crash on an oversized shift. We cannot guarantee that a
1654	// bogus shift has been simplified to undef.
1655	if (ShiftC->getAPIntValue().ult(RHS: BitWidth)) {
1656	uint64_t ShiftAmt = ShiftC->getZExtValue();
1657	APInt Ones = APInt::getAllOnes(numBits: BitWidth);
1658	Ones = Op0Opcode == ISD::SHL ? Ones.shl(shiftAmt: ShiftAmt)
1659	: Ones.lshr(shiftAmt: ShiftAmt);
1660	if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1661	isDesirableToCommuteXorWithShift(N: Op.getNode())) {
1662	// If the xor constant is a demanded mask, do a 'not' before the
1663	// shift:
1664	// xor (X << ShiftC), XorC --> (not X) << ShiftC
1665	// xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1666	SDValue Not = TLO.DAG.getNOT(DL: dl, Val: Op0.getOperand(i: `0`), VT);
1667	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op0Opcode, DL: dl, VT, N1: Not,
1668	N2: Op0.getOperand(i: `1`)));
1669	}
1670	}
1671	}
1672	}
1673	}
1674
1675	// If we can't turn this into a 'not', try to shrink the constant.
1676	if (!C \|\| !C->isAllOnes())
1677	if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1678	return true;
1679
1680	// Attempt to avoid multi-use ops if we don't need anything from them.
1681	if (!DemandedBits.isAllOnes() \|\| !DemandedElts.isAllOnes()) {
1682	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1683	Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1684	SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1685	Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1686	if (DemandedOp0 \|\| DemandedOp1) {
1687	Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1688	Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1689	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1690	return TLO.CombineTo(O: Op, N: NewOp);
1691	}
1692	}
1693
1694	Known ^= Known2;
1695	break;
1696	}
1697	case ISD::SELECT:
1698	if (SimplifyDemandedBits(Op: Op.getOperand(i: `2`), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1699	Known, TLO, Depth: Depth + `1`))
1700	return true;
1701	if (SimplifyDemandedBits(Op: Op.getOperand(i: `1`), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1702	Known&: Known2, TLO, Depth: Depth + `1`))
1703	return true;
1704
1705	// If the operands are constants, see if we can simplify them.
1706	if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1707	return true;
1708
1709	// Only known if known in both the LHS and RHS.
1710	Known = Known.intersectWith(RHS: Known2);
1711	break;
1712	case ISD::VSELECT:
1713	if (SimplifyDemandedBits(Op: Op.getOperand(i: `2`), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1714	Known, TLO, Depth: Depth + `1`))
1715	return true;
1716	if (SimplifyDemandedBits(Op: Op.getOperand(i: `1`), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1717	Known&: Known2, TLO, Depth: Depth + `1`))
1718	return true;
1719
1720	// Only known if known in both the LHS and RHS.
1721	Known = Known.intersectWith(RHS: Known2);
1722	break;
1723	case ISD::SELECT_CC:
1724	if (SimplifyDemandedBits(Op: Op.getOperand(i: `3`), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1725	Known, TLO, Depth: Depth + `1`))
1726	return true;
1727	if (SimplifyDemandedBits(Op: Op.getOperand(i: `2`), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1728	Known&: Known2, TLO, Depth: Depth + `1`))
1729	return true;
1730
1731	// If the operands are constants, see if we can simplify them.
1732	if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1733	return true;
1734
1735	// Only known if known in both the LHS and RHS.
1736	Known = Known.intersectWith(RHS: Known2);
1737	break;
1738	case ISD::SETCC: {
1739	SDValue Op0 = Op.getOperand(i: `0`);
1740	SDValue Op1 = Op.getOperand(i: `1`);
1741	ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: `2`))->get();
1742	// If (1) we only need the sign-bit, (2) the setcc operands are the same
1743	// width as the setcc result, and (3) the result of a setcc conforms to 0 or
1744	// -1, we may be able to bypass the setcc.
1745	if (DemandedBits.isSignMask() &&
1746	Op0.getScalarValueSizeInBits() == BitWidth &&
1747	getBooleanContents(Type: Op0.getValueType()) ==
1748	BooleanContent::ZeroOrNegativeOneBooleanContent) {
1749	// If we're testing X < 0, then this compare isn't needed - just use X!
1750	// FIXME: We're limiting to integer types here, but this should also work
1751	// if we don't care about FP signed-zero. The use of SETLT with FP means
1752	// that we don't care about NaNs.
1753	if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1754	(isNullConstant(V: Op1) \|\| ISD::isBuildVectorAllZeros(N: Op1.getNode())))
1755	return TLO.CombineTo(O: Op, N: Op0);
1756
1757	// TODO: Should we check for other forms of sign-bit comparisons?
1758	// Examples: X <= -1, X >= 0
1759	}
1760	if (getBooleanContents(Type: Op0.getValueType()) ==
1761	TargetLowering::ZeroOrOneBooleanContent &&
1762	BitWidth > `1`)
1763	Known.Zero.setBitsFrom(`1`);
1764	break;
1765	}
1766	case ISD::SHL: {
1767	SDValue Op0 = Op.getOperand(i: `0`);
1768	SDValue Op1 = Op.getOperand(i: `1`);
1769	EVT ShiftVT = Op1.getValueType();
1770
1771	if (std::optional<uint64_t> KnownSA =
1772	TLO.DAG.getValidShiftAmount(V: Op, DemandedElts, Depth: Depth + `1`)) {
1773	unsigned ShAmt = *KnownSA;
1774	if (ShAmt == `0`)
1775	return TLO.CombineTo(O: Op, N: Op0);
1776
1777	// If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1778	// single shift. We can do this if the bottom bits (which are shifted
1779	// out) are never demanded.
1780	// TODO - support non-uniform vector amounts.
1781	if (Op0.getOpcode() == ISD::SRL) {
1782	if (!DemandedBits.intersects(RHS: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ShAmt))) {
1783	if (std::optional<uint64_t> InnerSA =
1784	TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + `2`)) {
1785	unsigned C1 = *InnerSA;
1786	unsigned Opc = ISD::SHL;
1787	int Diff = ShAmt - C1;
1788	if (Diff < `0`) {
1789	Diff = -Diff;
1790	Opc = ISD::SRL;
1791	}
1792	SDValue NewSA = TLO.DAG.getConstant(Val: Diff, DL: dl, VT: ShiftVT);
1793	return TLO.CombineTo(
1794	O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, N1: Op0.getOperand(i: `0`), N2: NewSA));
1795	}
1796	}
1797	}
1798
1799	// Convert (shl (anyext x), c) to (anyext (shl x, c)) if the high bits
1800	// are not demanded. This will likely allow the anyext to be folded away.
1801	// TODO - support non-uniform vector amounts.
1802	if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1803	SDValue InnerOp = Op0.getOperand(i: `0`);
1804	EVT InnerVT = InnerOp.getValueType();
1805	unsigned InnerBits = InnerVT.getScalarSizeInBits();
1806	if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1807	isTypeDesirableForOp(ISD::SHL, VT: InnerVT)) {
1808	SDValue NarrowShl = TLO.DAG.getNode(
1809	Opcode: ISD::SHL, DL: dl, VT: InnerVT, N1: InnerOp,
1810	N2: TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: InnerVT, DL: dl));
1811	return TLO.CombineTo(
1812	O: Op, N: TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: NarrowShl));
1813	}
1814
1815	// Repeat the SHL optimization above in cases where an extension
1816	// intervenes: (shl (anyext (shr x, c1)), c2) to
1817	// (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1818	// aren't demanded (as above) and that the shifted upper c1 bits of
1819	// x aren't demanded.
1820	// TODO - support non-uniform vector amounts.
1821	if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1822	InnerOp.hasOneUse()) {
1823	if (std::optional<uint64_t> SA2 = TLO.DAG.getValidShiftAmount(
1824	V: InnerOp, DemandedElts, Depth: Depth + `2`)) {
1825	unsigned InnerShAmt = *SA2;
1826	if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1827	DemandedBits.getActiveBits() <=
1828	(InnerBits - InnerShAmt + ShAmt) &&
1829	DemandedBits.countr_zero() >= ShAmt) {
1830	SDValue NewSA =
1831	TLO.DAG.getConstant(Val: ShAmt - InnerShAmt, DL: dl, VT: ShiftVT);
1832	SDValue NewExt = TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT,
1833	Operand: InnerOp.getOperand(i: `0`));
1834	return TLO.CombineTo(
1835	O: Op, N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: NewExt, N2: NewSA));
1836	}
1837	}
1838	}
1839	}
1840
1841	APInt InDemandedMask = DemandedBits.lshr(shiftAmt: ShAmt);
1842	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
1843	Depth: Depth + `1`)) {
1844	// Disable the nsw and nuw flags. We can no longer guarantee that we
1845	// won't wrap after simplification.
1846	Op ->dropFlags(Mask: SDNodeFlags::NoWrap);
1847	return true;
1848	}
1849	Known.Zero <<= ShAmt;
1850	Known.One <<= ShAmt;
1851	// low bits known zero.
1852	Known.Zero.setLowBits(ShAmt);
1853
1854	// Attempt to avoid multi-use ops if we don't need anything from them.
1855	if (!InDemandedMask.isAllOnes() \|\| !DemandedElts.isAllOnes()) {
1856	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1857	Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1858	if (DemandedOp0) {
1859	SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: DemandedOp0, N2: Op1);
1860	return TLO.CombineTo(O: Op, N: NewOp);
1861	}
1862	}
1863
1864	// TODO: Can we merge this fold with the one below?
1865	// Try shrinking the operation as long as the shift amount will still be
1866	// in range.
1867	if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1868	Op.getNode()->hasOneUse()) {
1869	// Search for the smallest integer type with free casts to and from
1870	// Op's type. For expedience, just check power-of-2 integer types.
1871	unsigned DemandedSize = DemandedBits.getActiveBits();
1872	for (unsigned SmallVTBits = llvm::bit_ceil(Value: DemandedSize);
1873	SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(A: SmallVTBits)) {
1874	EVT SmallVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: SmallVTBits);
1875	if (isNarrowingProfitable(N: Op.getNode(), SrcVT: VT, DestVT: SmallVT) &&
1876	isTypeDesirableForOp(ISD::SHL, VT: SmallVT) &&
1877	isTruncateFree(FromVT: VT, ToVT: SmallVT) && isZExtFree(FromTy: SmallVT, ToTy: VT) &&
1878	(!TLO.LegalOperations() \|\| isOperationLegal(Op: ISD::SHL, VT: SmallVT))) {
1879	assert(DemandedSize <= SmallVTBits &&
1880	"Narrowed below demanded bits?");
1881	// We found a type with free casts.
1882	SDValue NarrowShl = TLO.DAG.getNode(
1883	Opcode: ISD::SHL, DL: dl, VT: SmallVT,
1884	N1: TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SmallVT, Operand: Op.getOperand(i: `0`)),
1885	N2: TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: SmallVT, DL: dl));
1886	return TLO.CombineTo(
1887	O: Op, N: TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: NarrowShl));
1888	}
1889	}
1890	}
1891
1892	// Narrow shift to lower half - similar to ShrinkDemandedOp.
1893	// (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1894	// Only do this if we demand the upper half so the knownbits are correct.
1895	unsigned HalfWidth = BitWidth / `2`;
1896	if ((BitWidth % `2`) == `0` && !VT.isVector() && ShAmt < HalfWidth &&
1897	DemandedBits.countLeadingOnes() >= HalfWidth) {
1898	EVT HalfVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: HalfWidth);
1899	if (isNarrowingProfitable(N: Op.getNode(), SrcVT: VT, DestVT: HalfVT) &&
1900	isTypeDesirableForOp(ISD::SHL, VT: HalfVT) &&
1901	isTruncateFree(FromVT: VT, ToVT: HalfVT) && isZExtFree(FromTy: HalfVT, ToTy: VT) &&
1902	(!TLO.LegalOperations() \|\| isOperationLegal(Op: ISD::SHL, VT: HalfVT))) {
1903	// If we're demanding the upper bits at all, we must ensure
1904	// that the upper bits of the shift result are known to be zero,
1905	// which is equivalent to the narrow shift being NUW.
1906	if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1907	bool IsNSW = Known.countMinSignBits() > HalfWidth;
1908	SDNodeFlags Flags;
1909	Flags.setNoSignedWrap(IsNSW);
1910	Flags.setNoUnsignedWrap(IsNUW);
1911	SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HalfVT, Operand: Op0);
1912	SDValue NewShiftAmt =
1913	TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: HalfVT, DL: dl);
1914	SDValue NewShift = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HalfVT, N1: NewOp,
1915	N2: NewShiftAmt, Flags);
1916	SDValue NewExt =
1917	TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: NewShift);
1918	return TLO.CombineTo(O: Op, N: NewExt);
1919	}
1920	}
1921	}
1922	} else {
1923	// This is a variable shift, so we can't shift the demand mask by a known
1924	// amount. But if we are not demanding high bits, then we are not
1925	// demanding those bits from the pre-shifted operand either.
1926	if (unsigned CTLZ = DemandedBits.countl_zero()) {
1927	APInt DemandedFromOp(APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - CTLZ));
1928	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedFromOp, OriginalDemandedElts: DemandedElts, Known, TLO,
1929	Depth: Depth + `1`)) {
1930	// Disable the nsw and nuw flags. We can no longer guarantee that we
1931	// won't wrap after simplification.
1932	Op ->dropFlags(Mask: SDNodeFlags::NoWrap);
1933	return true;
1934	}
1935	Known.resetAll();
1936	}
1937	}
1938
1939	// If we are only demanding sign bits then we can use the shift source
1940	// directly.
1941	if (std::optional<uint64_t> MaxSA =
1942	TLO.DAG.getValidMaximumShiftAmount(V: Op, DemandedElts, Depth: Depth + `1`)) {
1943	unsigned ShAmt = *MaxSA;
1944	unsigned NumSignBits =
1945	TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + `1`);
1946	unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1947	if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1948	return TLO.CombineTo(O: Op, N: Op0);
1949	}
1950	break;
1951	}
1952	case ISD::SRL: {
1953	SDValue Op0 = Op.getOperand(i: `0`);
1954	SDValue Op1 = Op.getOperand(i: `1`);
1955	EVT ShiftVT = Op1.getValueType();
1956
1957	if (std::optional<uint64_t> KnownSA =
1958	TLO.DAG.getValidShiftAmount(V: Op, DemandedElts, Depth: Depth + `1`)) {
1959	unsigned ShAmt = *KnownSA;
1960	if (ShAmt == `0`)
1961	return TLO.CombineTo(O: Op, N: Op0);
1962
1963	// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1964	// single shift. We can do this if the top bits (which are shifted out)
1965	// are never demanded.
1966	// TODO - support non-uniform vector amounts.
1967	if (Op0.getOpcode() == ISD::SHL) {
1968	if (!DemandedBits.intersects(RHS: APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: ShAmt))) {
1969	if (std::optional<uint64_t> InnerSA =
1970	TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + `2`)) {
1971	unsigned C1 = *InnerSA;
1972	unsigned Opc = ISD::SRL;
1973	int Diff = ShAmt - C1;
1974	if (Diff < `0`) {
1975	Diff = -Diff;
1976	Opc = ISD::SHL;
1977	}
1978	SDValue NewSA = TLO.DAG.getConstant(Val: Diff, DL: dl, VT: ShiftVT);
1979	return TLO.CombineTo(
1980	O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, N1: Op0.getOperand(i: `0`), N2: NewSA));
1981	}
1982	}
1983	}
1984
1985	// If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
1986	// single sra. We can do this if the top bits are never demanded.
1987	if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
1988	if (!DemandedBits.intersects(RHS: APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: ShAmt))) {
1989	if (std::optional<uint64_t> InnerSA =
1990	TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + `2`)) {
1991	unsigned C1 = *InnerSA;
1992	// Clamp the combined shift amount if it exceeds the bit width.
1993	unsigned Combined = std::min(a: C1 + ShAmt, b: BitWidth - `1`);
1994	SDValue NewSA = TLO.DAG.getConstant(Val: Combined, DL: dl, VT: ShiftVT);
1995	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRA, DL: dl, VT,
1996	N1: Op0.getOperand(i: `0`), N2: NewSA));
1997	}
1998	}
1999	}
2000
2001	APInt InDemandedMask = (DemandedBits << ShAmt);
2002
2003	// If the shift is exact, then it does demand the low bits (and knows that
2004	// they are zero).
2005	if (Op ->getFlags().hasExact())
2006	InDemandedMask.setLowBits(ShAmt);
2007
2008	// Narrow shift to lower half - similar to ShrinkDemandedOp.
2009	// (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
2010	if ((BitWidth % `2`) == `0` && !VT.isVector()) {
2011	APInt HiBits = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth / `2`);
2012	EVT HalfVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: BitWidth / `2`);
2013	if (isNarrowingProfitable(N: Op.getNode(), SrcVT: VT, DestVT: HalfVT) &&
2014	isTypeDesirableForOp(ISD::SRL, VT: HalfVT) &&
2015	isTruncateFree(FromVT: VT, ToVT: HalfVT) && isZExtFree(FromTy: HalfVT, ToTy: VT) &&
2016	(!TLO.LegalOperations() \|\| isOperationLegal(Op: ISD::SRL, VT: HalfVT)) &&
2017	((InDemandedMask.countLeadingZeros() >= (BitWidth / `2`)) \|\|
2018	TLO.DAG.MaskedValueIsZero(Op: Op0, Mask: HiBits))) {
2019	SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HalfVT, Operand: Op0);
2020	SDValue NewShiftAmt =
2021	TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: HalfVT, DL: dl);
2022	SDValue NewShift =
2023	TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HalfVT, N1: NewOp, N2: NewShiftAmt);
2024	return TLO.CombineTo(
2025	O: Op, N: TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: NewShift));
2026	}
2027	}
2028
2029	// Compute the new bits that are at the top now.
2030	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
2031	Depth: Depth + `1`))
2032	return true;
2033	Known.Zero.lshrInPlace(ShiftAmt: ShAmt);
2034	Known.One.lshrInPlace(ShiftAmt: ShAmt);
2035	// High bits known zero.
2036	Known.Zero.setHighBits(ShAmt);
2037
2038	// Attempt to avoid multi-use ops if we don't need anything from them.
2039	if (!InDemandedMask.isAllOnes() \|\| !DemandedElts.isAllOnes()) {
2040	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2041	Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
2042	if (DemandedOp0) {
2043	SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: DemandedOp0, N2: Op1);
2044	return TLO.CombineTo(O: Op, N: NewOp);
2045	}
2046	}
2047	} else {
2048	// Use generic knownbits computation as it has support for non-uniform
2049	// shift amounts.
2050	Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2051	}
2052
2053	// If we are only demanding sign bits then we can use the shift source
2054	// directly.
2055	if (std::optional<uint64_t> MaxSA =
2056	TLO.DAG.getValidMaximumShiftAmount(V: Op, DemandedElts, Depth: Depth + `1`)) {
2057	unsigned ShAmt = *MaxSA;
2058	// Must already be signbits in DemandedBits bounds, and can't demand any
2059	// shifted in zeroes.
2060	if (DemandedBits.countl_zero() >= ShAmt) {
2061	unsigned NumSignBits =
2062	TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + `1`);
2063	if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
2064	return TLO.CombineTo(O: Op, N: Op0);
2065	}
2066	}
2067
2068	// Try to match AVG patterns (after shift simplification).
2069	if (SDValue AVG = combineShiftToAVG(Op, TLO, TLI: *this, DemandedBits,
2070	DemandedElts, Depth: Depth + `1`))
2071	return TLO.CombineTo(O: Op, N: AVG);
2072
2073	break;
2074	}
2075	case ISD::SRA: {
2076	SDValue Op0 = Op.getOperand(i: `0`);
2077	SDValue Op1 = Op.getOperand(i: `1`);
2078	EVT ShiftVT = Op1.getValueType();
2079
2080	// If we only want bits that already match the signbit then we don't need
2081	// to shift.
2082	unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2083	if (TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + `1`) >=
2084	NumHiDemandedBits)
2085	return TLO.CombineTo(O: Op, N: Op0);
2086
2087	// If this is an arithmetic shift right and only the low bit is demanded, we
2088	// can always convert this into a logical shr, even if the shift amount is
2089	// variable. The low bit of the shift cannot be an input sign bit unless
2090	// the shift amount is >= the size of the datatype, which is undefined.
2091	if (DemandedBits.isOne())
2092	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1));
2093
2094	if (std::optional<uint64_t> KnownSA =
2095	TLO.DAG.getValidShiftAmount(V: Op, DemandedElts, Depth: Depth + `1`)) {
2096	unsigned ShAmt = *KnownSA;
2097	if (ShAmt == `0`)
2098	return TLO.CombineTo(O: Op, N: Op0);
2099
2100	// fold (sra (shl x, c1), c1) -> sext_inreg for some c1, if the target
2101	// supports sext_inreg.
2102	if (Op0.getOpcode() == ISD::SHL) {
2103	if (std::optional<uint64_t> InnerSA =
2104	TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + `2`)) {
2105	unsigned LowBits = BitWidth - ShAmt;
2106	EVT ExtVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: LowBits);
2107	if (VT.isVector())
2108	ExtVT = EVT::getVectorVT(Context&: *TLO.DAG.getContext(), VT: ExtVT,
2109	EC: VT.getVectorElementCount());
2110
2111	if (*InnerSA == ShAmt) {
2112	if (!TLO.LegalOperations() \|\|
2113	getOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: ExtVT) == Legal)
2114	return TLO.CombineTo(
2115	O: Op, N: TLO.DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT,
2116	N1: Op0.getOperand(i: `0`),
2117	N2: TLO.DAG.getValueType(ExtVT)));
2118
2119	// Even if we can't convert to sext_inreg, we might be able to
2120	// remove this shift pair if the input is already sign extended.
2121	unsigned NumSignBits =
2122	TLO.DAG.ComputeNumSignBits(Op: Op0.getOperand(i: `0`), DemandedElts);
2123	if (NumSignBits > ShAmt)
2124	return TLO.CombineTo(O: Op, N: Op0.getOperand(i: `0`));
2125	}
2126	}
2127	}
2128
2129	APInt InDemandedMask = (DemandedBits << ShAmt);
2130
2131	// If the shift is exact, then it does demand the low bits (and knows that
2132	// they are zero).
2133	if (Op ->getFlags().hasExact())
2134	InDemandedMask.setLowBits(ShAmt);
2135
2136	// If any of the demanded bits are produced by the sign extension, we also
2137	// demand the input sign bit.
2138	if (DemandedBits.countl_zero() < ShAmt)
2139	InDemandedMask.setSignBit();
2140
2141	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
2142	Depth: Depth + `1`))
2143	return true;
2144	Known.Zero.lshrInPlace(ShiftAmt: ShAmt);
2145	Known.One.lshrInPlace(ShiftAmt: ShAmt);
2146
2147	// If the input sign bit is known to be zero, or if none of the top bits
2148	// are demanded, turn this into an unsigned shift right.
2149	if (Known.Zero [BitWidth - ShAmt - `1`] \|\|
2150	DemandedBits.countl_zero() >= ShAmt) {
2151	SDNodeFlags Flags;
2152	Flags.setExact(Op ->getFlags().hasExact());
2153	return TLO.CombineTo(
2154	O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1, Flags));
2155	}
2156
2157	int Log2 = DemandedBits.exactLogBase2();
2158	if (Log2 >= `0`) {
2159	// The bit must come from the sign.
2160	SDValue NewSA = TLO.DAG.getConstant(Val: BitWidth - `1` - Log2, DL: dl, VT: ShiftVT);
2161	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: NewSA));
2162	}
2163
2164	if (Known.One [BitWidth - ShAmt - `1`])
2165	// New bits are known one.
2166	Known.One.setHighBits(ShAmt);
2167
2168	// Attempt to avoid multi-use ops if we don't need anything from them.
2169	if (!InDemandedMask.isAllOnes() \|\| !DemandedElts.isAllOnes()) {
2170	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2171	Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
2172	if (DemandedOp0) {
2173	SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: DemandedOp0, N2: Op1);
2174	return TLO.CombineTo(O: Op, N: NewOp);
2175	}
2176	}
2177	}
2178
2179	// Try to match AVG patterns (after shift simplification).
2180	if (SDValue AVG = combineShiftToAVG(Op, TLO, TLI: *this, DemandedBits,
2181	DemandedElts, Depth: Depth + `1`))
2182	return TLO.CombineTo(O: Op, N: AVG);
2183
2184	break;
2185	}
2186	case ISD::FSHL:
2187	case ISD::FSHR: {
2188	SDValue Op0 = Op.getOperand(i: `0`);
2189	SDValue Op1 = Op.getOperand(i: `1`);
2190	SDValue Op2 = Op.getOperand(i: `2`);
2191	bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2192
2193	if (ConstantSDNode *SA = isConstOrConstSplat(N: Op2, DemandedElts)) {
2194	unsigned Amt = SA->getAPIntValue().urem(RHS: BitWidth);
2195
2196	// For fshl, 0-shift returns the 1st arg.
2197	// For fshr, 0-shift returns the 2nd arg.
2198	if (Amt == `0`) {
2199	if (SimplifyDemandedBits(Op: IsFSHL ? Op0 : Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
2200	Known, TLO, Depth: Depth + `1`))
2201	return true;
2202	break;
2203	}
2204
2205	// fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2206	// fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2207	APInt Demanded0 = DemandedBits.lshr(shiftAmt: IsFSHL ? Amt : (BitWidth - Amt));
2208	APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2209	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: Demanded0, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2210	Depth: Depth + `1`))
2211	return true;
2212	if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: Demanded1, OriginalDemandedElts: DemandedElts, Known, TLO,
2213	Depth: Depth + `1`))
2214	return true;
2215
2216	Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2217	Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2218	Known.One.lshrInPlace(ShiftAmt: IsFSHL ? (BitWidth - Amt) : Amt);
2219	Known.Zero.lshrInPlace(ShiftAmt: IsFSHL ? (BitWidth - Amt) : Amt);
2220	Known = Known.unionWith(RHS: Known2);
2221
2222	// Attempt to avoid multi-use ops if we don't need anything from them.
2223	if (!Demanded0.isAllOnes() \|\| !Demanded1.isAllOnes() \|\|
2224	!DemandedElts.isAllOnes()) {
2225	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2226	Op: Op0, DemandedBits: Demanded0, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
2227	SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2228	Op: Op1, DemandedBits: Demanded1, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
2229	if (DemandedOp0 \|\| DemandedOp1) {
2230	DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2231	DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2232	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedOp0,
2233	N2: DemandedOp1, N3: Op2);
2234	return TLO.CombineTo(O: Op, N: NewOp);
2235	}
2236	}
2237	}
2238
2239	// For pow-2 bitwidths we only demand the bottom modulo amt bits.
2240	if (isPowerOf2_32(Value: BitWidth)) {
2241	APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - `1`);
2242	if (SimplifyDemandedBits(Op: Op2, OriginalDemandedBits: DemandedAmtBits, OriginalDemandedElts: DemandedElts,
2243	Known&: Known2, TLO, Depth: Depth + `1`))
2244	return true;
2245	}
2246	break;
2247	}
2248	case ISD::ROTL:
2249	case ISD::ROTR: {
2250	SDValue Op0 = Op.getOperand(i: `0`);
2251	SDValue Op1 = Op.getOperand(i: `1`);
2252	bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2253
2254	// If we're rotating a 0/-1 value, then it stays a 0/-1 value.
2255	if (BitWidth == TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + `1`))
2256	return TLO.CombineTo(O: Op, N: Op0);
2257
2258	if (ConstantSDNode *SA = isConstOrConstSplat(N: Op1, DemandedElts)) {
2259	unsigned Amt = SA->getAPIntValue().urem(RHS: BitWidth);
2260	unsigned RevAmt = BitWidth - Amt;
2261
2262	// rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2263	// rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2264	APInt Demanded0 = DemandedBits.rotr(rotateAmt: IsROTL ? Amt : RevAmt);
2265	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: Demanded0, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2266	Depth: Depth + `1`))
2267	return true;
2268
2269	// rotl/rotr(x, 0) --> x
2270	if (Amt == `0`)
2271	return TLO.CombineTo(O: Op, N: Op0);
2272
2273	// See if we don't demand either half of the rotated bits.
2274	if ((!TLO.LegalOperations() \|\| isOperationLegal(Op: ISD::SHL, VT)) &&
2275	DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2276	Op1 = TLO.DAG.getConstant(Val: IsROTL ? Amt : RevAmt, DL: dl, VT: Op1.getValueType());
2277	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op0, N2: Op1));
2278	}
2279	if ((!TLO.LegalOperations() \|\| isOperationLegal(Op: ISD::SRL, VT)) &&
2280	DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2281	Op1 = TLO.DAG.getConstant(Val: IsROTL ? RevAmt : Amt, DL: dl, VT: Op1.getValueType());
2282	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1));
2283	}
2284	}
2285
2286	// For pow-2 bitwidths we only demand the bottom modulo amt bits.
2287	if (isPowerOf2_32(Value: BitWidth)) {
2288	APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - `1`);
2289	if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedAmtBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2290	Depth: Depth + `1`))
2291	return true;
2292	}
2293	break;
2294	}
2295	case ISD::SMIN:
2296	case ISD::SMAX:
2297	case ISD::UMIN:
2298	case ISD::UMAX: {
2299	unsigned Opc = Op.getOpcode();
2300	SDValue Op0 = Op.getOperand(i: `0`);
2301	SDValue Op1 = Op.getOperand(i: `1`);
2302
2303	// If we're only demanding signbits, then we can simplify to OR/AND node.
2304	unsigned BitOp =
2305	(Opc == ISD::SMIN \|\| Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2306	unsigned NumSignBits =
2307	std::min(a: TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + `1`),
2308	b: TLO.DAG.ComputeNumSignBits(Op: Op1, DemandedElts, Depth: Depth + `1`));
2309	unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2310	if (NumSignBits >= NumDemandedUpperBits)
2311	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: BitOp, DL: SDLoc (Op), VT, N1: Op0, N2: Op1));
2312
2313	// Check if one arg is always less/greater than (or equal) to the other arg.
2314	KnownBits Known0 = TLO.DAG.computeKnownBits(Op: Op0, DemandedElts, Depth: Depth + `1`);
2315	KnownBits Known1 = TLO.DAG.computeKnownBits(Op: Op1, DemandedElts, Depth: Depth + `1`);
2316	switch (Opc) {
2317	case ISD::SMIN:
2318	if (std::optional<bool> IsSLE = KnownBits::sle(LHS: Known0, RHS: Known1))
2319	return TLO.CombineTo(O: Op, N: *IsSLE ? Op0 : Op1);
2320	if (std::optional<bool> IsSLT = KnownBits::slt(LHS: Known0, RHS: Known1))
2321	return TLO.CombineTo(O: Op, N: *IsSLT ? Op0 : Op1);
2322	Known = KnownBits::smin(LHS: Known0, RHS: Known1);
2323	break;
2324	case ISD::SMAX:
2325	if (std::optional<bool> IsSGE = KnownBits::sge(LHS: Known0, RHS: Known1))
2326	return TLO.CombineTo(O: Op, N: *IsSGE ? Op0 : Op1);
2327	if (std::optional<bool> IsSGT = KnownBits::sgt(LHS: Known0, RHS: Known1))
2328	return TLO.CombineTo(O: Op, N: *IsSGT ? Op0 : Op1);
2329	Known = KnownBits::smax(LHS: Known0, RHS: Known1);
2330	break;
2331	case ISD::UMIN:
2332	if (std::optional<bool> IsULE = KnownBits::ule(LHS: Known0, RHS: Known1))
2333	return TLO.CombineTo(O: Op, N: *IsULE ? Op0 : Op1);
2334	if (std::optional<bool> IsULT = KnownBits::ult(LHS: Known0, RHS: Known1))
2335	return TLO.CombineTo(O: Op, N: *IsULT ? Op0 : Op1);
2336	Known = KnownBits::umin(LHS: Known0, RHS: Known1);
2337	break;
2338	case ISD::UMAX:
2339	if (std::optional<bool> IsUGE = KnownBits::uge(LHS: Known0, RHS: Known1))
2340	return TLO.CombineTo(O: Op, N: *IsUGE ? Op0 : Op1);
2341	if (std::optional<bool> IsUGT = KnownBits::ugt(LHS: Known0, RHS: Known1))
2342	return TLO.CombineTo(O: Op, N: *IsUGT ? Op0 : Op1);
2343	Known = KnownBits::umax(LHS: Known0, RHS: Known1);
2344	break;
2345	}
2346	break;
2347	}
2348	case ISD::BITREVERSE: {
2349	SDValue Src = Op.getOperand(i: `0`);
2350	APInt DemandedSrcBits = DemandedBits.reverseBits();
2351	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2352	Depth: Depth + `1`))
2353	return true;
2354	Known.One = Known2.One.reverseBits();
2355	Known.Zero = Known2.Zero.reverseBits();
2356	break;
2357	}
2358	case ISD::BSWAP: {
2359	SDValue Src = Op.getOperand(i: `0`);
2360
2361	// If the only bits demanded come from one byte of the bswap result,
2362	// just shift the input byte into position to eliminate the bswap.
2363	unsigned NLZ = DemandedBits.countl_zero();
2364	unsigned NTZ = DemandedBits.countr_zero();
2365
2366	// Round NTZ down to the next byte. If we have 11 trailing zeros, then
2367	// we need all the bits down to bit 8. Likewise, round NLZ. If we
2368	// have 14 leading zeros, round to 8.
2369	NLZ = alignDown(Value: NLZ, Align: `8`);
2370	NTZ = alignDown(Value: NTZ, Align: `8`);
2371	// If we need exactly one byte, we can do this transformation.
2372	if (BitWidth - NLZ - NTZ == `8`) {
2373	// Replace this with either a left or right shift to get the byte into
2374	// the right place.
2375	unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2376	if (!TLO.LegalOperations() \|\| isOperationLegal(Op: ShiftOpcode, VT)) {
2377	unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2378	SDValue ShAmt = TLO.DAG.getShiftAmountConstant(Val: ShiftAmount, VT, DL: dl);
2379	SDValue NewOp = TLO.DAG.getNode(Opcode: ShiftOpcode, DL: dl, VT, N1: Src, N2: ShAmt);
2380	return TLO.CombineTo(O: Op, N: NewOp);
2381	}
2382	}
2383
2384	APInt DemandedSrcBits = DemandedBits.byteSwap();
2385	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2386	Depth: Depth + `1`))
2387	return true;
2388	Known.One = Known2.One.byteSwap();
2389	Known.Zero = Known2.Zero.byteSwap();
2390	break;
2391	}
2392	case ISD::CTPOP: {
2393	// If only 1 bit is demanded, replace with PARITY as long as we're before
2394	// op legalization.
2395	// FIXME: Limit to scalars for now.
2396	if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2397	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::PARITY, DL: dl, VT,
2398	Operand: Op.getOperand(i: `0`)));
2399
2400	Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2401	break;
2402	}
2403	case ISD::SIGN_EXTEND_INREG: {
2404	SDValue Op0 = Op.getOperand(i: `0`);
2405	EVT ExVT = cast<VTSDNode>(Val: Op.getOperand(i: `1`))->getVT();
2406	unsigned ExVTBits = ExVT.getScalarSizeInBits();
2407
2408	// If we only care about the highest bit, don't bother shifting right.
2409	if (DemandedBits.isSignMask()) {
2410	unsigned MinSignedBits =
2411	TLO.DAG.ComputeMaxSignificantBits(Op: Op0, DemandedElts, Depth: Depth + `1`);
2412	bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2413	// However if the input is already sign extended we expect the sign
2414	// extension to be dropped altogether later and do not simplify.
2415	if (!AlreadySignExtended) {
2416	// Compute the correct shift amount type, which must be getShiftAmountTy
2417	// for scalar types after legalization.
2418	SDValue ShiftAmt =
2419	TLO.DAG.getShiftAmountConstant(Val: BitWidth - ExVTBits, VT, DL: dl);
2420	return TLO.CombineTo(O: Op,
2421	N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op0, N2: ShiftAmt));
2422	}
2423	}
2424
2425	// If none of the extended bits are demanded, eliminate the sextinreg.
2426	if (DemandedBits.getActiveBits() <= ExVTBits)
2427	return TLO.CombineTo(O: Op, N: Op0);
2428
2429	APInt InputDemandedBits = DemandedBits.getLoBits(numBits: ExVTBits);
2430
2431	// Since the sign extended bits are demanded, we know that the sign
2432	// bit is demanded.
2433	InputDemandedBits.setBit(ExVTBits - `1`);
2434
2435	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InputDemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
2436	Depth: Depth + `1`))
2437	return true;
2438
2439	// If the sign bit of the input is known set or clear, then we know the
2440	// top bits of the result.
2441
2442	// If the input sign bit is known zero, convert this into a zero extension.
2443	if (Known.Zero [ExVTBits - `1`])
2444	return TLO.CombineTo(O: Op, N: TLO.DAG.getZeroExtendInReg(Op: Op0, DL: dl, VT: ExVT));
2445
2446	APInt Mask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ExVTBits);
2447	if (Known.One [ExVTBits - `1`]) { // Input sign bit known set
2448	Known.One.setBitsFrom(ExVTBits);
2449	Known.Zero &= Mask;
2450	} else { // Input sign bit unknown
2451	Known.Zero &= Mask;
2452	Known.One &= Mask;
2453	}
2454	break;
2455	}
2456	case ISD::BUILD_PAIR: {
2457	EVT HalfVT = Op.getOperand(i: `0`).getValueType();
2458	unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2459
2460	APInt MaskLo = DemandedBits.getLoBits(numBits: HalfBitWidth).trunc(width: HalfBitWidth);
2461	APInt MaskHi = DemandedBits.getHiBits(numBits: HalfBitWidth).trunc(width: HalfBitWidth);
2462
2463	KnownBits KnownLo, KnownHi;
2464
2465	if (SimplifyDemandedBits(Op: Op.getOperand(i: `0`), DemandedBits: MaskLo, Known&: KnownLo, TLO, Depth: Depth + `1`))
2466	return true;
2467
2468	if (SimplifyDemandedBits(Op: Op.getOperand(i: `1`), DemandedBits: MaskHi, Known&: KnownHi, TLO, Depth: Depth + `1`))
2469	return true;
2470
2471	Known = KnownHi.concat(Lo: KnownLo);
2472	break;
2473	}
2474	case ISD::ZERO_EXTEND_VECTOR_INREG:
2475	if (VT.isScalableVector())
2476	return false;
2477	[[fallthrough]];
2478	case ISD::ZERO_EXTEND: {
2479	SDValue Src = Op.getOperand(i: `0`);
2480	EVT SrcVT = Src.getValueType();
2481	unsigned InBits = SrcVT.getScalarSizeInBits();
2482	unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : `1`;
2483	bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2484
2485	// If none of the top bits are demanded, convert this into an any_extend.
2486	if (DemandedBits.getActiveBits() <= InBits) {
2487	// If we only need the non-extended bits of the bottom element
2488	// then we can just bitcast to the result.
2489	if (IsLE && IsVecInReg && DemandedElts == `1` &&
2490	VT.getSizeInBits() == SrcVT.getSizeInBits())
2491	return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2492
2493	unsigned Opc =
2494	IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2495	if (!TLO.LegalOperations() \|\| isOperationLegal(Op: Opc, VT))
2496	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src));
2497	}
2498
2499	APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2500	APInt InDemandedElts = DemandedElts.zext(width: InElts);
2501	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2502	Depth: Depth + `1`)) {
2503	Op ->dropFlags(Mask: SDNodeFlags::NonNeg);
2504	return true;
2505	}
2506	assert(Known.getBitWidth() == InBits && "Src width has changed?");
2507	Known = Known.zext(BitWidth);
2508
2509	// Attempt to avoid multi-use ops if we don't need anything from them.
2510	if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2511	Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`))
2512	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2513	break;
2514	}
2515	case ISD::SIGN_EXTEND_VECTOR_INREG:
2516	if (VT.isScalableVector())
2517	return false;
2518	[[fallthrough]];
2519	case ISD::SIGN_EXTEND: {
2520	SDValue Src = Op.getOperand(i: `0`);
2521	EVT SrcVT = Src.getValueType();
2522	unsigned InBits = SrcVT.getScalarSizeInBits();
2523	unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : `1`;
2524	bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2525
2526	APInt InDemandedElts = DemandedElts.zext(width: InElts);
2527	APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2528
2529	// Since some of the sign extended bits are demanded, we know that the sign
2530	// bit is demanded.
2531	InDemandedBits.setBit(InBits - `1`);
2532
2533	// If none of the top bits are demanded, convert this into an any_extend.
2534	if (DemandedBits.getActiveBits() <= InBits) {
2535	// If we only need the non-extended bits of the bottom element
2536	// then we can just bitcast to the result.
2537	if (IsLE && IsVecInReg && DemandedElts == `1` &&
2538	VT.getSizeInBits() == SrcVT.getSizeInBits())
2539	return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2540
2541	// Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2542	if (getBooleanContents(Type: VT) != ZeroOrNegativeOneBooleanContent \|\|
2543	TLO.DAG.ComputeNumSignBits(Op: Src, DemandedElts: InDemandedElts, Depth: Depth + `1`) !=
2544	InBits) {
2545	unsigned Opc =
2546	IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2547	if (!TLO.LegalOperations() \|\| isOperationLegal(Op: Opc, VT))
2548	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src));
2549	}
2550	}
2551
2552	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2553	Depth: Depth + `1`))
2554	return true;
2555	assert(Known.getBitWidth() == InBits && "Src width has changed?");
2556
2557	// If the sign bit is known one, the top bits match.
2558	Known = Known.sext(BitWidth);
2559
2560	// If the sign bit is known zero, convert this to a zero extend.
2561	if (Known.isNonNegative()) {
2562	unsigned Opc =
2563	IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2564	if (!TLO.LegalOperations() \|\| isOperationLegal(Op: Opc, VT)) {
2565	SDNodeFlags Flags;
2566	if (!IsVecInReg)
2567	Flags \|= SDNodeFlags::NonNeg;
2568	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src, Flags));
2569	}
2570	}
2571
2572	// Attempt to avoid multi-use ops if we don't need anything from them.
2573	if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2574	Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`))
2575	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2576	break;
2577	}
2578	case ISD::ANY_EXTEND_VECTOR_INREG:
2579	if (VT.isScalableVector())
2580	return false;
2581	[[fallthrough]];
2582	case ISD::ANY_EXTEND: {
2583	SDValue Src = Op.getOperand(i: `0`);
2584	EVT SrcVT = Src.getValueType();
2585	unsigned InBits = SrcVT.getScalarSizeInBits();
2586	unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : `1`;
2587	bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2588
2589	// If we only need the bottom element then we can just bitcast.
2590	// TODO: Handle ANY_EXTEND?
2591	if (IsLE && IsVecInReg && DemandedElts == `1` &&
2592	VT.getSizeInBits() == SrcVT.getSizeInBits())
2593	return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2594
2595	APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2596	APInt InDemandedElts = DemandedElts.zext(width: InElts);
2597	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2598	Depth: Depth + `1`))
2599	return true;
2600	assert(Known.getBitWidth() == InBits && "Src width has changed?");
2601	Known = Known.anyext(BitWidth);
2602
2603	// Attempt to avoid multi-use ops if we don't need anything from them.
2604	if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2605	Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`))
2606	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2607	break;
2608	}
2609	case ISD::TRUNCATE: {
2610	SDValue Src = Op.getOperand(i: `0`);
2611
2612	// Simplify the input, using demanded bit information, and compute the known
2613	// zero/one bits live out.
2614	unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2615	APInt TruncMask = DemandedBits.zext(width: OperandBitWidth);
2616	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: TruncMask, OriginalDemandedElts: DemandedElts, Known, TLO,
2617	Depth: Depth + `1`)) {
2618	// Disable the nsw and nuw flags. We can no longer guarantee that we
2619	// won't wrap after simplification.
2620	Op ->dropFlags(Mask: SDNodeFlags::NoWrap);
2621	return true;
2622	}
2623	Known = Known.trunc(BitWidth);
2624
2625	// Attempt to avoid multi-use ops if we don't need anything from them.
2626	if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2627	Op: Src, DemandedBits: TruncMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`))
2628	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: NewSrc));
2629
2630	// If the input is only used by this truncate, see if we can shrink it based
2631	// on the known demanded bits.
2632	switch (Src.getOpcode()) {
2633	default:
2634	break;
2635	case ISD::SRL:
2636	// Shrink SRL by a constant if none of the high bits shifted in are
2637	// demanded.
2638	if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2639	// Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2640	// undesirable.
2641	break;
2642
2643	if (Src.getNode()->hasOneUse()) {
2644	if (isTruncateFree(Val: Src, VT2: VT) &&
2645	!isTruncateFree(FromVT: Src.getValueType(), ToVT: VT)) {
2646	// If truncate is only free at trunc(srl), do not turn it into
2647	// srl(trunc). The check is done by first checking that the truncate is
2648	// free at Src's opcode (srl), then checking that the truncate is not done
2649	// by referencing a sub-register. In tests, if both trunc(srl) and
2650	// srl(trunc)'s trunc are free, srl(trunc) performs better. If only
2651	// trunc(srl)'s trunc is free, trunc(srl) is better.
2652	break;
2653	}
2654
2655	std::optional<uint64_t> ShAmtC =
2656	TLO.DAG.getValidShiftAmount(V: Src, DemandedElts, Depth: Depth + `2`);
2657	if (!ShAmtC \|\| *ShAmtC >= BitWidth)
2658	break;
2659	uint64_t ShVal = *ShAmtC;
2660
2661	APInt HighBits =
2662	APInt::getHighBitsSet(numBits: OperandBitWidth, hiBitsSet: OperandBitWidth - BitWidth);
2663	HighBits.lshrInPlace(ShiftAmt: ShVal);
2664	HighBits = HighBits.trunc(width: BitWidth);
2665	if (!(HighBits & DemandedBits)) {
2666	// None of the shifted in bits are needed. Add a truncate of the
2667	// shift input, then shift it.
2668	SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(Val: ShVal, VT, DL: dl);
2669	SDValue NewTrunc =
2670	TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Src.getOperand(i: `0`));
2671	return TLO.CombineTo(
2672	O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: NewTrunc, N2: NewShAmt));
2673	}
2674	}
2675	break;
2676	}
2677
2678	break;
2679	}
2680	case ISD::AssertZext: {
2681	// AssertZext demands all of the high bits, plus any of the low bits
2682	// demanded by its users.
2683	EVT ZVT = cast<VTSDNode>(Val: Op.getOperand(i: `1`))->getVT();
2684	APInt InMask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ZVT.getSizeInBits());
2685	if (SimplifyDemandedBits(Op: Op.getOperand(i: `0`), DemandedBits: ~InMask \| DemandedBits, Known,
2686	TLO, Depth: Depth + `1`))
2687	return true;
2688
2689	Known.Zero \|= ~InMask;
2690	Known.One &= (~Known.Zero);
2691	break;
2692	}
2693	case ISD::EXTRACT_VECTOR_ELT: {
2694	SDValue Src = Op.getOperand(i: `0`);
2695	SDValue Idx = Op.getOperand(i: `1`);
2696	ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2697	unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2698
2699	if (SrcEltCnt.isScalable())
2700	return false;
2701
2702	// Demand the bits from every vector element without a constant index.
2703	unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2704	APInt DemandedSrcElts = APInt::getAllOnes(numBits: NumSrcElts);
2705	if (auto *CIdx = dyn_cast<ConstantSDNode>(Val&: Idx))
2706	if (CIdx->getAPIntValue().ult(RHS: NumSrcElts))
2707	DemandedSrcElts = APInt::getOneBitSet(numBits: NumSrcElts, BitNo: CIdx->getZExtValue());
2708
2709	// If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2710	// anything about the extended bits.
2711	APInt DemandedSrcBits = DemandedBits;
2712	if (BitWidth > EltBitWidth)
2713	DemandedSrcBits = DemandedSrcBits.trunc(width: EltBitWidth);
2714
2715	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts, Known&: Known2, TLO,
2716	Depth: Depth + `1`))
2717	return true;
2718
2719	// Attempt to avoid multi-use ops if we don't need anything from them.
2720	if (!DemandedSrcBits.isAllOnes() \|\| !DemandedSrcElts.isAllOnes()) {
2721	if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2722	Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + `1`)) {
2723	SDValue NewOp =
2724	TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedSrc, N2: Idx);
2725	return TLO.CombineTo(O: Op, N: NewOp);
2726	}
2727	}
2728
2729	Known = Known2;
2730	if (BitWidth > EltBitWidth)
2731	Known = Known.anyext(BitWidth);
2732	break;
2733	}
2734	case ISD::BITCAST: {
2735	if (VT.isScalableVector())
2736	return false;
2737	SDValue Src = Op.getOperand(i: `0`);
2738	EVT SrcVT = Src.getValueType();
2739	unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2740
2741	// If this is an FP->Int bitcast and if the sign bit is the only
2742	// thing demanded, turn this into a FGETSIGN.
2743	if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2744	DemandedBits == APInt::getSignMask(BitWidth: Op.getValueSizeInBits()) &&
2745	SrcVT.isFloatingPoint()) {
2746	bool OpVTLegal = isOperationLegalOrCustom(Op: ISD::FGETSIGN, VT);
2747	bool i32Legal = isOperationLegalOrCustom(Op: ISD::FGETSIGN, VT: MVT::i32);
2748	if ((OpVTLegal \|\| i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2749	SrcVT != MVT::f128) {
2750	// Cannot eliminate/lower SHL for f128 yet.
2751	EVT Ty = OpVTLegal ? VT : MVT::i32;
2752	// Make a FGETSIGN + SHL to move the sign bit into the appropriate
2753	// place. We expect the SHL to be eliminated by other optimizations.
2754	SDValue Sign = TLO.DAG.getNode(Opcode: ISD::FGETSIGN, DL: dl, VT: Ty, Operand: Src);
2755	unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2756	if (!OpVTLegal && OpVTSizeInBits > `32`)
2757	Sign = TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Sign);
2758	unsigned ShVal = Op.getValueSizeInBits() - `1`;
2759	SDValue ShAmt = TLO.DAG.getConstant(Val: ShVal, DL: dl, VT);
2760	return TLO.CombineTo(O: Op,
2761	N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Sign, N2: ShAmt));
2762	}
2763	}
2764
2765	// Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2766	// Demand the elt/bit if any of the original elts/bits are demanded.
2767	if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == `0`) {
2768	unsigned Scale = BitWidth / NumSrcEltBits;
2769	unsigned NumSrcElts = SrcVT.getVectorNumElements();
2770	APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
2771	APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
2772	for (unsigned i = `0`; i != Scale; ++i) {
2773	unsigned EltOffset = IsLE ? i : (Scale - `1` - i);
2774	unsigned BitOffset = EltOffset * NumSrcEltBits;
2775	APInt Sub = DemandedBits.extractBits(numBits: NumSrcEltBits, bitPosition: BitOffset);
2776	if (!Sub.isZero()) {
2777	DemandedSrcBits \|= Sub;
2778	for (unsigned j = `0`; j != NumElts; ++j)
2779	if (DemandedElts [j])
2780	DemandedSrcElts.setBit((j * Scale) + i);
2781	}
2782	}
2783
2784	APInt KnownSrcUndef, KnownSrcZero;
2785	if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedSrcElts, KnownUndef&: KnownSrcUndef,
2786	KnownZero&: KnownSrcZero, TLO, Depth: Depth + `1`))
2787	return true;
2788
2789	KnownBits KnownSrcBits;
2790	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts,
2791	Known&: KnownSrcBits, TLO, Depth: Depth + `1`))
2792	return true;
2793	} else if (IsLE && (NumSrcEltBits % BitWidth) == `0`) {
2794	// TODO - bigendian once we have test coverage.
2795	unsigned Scale = NumSrcEltBits / BitWidth;
2796	unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : `1`;
2797	APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
2798	APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
2799	for (unsigned i = `0`; i != NumElts; ++i)
2800	if (DemandedElts [i]) {
2801	unsigned Offset = (i % Scale) * BitWidth;
2802	DemandedSrcBits.insertBits(SubBits: DemandedBits, bitPosition: Offset);
2803	DemandedSrcElts.setBit(i / Scale);
2804	}
2805
2806	if (SrcVT.isVector()) {
2807	APInt KnownSrcUndef, KnownSrcZero;
2808	if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedSrcElts, KnownUndef&: KnownSrcUndef,
2809	KnownZero&: KnownSrcZero, TLO, Depth: Depth + `1`))
2810	return true;
2811	}
2812
2813	KnownBits KnownSrcBits;
2814	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts,
2815	Known&: KnownSrcBits, TLO, Depth: Depth + `1`))
2816	return true;
2817
2818	// Attempt to avoid multi-use ops if we don't need anything from them.
2819	if (!DemandedSrcBits.isAllOnes() \|\| !DemandedSrcElts.isAllOnes()) {
2820	if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2821	Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + `1`)) {
2822	SDValue NewOp = TLO.DAG.getBitcast(VT, V: DemandedSrc);
2823	return TLO.CombineTo(O: Op, N: NewOp);
2824	}
2825	}
2826	}
2827
2828	// If this is a bitcast, let computeKnownBits handle it. Only do this on a
2829	// recursive call where Known may be useful to the caller.
2830	if (Depth > `0`) {
2831	Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2832	return false;
2833	}
2834	break;
2835	}
2836	case ISD::MUL:
2837	if (DemandedBits.isPowerOf2()) {
2838	// The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2839	// If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2840	// odd (has LSB set), then the left-shifted low bit of X is the answer.
2841	unsigned CTZ = DemandedBits.countr_zero();
2842	ConstantSDNode *C = isConstOrConstSplat(N: Op.getOperand(i: `1`), DemandedElts);
2843	if (C && C->getAPIntValue().countr_zero() == CTZ) {
2844	SDValue AmtC = TLO.DAG.getShiftAmountConstant(Val: CTZ, VT, DL: dl);
2845	SDValue Shl = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op.getOperand(i: `0`), N2: AmtC);
2846	return TLO.CombineTo(O: Op, N: Shl);
2847	}
2848	}
2849	// For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2850	// X * X is odd iff X is odd.
2851	// 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2852	if (Op.getOperand(i: `0`) == Op.getOperand(i: `1`) && DemandedBits.ult(RHS: `4`)) {
2853	SDValue One = TLO.DAG.getConstant(Val: `1`, DL: dl, VT);
2854	SDValue And1 = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op.getOperand(i: `0`), N2: One);
2855	return TLO.CombineTo(O: Op, N: And1);
2856	}
2857	[[fallthrough]];
2858	case ISD::ADD:
2859	case ISD::SUB: {
2860	// Add, Sub, and Mul don't demand any bits in positions beyond that
2861	// of the highest bit demanded of them.
2862	SDValue Op0 = Op.getOperand(i: `0`), Op1 = Op.getOperand(i: `1`);
2863	SDNodeFlags Flags = Op.getNode()->getFlags();
2864	unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2865	APInt LoMask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - DemandedBitsLZ);
2866	KnownBits KnownOp0, KnownOp1;
2867	auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2868	const KnownBits &KnownRHS) {
2869	if (Op.getOpcode() == ISD::MUL)
2870	Demanded.clearHighBits(hiBits: KnownRHS.countMinTrailingZeros());
2871	return Demanded;
2872	};
2873	if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: LoMask, OriginalDemandedElts: DemandedElts, Known&: KnownOp1, TLO,
2874	Depth: Depth + `1`) \|\|
2875	SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: GetDemandedBitsLHSMask (LoMask, KnownOp1),
2876	OriginalDemandedElts: DemandedElts, Known&: KnownOp0, TLO, Depth: Depth + `1`) \|\|
2877	// See if the operation should be performed at a smaller bit width.
2878	ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2879	// Disable the nsw and nuw flags. We can no longer guarantee that we
2880	// won't wrap after simplification.
2881	Op ->dropFlags(Mask: SDNodeFlags::NoWrap);
2882	return true;
2883	}
2884
2885	// neg x with only low bit demanded is simply x.
2886	if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2887	isNullConstant(V: Op0))
2888	return TLO.CombineTo(O: Op, N: Op1);
2889
2890	// Attempt to avoid multi-use ops if we don't need anything from them.
2891	if (!LoMask.isAllOnes() \|\| !DemandedElts.isAllOnes()) {
2892	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2893	Op: Op0, DemandedBits: LoMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
2894	SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2895	Op: Op1, DemandedBits: LoMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
2896	if (DemandedOp0 \|\| DemandedOp1) {
2897	Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2898	Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2899	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1,
2900	Flags: Flags & ~SDNodeFlags::NoWrap);
2901	return TLO.CombineTo(O: Op, N: NewOp);
2902	}
2903	}
2904
2905	// If we have a constant operand, we may be able to turn it into -1 if we
2906	// do not demand the high bits. This can make the constant smaller to
2907	// encode, allow more general folding, or match specialized instruction
2908	// patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2909	// is probably not useful (and could be detrimental).
2910	ConstantSDNode *C = isConstOrConstSplat(N: Op1);
2911	APInt HighMask = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: DemandedBitsLZ);
2912	if (C && !C->isAllOnes() && !C->isOne() &&
2913	(C->getAPIntValue() \| HighMask).isAllOnes()) {
2914	SDValue Neg1 = TLO.DAG.getAllOnesConstant(DL: dl, VT);
2915	// Disable the nsw and nuw flags. We can no longer guarantee that we
2916	// won't wrap after simplification.
2917	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Neg1,
2918	Flags: Flags & ~SDNodeFlags::NoWrap);
2919	return TLO.CombineTo(O: Op, N: NewOp);
2920	}
2921
2922	// Match a multiply with a disguised negated-power-of-2 and convert to
2923	// an equivalent shift-left amount.
2924	// Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2925	auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2926	if (Mul.getOpcode() != ISD::MUL \|\| !Mul.hasOneUse())
2927	return `0`;
2928
2929	// Don't touch opaque constants. Also, ignore zero and power-of-2
2930	// multiplies. Those will get folded later.
2931	ConstantSDNode *MulC = isConstOrConstSplat(N: Mul.getOperand(i: `1`));
2932	if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2933	!MulC->getAPIntValue().isPowerOf2()) {
2934	APInt UnmaskedC = MulC->getAPIntValue() \| HighMask;
2935	if (UnmaskedC.isNegatedPowerOf2())
2936	return (-UnmaskedC).logBase2();
2937	}
2938	return `0`;
2939	};
2940
2941	auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2942	unsigned ShlAmt) {
2943	SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(Val: ShlAmt, VT, DL: dl);
2944	SDValue Shl = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: X, N2: ShlAmtC);
2945	SDValue Res = TLO.DAG.getNode(Opcode: NT, DL: dl, VT, N1: Y, N2: Shl);
2946	return TLO.CombineTo(O: Op, N: Res);
2947	};
2948
2949	if (isOperationLegalOrCustom(Op: ISD::SHL, VT)) {
2950	if (Op.getOpcode() == ISD::ADD) {
2951	// (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2952	if (unsigned ShAmt = getShiftLeftAmt (Op0))
2953	return foldMul (ISD::SUB, Op0.getOperand(i: `0`), Op1, ShAmt);
2954	// Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2955	if (unsigned ShAmt = getShiftLeftAmt (Op1))
2956	return foldMul (ISD::SUB, Op1.getOperand(i: `0`), Op0, ShAmt);
2957	}
2958	if (Op.getOpcode() == ISD::SUB) {
2959	// Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2960	if (unsigned ShAmt = getShiftLeftAmt (Op1))
2961	return foldMul (ISD::ADD, Op1.getOperand(i: `0`), Op0, ShAmt);
2962	}
2963	}
2964
2965	if (Op.getOpcode() == ISD::MUL) {
2966	Known = KnownBits::mul(LHS: KnownOp0, RHS: KnownOp1);
2967	} else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2968	Known = KnownBits::computeForAddSub(
2969	Add: Op.getOpcode() == ISD::ADD, NSW: Flags.hasNoSignedWrap(),
2970	NUW: Flags.hasNoUnsignedWrap(), LHS: KnownOp0, RHS: KnownOp1);
2971	}
2972	break;
2973	}
2974	case ISD::FABS: {
2975	SDValue Op0 = Op.getOperand(i: `0`);
2976	APInt SignMask = APInt::getSignMask(BitWidth);
2977
2978	if (!DemandedBits.intersects(RHS: SignMask))
2979	return TLO.CombineTo(O: Op, N: Op0);
2980
2981	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
2982	Depth: Depth + `1`))
2983	return true;
2984
2985	if (Known.isNonNegative())
2986	return TLO.CombineTo(O: Op, N: Op0);
2987	if (Known.isNegative())
2988	return TLO.CombineTo(
2989	O: Op, N: TLO.DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT, Operand: Op0, Flags: Op ->getFlags()));
2990
2991	Known.Zero \|= SignMask;
2992	Known.One &= ~SignMask;
2993
2994	break;
2995	}
2996	case ISD::FCOPYSIGN: {
2997	SDValue Op0 = Op.getOperand(i: `0`);
2998	SDValue Op1 = Op.getOperand(i: `1`);
2999
3000	unsigned BitWidth0 = Op0.getScalarValueSizeInBits();
3001	unsigned BitWidth1 = Op1.getScalarValueSizeInBits();
3002	APInt SignMask0 = APInt::getSignMask(BitWidth: BitWidth0);
3003	APInt SignMask1 = APInt::getSignMask(BitWidth: BitWidth1);
3004
3005	if (!DemandedBits.intersects(RHS: SignMask0))
3006	return TLO.CombineTo(O: Op, N: Op0);
3007
3008	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: ~SignMask0 & DemandedBits, OriginalDemandedElts: DemandedElts,
3009	Known, TLO, Depth: Depth + `1`) \|\|
3010	SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: SignMask1, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
3011	Depth: Depth + `1`))
3012	return true;
3013
3014	if (Known2.isNonNegative())
3015	return TLO.CombineTo(
3016	O: Op, N: TLO.DAG.getNode(Opcode: ISD::FABS, DL: dl, VT, Operand: Op0, Flags: Op ->getFlags()));
3017
3018	if (Known2.isNegative())
3019	return TLO.CombineTo(
3020	O: Op, N: TLO.DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT,
3021	Operand: TLO.DAG.getNode(Opcode: ISD::FABS, DL: SDLoc (Op0), VT, Operand: Op0)));
3022
3023	Known.Zero &= ~SignMask0;
3024	Known.One &= ~SignMask0;
3025	break;
3026	}
3027	case ISD::FNEG: {
3028	SDValue Op0 = Op.getOperand(i: `0`);
3029	APInt SignMask = APInt::getSignMask(BitWidth);
3030
3031	if (!DemandedBits.intersects(RHS: SignMask))
3032	return TLO.CombineTo(O: Op, N: Op0);
3033
3034	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
3035	Depth: Depth + `1`))
3036	return true;
3037
3038	if (!Known.isSignUnknown()) {
3039	Known.Zero ^= SignMask;
3040	Known.One ^= SignMask;
3041	}
3042
3043	break;
3044	}
3045	default:
3046	// We also ask the target about intrinsics (which could be specific to it).
3047	if (Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
3048	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3049	// TODO: Probably okay to remove after audit; here to reduce change size
3050	// in initial enablement patch for scalable vectors
3051	if (Op.getValueType().isScalableVector())
3052	break;
3053	if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
3054	Known, TLO, Depth))
3055	return true;
3056	break;
3057	}
3058
3059	// Just use computeKnownBits to compute output bits.
3060	Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
3061	break;
3062	}
3063
3064	// If we know the value of all of the demanded bits, return this as a
3065	// constant.
3066	if (!isTargetCanonicalConstantNode(Op) &&
3067	DemandedBits.isSubsetOf(RHS: Known.Zero \| Known.One)) {
3068	// Avoid folding to a constant if any OpaqueConstant is involved.
3069	if (llvm::any_of(Range: Op ->ops(), P: [](SDValue V) {
3070	auto *C = dyn_cast<ConstantSDNode>(Val&: V);
3071	return C && C->isOpaque();
3072	}))
3073	return false;
3074	if (VT.isInteger())
3075	return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: Known.One, DL: dl, VT));
3076	if (VT.isFloatingPoint())
3077	return TLO.CombineTo(
3078	O: Op, N: TLO.DAG.getConstantFP(Val: APFloat (VT.getFltSemantics(), Known.One),
3079	DL: dl, VT));
3080	}
3081
3082	// A multi use 'all demanded elts' simplify failed to find any knownbits.
3083	// Try again just for the original demanded elts.
3084	// Ensure we do this AFTER constant folding above.
3085	if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3086	Known = TLO.DAG.computeKnownBits(Op, DemandedElts: OriginalDemandedElts, Depth);
3087
3088	return false;
3089	}
3090
3091	bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
3092	const APInt &DemandedElts,
3093	DAGCombinerInfo &DCI) const {
3094	SelectionDAG &DAG = DCI.DAG;
3095	TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3096	!DCI.isBeforeLegalizeOps());
3097
3098	APInt KnownUndef, KnownZero;
3099	bool Simplified =
3100	SimplifyDemandedVectorElts(Op, DemandedEltMask: DemandedElts, KnownUndef, KnownZero, TLO);
3101	if (Simplified) {
3102	DCI.AddToWorklist(N: Op.getNode());
3103	DCI.CommitTargetLoweringOpt(TLO);
3104	}
3105
3106	return Simplified;
3107	}
3108
3109	/// Given a vector binary operation and known undefined elements for each input
3110	/// operand, compute whether each element of the output is undefined.
3111	static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
3112	const APInt &UndefOp0,
3113	const APInt &UndefOp1) {
3114	EVT VT = BO.getValueType();
3115	assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
3116	"Vector binop only");
3117
3118	EVT EltVT = VT.getVectorElementType();
3119	unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : `1`;
3120	assert(UndefOp0.getBitWidth() == NumElts &&
3121	UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3122
3123	auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3124	const APInt &UndefVals) {
3125	if (UndefVals [Index])
3126	return DAG.getUNDEF(VT: EltVT);
3127
3128	if (auto *BV = dyn_cast<BuildVectorSDNode>(Val&: V)) {
3129	// Try hard to make sure that the getNode() call is not creating temporary
3130	// nodes. Ignore opaque integers because they do not constant fold.
3131	SDValue Elt = BV->getOperand(Num: Index);
3132	auto *C = dyn_cast<ConstantSDNode>(Val&: Elt);
3133	if (isa<ConstantFPSDNode>(Val: Elt) \|\| Elt.isUndef() \|\| (C && !C->isOpaque()))
3134	return Elt;
3135	}
3136
3137	return SDValue ();
3138	};
3139
3140	APInt KnownUndef = APInt::getZero(numBits: NumElts);
3141	for (unsigned i = `0`; i != NumElts; ++i) {
3142	// If both inputs for this element are either constant or undef and match
3143	// the element type, compute the constant/undef result for this element of
3144	// the vector.
3145	// TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3146	// not handle FP constants. The code within getNode() should be refactored
3147	// to avoid the danger of creating a bogus temporary node here.
3148	SDValue C0 = getUndefOrConstantElt (BO.getOperand(i: `0`), i, UndefOp0);
3149	SDValue C1 = getUndefOrConstantElt (BO.getOperand(i: `1`), i, UndefOp1);
3150	if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3151	if (DAG.getNode(Opcode: BO.getOpcode(), DL: SDLoc (BO), VT: EltVT, N1: C0, N2: C1).isUndef())
3152	KnownUndef.setBit(i);
3153	}
3154	return KnownUndef;
3155	}
3156
3157	bool TargetLowering::SimplifyDemandedVectorElts(
3158	SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3159	APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3160	bool AssumeSingleUse) const {
3161	EVT VT = Op.getValueType();
3162	unsigned Opcode = Op.getOpcode();
3163	APInt DemandedElts = OriginalDemandedElts;
3164	unsigned NumElts = DemandedElts.getBitWidth();
3165	assert(VT.isVector() && "Expected vector op");
3166
3167	KnownUndef = KnownZero = APInt::getZero(numBits: NumElts);
3168
3169	if (!shouldSimplifyDemandedVectorElts(Op, TLO))
3170	return false;
3171
3172	// TODO: For now we assume we know nothing about scalable vectors.
3173	if (VT.isScalableVector())
3174	return false;
3175
3176	assert(VT.getVectorNumElements() == NumElts &&
3177	"Mask size mismatches value type element count!");
3178
3179	// Undef operand.
3180	if (Op.isUndef()) {
3181	KnownUndef.setAllBits();
3182	return false;
3183	}
3184
3185	// If Op has other users, assume that all elements are needed.
3186	if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3187	DemandedElts.setAllBits();
3188
3189	// Not demanding any elements from Op.
3190	if (DemandedElts == `0`) {
3191	KnownUndef.setAllBits();
3192	return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3193	}
3194
3195	// Limit search depth.
3196	if (Depth >= SelectionDAG::MaxRecursionDepth)
3197	return false;
3198
3199	SDLoc DL(Op);
3200	unsigned EltSizeInBits = VT.getScalarSizeInBits();
3201	bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3202
3203	// Helper for demanding the specified elements and all the bits of both binary
3204	// operands.
3205	auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3206	SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op: Op0, DemandedElts,
3207	DAG&: TLO.DAG, Depth: Depth + `1`);
3208	SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op: Op1, DemandedElts,
3209	DAG&: TLO.DAG, Depth: Depth + `1`);
3210	if (NewOp0 \|\| NewOp1) {
3211	SDValue NewOp =
3212	TLO.DAG.getNode(Opcode, DL: SDLoc (Op), VT, N1: NewOp0 ? NewOp0 : Op0,
3213	N2: NewOp1 ? NewOp1 : Op1, Flags: Op ->getFlags());
3214	return TLO.CombineTo(O: Op, N: NewOp);
3215	}
3216	return false;
3217	};
3218
3219	switch (Opcode) {
3220	case ISD::SCALAR_TO_VECTOR: {
3221	if (!DemandedElts [`0`]) {
3222	KnownUndef.setAllBits();
3223	return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3224	}
3225	SDValue ScalarSrc = Op.getOperand(i: `0`);
3226	if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3227	SDValue Src = ScalarSrc.getOperand(i: `0`);
3228	SDValue Idx = ScalarSrc.getOperand(i: `1`);
3229	EVT SrcVT = Src.getValueType();
3230
3231	ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
3232
3233	if (SrcEltCnt.isScalable())
3234	return false;
3235
3236	unsigned NumSrcElts = SrcEltCnt.getFixedValue();
3237	if (isNullConstant(V: Idx)) {
3238	APInt SrcDemandedElts = APInt::getOneBitSet(numBits: NumSrcElts, BitNo: `0`);
3239	APInt SrcUndef = KnownUndef.zextOrTrunc(width: NumSrcElts);
3240	APInt SrcZero = KnownZero.zextOrTrunc(width: NumSrcElts);
3241	if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
3242	TLO, Depth: Depth + `1`))
3243	return true;
3244	}
3245	}
3246	KnownUndef.setHighBits(NumElts - `1`);
3247	break;
3248	}
3249	case ISD::BITCAST: {
3250	SDValue Src = Op.getOperand(i: `0`);
3251	EVT SrcVT = Src.getValueType();
3252
3253	if (!SrcVT.isVector()) {
3254	// TODO - bigendian once we have test coverage.
3255	if (IsLE) {
3256	APInt DemandedSrcBits = APInt::getZero(numBits: SrcVT.getSizeInBits());
3257	unsigned EltSize = VT.getScalarSizeInBits();
3258	for (unsigned I = `0`; I != NumElts; ++I) {
3259	if (DemandedElts [I]) {
3260	unsigned Offset = I * EltSize;
3261	DemandedSrcBits.setBits(loBit: Offset, hiBit: Offset + EltSize);
3262	}
3263	}
3264	KnownBits Known;
3265	if (SimplifyDemandedBits(Op: Src, DemandedBits: DemandedSrcBits, Known, TLO, Depth: Depth + `1`))
3266	return true;
3267	}
3268	break;
3269	}
3270
3271	// Fast handling of 'identity' bitcasts.
3272	unsigned NumSrcElts = SrcVT.getVectorNumElements();
3273	if (NumSrcElts == NumElts)
3274	return SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedElts, KnownUndef,
3275	KnownZero, TLO, Depth: Depth + `1`);
3276
3277	APInt SrcDemandedElts, SrcZero, SrcUndef;
3278
3279	// Bitcast from 'large element' src vector to 'small element' vector, we
3280	// must demand a source element if any DemandedElt maps to it.
3281	if ((NumElts % NumSrcElts) == `0`) {
3282	unsigned Scale = NumElts / NumSrcElts;
3283	SrcDemandedElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
3284	if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
3285	TLO, Depth: Depth + `1`))
3286	return true;
3287
3288	// Try calling SimplifyDemandedBits, converting demanded elts to the bits
3289	// of the large element.
3290	// TODO - bigendian once we have test coverage.
3291	if (IsLE) {
3292	unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3293	APInt SrcDemandedBits = APInt::getZero(numBits: SrcEltSizeInBits);
3294	for (unsigned i = `0`; i != NumElts; ++i)
3295	if (DemandedElts [i]) {
3296	unsigned Ofs = (i % Scale) * EltSizeInBits;
3297	SrcDemandedBits.setBits(loBit: Ofs, hiBit: Ofs + EltSizeInBits);
3298	}
3299
3300	KnownBits Known;
3301	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: SrcDemandedBits, OriginalDemandedElts: SrcDemandedElts, Known,
3302	TLO, Depth: Depth + `1`))
3303	return true;
3304
3305	// The bitcast has split each wide element into a number of
3306	// narrow subelements. We have just computed the Known bits
3307	// for wide elements. See if element splitting results in
3308	// some subelements being zero. Only for demanded elements!
3309	for (unsigned SubElt = `0`; SubElt != Scale; ++SubElt) {
3310	if (!Known.Zero.extractBits(numBits: EltSizeInBits, bitPosition: SubElt * EltSizeInBits)
3311	.isAllOnes())
3312	continue;
3313	for (unsigned SrcElt = `0`; SrcElt != NumSrcElts; ++SrcElt) {
3314	unsigned Elt = Scale * SrcElt + SubElt;
3315	if (DemandedElts [Elt])
3316	KnownZero.setBit(Elt);
3317	}
3318	}
3319	}
3320
3321	// If the src element is zero/undef then all the output elements will be -
3322	// only demanded elements are guaranteed to be correct.
3323	for (unsigned i = `0`; i != NumSrcElts; ++i) {
3324	if (SrcDemandedElts [i]) {
3325	if (SrcZero [i])
3326	KnownZero.setBits(loBit: i * Scale, hiBit: (i + `1`) * Scale);
3327	if (SrcUndef [i])
3328	KnownUndef.setBits(loBit: i * Scale, hiBit: (i + `1`) * Scale);
3329	}
3330	}
3331	}
3332
3333	// Bitcast from 'small element' src vector to 'large element' vector, we
3334	// demand all smaller source elements covered by the larger demanded element
3335	// of this vector.
3336	if ((NumSrcElts % NumElts) == `0`) {
3337	unsigned Scale = NumSrcElts / NumElts;
3338	SrcDemandedElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
3339	if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
3340	TLO, Depth: Depth + `1`))
3341	return true;
3342
3343	// If all the src elements covering an output element are zero/undef, then
3344	// the output element will be as well, assuming it was demanded.
3345	for (unsigned i = `0`; i != NumElts; ++i) {
3346	if (DemandedElts [i]) {
3347	if (SrcZero.extractBits(numBits: Scale, bitPosition: i * Scale).isAllOnes())
3348	KnownZero.setBit(i);
3349	if (SrcUndef.extractBits(numBits: Scale, bitPosition: i * Scale).isAllOnes())
3350	KnownUndef.setBit(i);
3351	}
3352	}
3353	}
3354	break;
3355	}
3356	case ISD::FREEZE: {
3357	SDValue N0 = Op.getOperand(i: `0`);
3358	if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(Op: N0, DemandedElts,
3359	/PoisonOnly=/false))
3360	return TLO.CombineTo(O: Op, N: N0);
3361
3362	// TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3363	// freeze(op(x, ...)) -> op(freeze(x), ...).
3364	if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == `1`)
3365	return TLO.CombineTo(
3366	O: Op, N: TLO.DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT,
3367	Operand: TLO.DAG.getFreeze(V: N0.getOperand(i: `0`))));
3368	break;
3369	}
3370	case ISD::BUILD_VECTOR: {
3371	// Check all elements and simplify any unused elements with UNDEF.
3372	if (!DemandedElts.isAllOnes()) {
3373	// Don't simplify BROADCASTS.
3374	if (llvm::any_of(Range: Op ->op_values(),
3375	P: [&](SDValue Elt) { return Op.getOperand(i: `0`) != Elt; })) {
3376	SmallVector<SDValue, `32`> Ops(Op ->ops());
3377	bool Updated = false;
3378	for (unsigned i = `0`; i != NumElts; ++i) {
3379	if (!DemandedElts [i] && !Ops [i].isUndef()) {
3380	Ops [i] = TLO.DAG.getUNDEF(VT: Ops [`0`].getValueType());
3381	KnownUndef.setBit(i);
3382	Updated = true;
3383	}
3384	}
3385	if (Updated)
3386	return TLO.CombineTo(O: Op, N: TLO.DAG.getBuildVector(VT, DL, Ops));
3387	}
3388	}
3389	for (unsigned i = `0`; i != NumElts; ++i) {
3390	SDValue SrcOp = Op.getOperand(i);
3391	if (SrcOp.isUndef()) {
3392	KnownUndef.setBit(i);
3393	} else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3394	(isNullConstant(V: SrcOp) \|\| isNullFPConstant(V: SrcOp))) {
3395	KnownZero.setBit(i);
3396	}
3397	}
3398	break;
3399	}
3400	case ISD::CONCAT_VECTORS: {
3401	EVT SubVT = Op.getOperand(i: `0`).getValueType();
3402	unsigned NumSubVecs = Op.getNumOperands();
3403	unsigned NumSubElts = SubVT.getVectorNumElements();
3404	for (unsigned i = `0`; i != NumSubVecs; ++i) {
3405	SDValue SubOp = Op.getOperand(i);
3406	APInt SubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
3407	APInt SubUndef, SubZero;
3408	if (SimplifyDemandedVectorElts(Op: SubOp, OriginalDemandedElts: SubElts, KnownUndef&: SubUndef, KnownZero&: SubZero, TLO,
3409	Depth: Depth + `1`))
3410	return true;
3411	KnownUndef.insertBits(SubBits: SubUndef, bitPosition: i * NumSubElts);
3412	KnownZero.insertBits(SubBits: SubZero, bitPosition: i * NumSubElts);
3413	}
3414
3415	// Attempt to avoid multi-use ops if we don't need anything from them.
3416	if (!DemandedElts.isAllOnes()) {
3417	bool FoundNewSub = false;
3418	SmallVector<SDValue, `2`> DemandedSubOps;
3419	for (unsigned i = `0`; i != NumSubVecs; ++i) {
3420	SDValue SubOp = Op.getOperand(i);
3421	APInt SubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
3422	SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3423	Op: SubOp, DemandedElts: SubElts, DAG&: TLO.DAG, Depth: Depth + `1`);
3424	DemandedSubOps.push_back(Elt: NewSubOp ? NewSubOp : SubOp);
3425	FoundNewSub = NewSubOp ? true : FoundNewSub;
3426	}
3427	if (FoundNewSub) {
3428	SDValue NewOp =
3429	TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc (Op), VT, Ops: DemandedSubOps);
3430	return TLO.CombineTo(O: Op, N: NewOp);
3431	}
3432	}
3433	break;
3434	}
3435	case ISD::INSERT_SUBVECTOR: {
3436	// Demand any elements from the subvector and the remainder from the src its
3437	// inserted into.
3438	SDValue Src = Op.getOperand(i: `0`);
3439	SDValue Sub = Op.getOperand(i: `1`);
3440	uint64_t Idx = Op.getConstantOperandVal(i: `2`);
3441	unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3442	APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx);
3443	APInt DemandedSrcElts = DemandedElts;
3444	DemandedSrcElts.clearBits(LoBit: Idx, HiBit: Idx + NumSubElts);
3445
3446	APInt SubUndef, SubZero;
3447	if (SimplifyDemandedVectorElts(Op: Sub, OriginalDemandedElts: DemandedSubElts, KnownUndef&: SubUndef, KnownZero&: SubZero, TLO,
3448	Depth: Depth + `1`))
3449	return true;
3450
3451	// If none of the src operand elements are demanded, replace it with undef.
3452	if (!DemandedSrcElts && !Src.isUndef())
3453	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT,
3454	N1: TLO.DAG.getUNDEF(VT), N2: Sub,
3455	N3: Op.getOperand(i: `2`)));
3456
3457	if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef, KnownZero,
3458	TLO, Depth: Depth + `1`))
3459	return true;
3460	KnownUndef.insertBits(SubBits: SubUndef, bitPosition: Idx);
3461	KnownZero.insertBits(SubBits: SubZero, bitPosition: Idx);
3462
3463	// Attempt to avoid multi-use ops if we don't need anything from them.
3464	if (!DemandedSrcElts.isAllOnes() \|\| !DemandedSubElts.isAllOnes()) {
3465	SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3466	Op: Src, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + `1`);
3467	SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3468	Op: Sub, DemandedElts: DemandedSubElts, DAG&: TLO.DAG, Depth: Depth + `1`);
3469	if (NewSrc \|\| NewSub) {
3470	NewSrc = NewSrc ? NewSrc : Src;
3471	NewSub = NewSub ? NewSub : Sub;
3472	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc (Op), VT, N1: NewSrc,
3473	N2: NewSub, N3: Op.getOperand(i: `2`));
3474	return TLO.CombineTo(O: Op, N: NewOp);
3475	}
3476	}
3477	break;
3478	}
3479	case ISD::EXTRACT_SUBVECTOR: {
3480	// Offset the demanded elts by the subvector index.
3481	SDValue Src = Op.getOperand(i: `0`);
3482	if (Src.getValueType().isScalableVector())
3483	break;
3484	uint64_t Idx = Op.getConstantOperandVal(i: `1`);
3485	unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3486	APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts).shl(shiftAmt: Idx);
3487
3488	APInt SrcUndef, SrcZero;
3489	if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3490	Depth: Depth + `1`))
3491	return true;
3492	KnownUndef = SrcUndef.extractBits(numBits: NumElts, bitPosition: Idx);
3493	KnownZero = SrcZero.extractBits(numBits: NumElts, bitPosition: Idx);
3494
3495	// Attempt to avoid multi-use ops if we don't need anything from them.
3496	if (!DemandedElts.isAllOnes()) {
3497	SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3498	Op: Src, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + `1`);
3499	if (NewSrc) {
3500	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc (Op), VT, N1: NewSrc,
3501	N2: Op.getOperand(i: `1`));
3502	return TLO.CombineTo(O: Op, N: NewOp);
3503	}
3504	}
3505	break;
3506	}
3507	case ISD::INSERT_VECTOR_ELT: {
3508	SDValue Vec = Op.getOperand(i: `0`);
3509	SDValue Scl = Op.getOperand(i: `1`);
3510	auto *CIdx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `2`));
3511
3512	// For a legal, constant insertion index, if we don't need this insertion
3513	// then strip it, else remove it from the demanded elts.
3514	if (CIdx && CIdx->getAPIntValue().ult(RHS: NumElts)) {
3515	unsigned Idx = CIdx->getZExtValue();
3516	if (!DemandedElts [Idx])
3517	return TLO.CombineTo(O: Op, N: Vec);
3518
3519	APInt DemandedVecElts(DemandedElts);
3520	DemandedVecElts.clearBit(BitPosition: Idx);
3521	if (SimplifyDemandedVectorElts(Op: Vec, OriginalDemandedElts: DemandedVecElts, KnownUndef,
3522	KnownZero, TLO, Depth: Depth + `1`))
3523	return true;
3524
3525	KnownUndef.setBitVal(BitPosition: Idx, BitValue: Scl.isUndef());
3526
3527	KnownZero.setBitVal(BitPosition: Idx, BitValue: isNullConstant(V: Scl) \|\| isNullFPConstant(V: Scl));
3528	break;
3529	}
3530
3531	APInt VecUndef, VecZero;
3532	if (SimplifyDemandedVectorElts(Op: Vec, OriginalDemandedElts: DemandedElts, KnownUndef&: VecUndef, KnownZero&: VecZero, TLO,
3533	Depth: Depth + `1`))
3534	return true;
3535	// Without knowing the insertion index we can't set KnownUndef/KnownZero.
3536	break;
3537	}
3538	case ISD::VSELECT: {
3539	SDValue Sel = Op.getOperand(i: `0`);
3540	SDValue LHS = Op.getOperand(i: `1`);
3541	SDValue RHS = Op.getOperand(i: `2`);
3542
3543	// Try to transform the select condition based on the current demanded
3544	// elements.
3545	APInt UndefSel, ZeroSel;
3546	if (SimplifyDemandedVectorElts(Op: Sel, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefSel, KnownZero&: ZeroSel, TLO,
3547	Depth: Depth + `1`))
3548	return true;
3549
3550	// See if we can simplify either vselect operand.
3551	APInt DemandedLHS(DemandedElts);
3552	APInt DemandedRHS(DemandedElts);
3553	APInt UndefLHS, ZeroLHS;
3554	APInt UndefRHS, ZeroRHS;
3555	if (SimplifyDemandedVectorElts(Op: LHS, OriginalDemandedElts: DemandedLHS, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3556	Depth: Depth + `1`))
3557	return true;
3558	if (SimplifyDemandedVectorElts(Op: RHS, OriginalDemandedElts: DemandedRHS, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3559	Depth: Depth + `1`))
3560	return true;
3561
3562	KnownUndef = UndefLHS & UndefRHS;
3563	KnownZero = ZeroLHS & ZeroRHS;
3564
3565	// If we know that the selected element is always zero, we don't need the
3566	// select value element.
3567	APInt DemandedSel = DemandedElts & ~KnownZero;
3568	if (DemandedSel != DemandedElts)
3569	if (SimplifyDemandedVectorElts(Op: Sel, OriginalDemandedElts: DemandedSel, KnownUndef&: UndefSel, KnownZero&: ZeroSel, TLO,
3570	Depth: Depth + `1`))
3571	return true;
3572
3573	break;
3574	}
3575	case ISD::VECTOR_SHUFFLE: {
3576	SDValue LHS = Op.getOperand(i: `0`);
3577	SDValue RHS = Op.getOperand(i: `1`);
3578	ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Val&: Op)->getMask();
3579
3580	// Collect demanded elements from shuffle operands..
3581	APInt DemandedLHS(NumElts, `0`);
3582	APInt DemandedRHS(NumElts, `0`);
3583	for (unsigned i = `0`; i != NumElts; ++i) {
3584	int M = ShuffleMask [i];
3585	if (M < `0` \|\| !DemandedElts [i])
3586	continue;
3587	assert(`0` <= M && M < (int)(`2` * NumElts) && "Shuffle index out of range");
3588	if (M < (int)NumElts)
3589	DemandedLHS.setBit(M);
3590	else
3591	DemandedRHS.setBit(M - NumElts);
3592	}
3593
3594	// If either side isn't demanded, replace it by UNDEF. We handle this
3595	// explicitly here to also simplify in case of multiple uses (on the
3596	// contrary to the SimplifyDemandedVectorElts calls below).
3597	bool FoldLHS = !DemandedLHS && !LHS.isUndef();
3598	bool FoldRHS = !DemandedRHS && !RHS.isUndef();
3599	if (FoldLHS \|\| FoldRHS) {
3600	LHS = FoldLHS ? TLO.DAG.getUNDEF(VT: LHS.getValueType()) : LHS;
3601	RHS = FoldRHS ? TLO.DAG.getUNDEF(VT: RHS.getValueType()) : RHS;
3602	SDValue NewOp =
3603	TLO.DAG.getVectorShuffle(VT, dl: SDLoc (Op), N1: LHS, N2: RHS, Mask: ShuffleMask);
3604	return TLO.CombineTo(O: Op, N: NewOp);
3605	}
3606
3607	// See if we can simplify either shuffle operand.
3608	APInt UndefLHS, ZeroLHS;
3609	APInt UndefRHS, ZeroRHS;
3610	if (SimplifyDemandedVectorElts(Op: LHS, OriginalDemandedElts: DemandedLHS, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3611	Depth: Depth + `1`))
3612	return true;
3613	if (SimplifyDemandedVectorElts(Op: RHS, OriginalDemandedElts: DemandedRHS, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3614	Depth: Depth + `1`))
3615	return true;
3616
3617	// Simplify mask using undef elements from LHS/RHS.
3618	bool Updated = false;
3619	bool IdentityLHS = true, IdentityRHS = true;
3620	SmallVector<int, `32`> NewMask(ShuffleMask);
3621	for (unsigned i = `0`; i != NumElts; ++i) {
3622	int &M = NewMask [i];
3623	if (M < `0`)
3624	continue;
3625	if (!DemandedElts [i] \|\| (M < (int)NumElts && UndefLHS [M]) \|\|
3626	(M >= (int)NumElts && UndefRHS [M - NumElts])) {
3627	Updated = true;
3628	M = -`1`;
3629	}
3630	IdentityLHS &= (M < `0`) \|\| (M == (int)i);
3631	IdentityRHS &= (M < `0`) \|\| ((M - NumElts) == i);
3632	}
3633
3634	// Update legal shuffle masks based on demanded elements if it won't reduce
3635	// to Identity which can cause premature removal of the shuffle mask.
3636	if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3637	SDValue LegalShuffle =
3638	buildLegalVectorShuffle(VT, DL, N0: LHS, N1: RHS, Mask: NewMask, DAG&: TLO.DAG);
3639	if (LegalShuffle)
3640	return TLO.CombineTo(O: Op, N: LegalShuffle);
3641	}
3642
3643	// Propagate undef/zero elements from LHS/RHS.
3644	for (unsigned i = `0`; i != NumElts; ++i) {
3645	int M = ShuffleMask [i];
3646	if (M < `0`) {
3647	KnownUndef.setBit(i);
3648	} else if (M < (int)NumElts) {
3649	if (UndefLHS [M])
3650	KnownUndef.setBit(i);
3651	if (ZeroLHS [M])
3652	KnownZero.setBit(i);
3653	} else {
3654	if (UndefRHS [M - NumElts])
3655	KnownUndef.setBit(i);
3656	if (ZeroRHS [M - NumElts])
3657	KnownZero.setBit(i);
3658	}
3659	}
3660	break;
3661	}
3662	case ISD::ANY_EXTEND_VECTOR_INREG:
3663	case ISD::SIGN_EXTEND_VECTOR_INREG:
3664	case ISD::ZERO_EXTEND_VECTOR_INREG: {
3665	APInt SrcUndef, SrcZero;
3666	SDValue Src = Op.getOperand(i: `0`);
3667	unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3668	APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts);
3669	if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3670	Depth: Depth + `1`))
3671	return true;
3672	KnownZero = SrcZero.zextOrTrunc(width: NumElts);
3673	KnownUndef = SrcUndef.zextOrTrunc(width: NumElts);
3674
3675	if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3676	Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3677	DemandedSrcElts == `1`) {
3678	// aext - if we just need the bottom element then we can bitcast.
3679	return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
3680	}
3681
3682	if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3683	// zext(undef) upper bits are guaranteed to be zero.
3684	if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3685	return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: `0`, DL: SDLoc (Op), VT));
3686	KnownUndef.clearAllBits();
3687
3688	// zext - if we just need the bottom element then we can mask:
3689	// zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3690	if (IsLE && DemandedSrcElts == `1` && Src.getOpcode() == ISD::AND &&
3691	Op ->isOnlyUserOf(N: Src.getNode()) &&
3692	Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3693	SDLoc DL(Op);
3694	EVT SrcVT = Src.getValueType();
3695	EVT SrcSVT = SrcVT.getScalarType();
3696	SmallVector<SDValue> MaskElts;
3697	MaskElts.push_back(Elt: TLO.DAG.getAllOnesConstant(DL, VT: SrcSVT));
3698	MaskElts.append(NumInputs: NumSrcElts - `1`, Elt: TLO.DAG.getConstant(Val: `0`, DL, VT: SrcSVT));
3699	SDValue Mask = TLO.DAG.getBuildVector(VT: SrcVT, DL, Ops: MaskElts);
3700	if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3701	Opcode: ISD::AND, DL, VT: SrcVT, Ops: {Src.getOperand(i: `1`), Mask})) {
3702	Fold = TLO.DAG.getNode(Opcode: ISD::AND, DL, VT: SrcVT, N1: Src.getOperand(i: `0`), N2: Fold);
3703	return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Fold));
3704	}
3705	}
3706	}
3707	break;
3708	}
3709
3710	// TODO: There are more binop opcodes that could be handled here - MIN,
3711	// MAX, saturated math, etc.
3712	case ISD::ADD: {
3713	SDValue Op0 = Op.getOperand(i: `0`);
3714	SDValue Op1 = Op.getOperand(i: `1`);
3715	if (Op0 == Op1 && Op ->isOnlyUserOf(N: Op0.getNode())) {
3716	APInt UndefLHS, ZeroLHS;
3717	if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3718	Depth: Depth + `1`, /AssumeSingleUse/ true))
3719	return true;
3720	}
3721	[[fallthrough]];
3722	}
3723	case ISD::AVGCEILS:
3724	case ISD::AVGCEILU:
3725	case ISD::AVGFLOORS:
3726	case ISD::AVGFLOORU:
3727	case ISD::OR:
3728	case ISD::XOR:
3729	case ISD::SUB:
3730	case ISD::FADD:
3731	case ISD::FSUB:
3732	case ISD::FMUL:
3733	case ISD::FDIV:
3734	case ISD::FREM: {
3735	SDValue Op0 = Op.getOperand(i: `0`);
3736	SDValue Op1 = Op.getOperand(i: `1`);
3737
3738	APInt UndefRHS, ZeroRHS;
3739	if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3740	Depth: Depth + `1`))
3741	return true;
3742	APInt UndefLHS, ZeroLHS;
3743	if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3744	Depth: Depth + `1`))
3745	return true;
3746
3747	KnownZero = ZeroLHS & ZeroRHS;
3748	KnownUndef = getKnownUndefForVectorBinop(BO: Op, DAG&: TLO.DAG, UndefOp0: UndefLHS, UndefOp1: UndefRHS);
3749
3750	// Attempt to avoid multi-use ops if we don't need anything from them.
3751	// TODO - use KnownUndef to relax the demandedelts?
3752	if (!DemandedElts.isAllOnes())
3753	if (SimplifyDemandedVectorEltsBinOp (Op0, Op1))
3754	return true;
3755	break;
3756	}
3757	case ISD::SHL:
3758	case ISD::SRL:
3759	case ISD::SRA:
3760	case ISD::ROTL:
3761	case ISD::ROTR: {
3762	SDValue Op0 = Op.getOperand(i: `0`);
3763	SDValue Op1 = Op.getOperand(i: `1`);
3764
3765	APInt UndefRHS, ZeroRHS;
3766	if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3767	Depth: Depth + `1`))
3768	return true;
3769	APInt UndefLHS, ZeroLHS;
3770	if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3771	Depth: Depth + `1`))
3772	return true;
3773
3774	KnownZero = ZeroLHS;
3775	KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3776
3777	// Attempt to avoid multi-use ops if we don't need anything from them.
3778	// TODO - use KnownUndef to relax the demandedelts?
3779	if (!DemandedElts.isAllOnes())
3780	if (SimplifyDemandedVectorEltsBinOp (Op0, Op1))
3781	return true;
3782	break;
3783	}
3784	case ISD::MUL:
3785	case ISD::MULHU:
3786	case ISD::MULHS:
3787	case ISD::AND: {
3788	SDValue Op0 = Op.getOperand(i: `0`);
3789	SDValue Op1 = Op.getOperand(i: `1`);
3790
3791	APInt SrcUndef, SrcZero;
3792	if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3793	Depth: Depth + `1`))
3794	return true;
3795	// If we know that a demanded element was zero in Op1 we don't need to
3796	// demand it in Op0 - its guaranteed to be zero.
3797	APInt DemandedElts0 = DemandedElts & ~SrcZero;
3798	if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts0, KnownUndef, KnownZero,
3799	TLO, Depth: Depth + `1`))
3800	return true;
3801
3802	KnownUndef &= DemandedElts0;
3803	KnownZero &= DemandedElts0;
3804
3805	// If every element pair has a zero/undef then just fold to zero.
3806	// fold (and x, undef) -> 0 / (and x, 0) -> 0
3807	// fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3808	if (DemandedElts.isSubsetOf(RHS: SrcZero \| KnownZero \| SrcUndef \| KnownUndef))
3809	return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: `0`, DL: SDLoc (Op), VT));
3810
3811	// If either side has a zero element, then the result element is zero, even
3812	// if the other is an UNDEF.
3813	// TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3814	// and then handle 'and' nodes with the rest of the binop opcodes.
3815	KnownZero \|= SrcZero;
3816	KnownUndef &= SrcUndef;
3817	KnownUndef &= ~KnownZero;
3818
3819	// Attempt to avoid multi-use ops if we don't need anything from them.
3820	if (!DemandedElts.isAllOnes())
3821	if (SimplifyDemandedVectorEltsBinOp (Op0, Op1))
3822	return true;
3823	break;
3824	}
3825	case ISD::TRUNCATE:
3826	case ISD::SIGN_EXTEND:
3827	case ISD::ZERO_EXTEND:
3828	if (SimplifyDemandedVectorElts(Op: Op.getOperand(i: `0`), OriginalDemandedElts: DemandedElts, KnownUndef,
3829	KnownZero, TLO, Depth: Depth + `1`))
3830	return true;
3831
3832	if (!DemandedElts.isAllOnes())
3833	if (SDValue NewOp = SimplifyMultipleUseDemandedVectorElts(
3834	Op: Op.getOperand(i: `0`), DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`))
3835	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode, DL: SDLoc (Op), VT, Operand: NewOp));
3836
3837	if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3838	// zext(undef) upper bits are guaranteed to be zero.
3839	if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3840	return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: `0`, DL: SDLoc (Op), VT));
3841	KnownUndef.clearAllBits();
3842	}
3843	break;
3844	case ISD::SINT_TO_FP:
3845	case ISD::UINT_TO_FP:
3846	case ISD::FP_TO_SINT:
3847	case ISD::FP_TO_UINT:
3848	if (SimplifyDemandedVectorElts(Op: Op.getOperand(i: `0`), OriginalDemandedElts: DemandedElts, KnownUndef,
3849	KnownZero, TLO, Depth: Depth + `1`))
3850	return true;
3851	// Don't fall through to generic undef -> undef handling.
3852	return false;
3853	default: {
3854	if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3855	if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3856	KnownZero, TLO, Depth))
3857	return true;
3858	} else {
3859	KnownBits Known;
3860	APInt DemandedBits = APInt::getAllOnes(numBits: EltSizeInBits);
3861	if (SimplifyDemandedBits(Op, OriginalDemandedBits: DemandedBits, OriginalDemandedElts, Known,
3862	TLO, Depth, AssumeSingleUse))
3863	return true;
3864	}
3865	break;
3866	}
3867	}
3868	assert((KnownUndef & KnownZero) == `0` && "Elements flagged as undef AND zero");
3869
3870	// Constant fold all undef cases.
3871	// TODO: Handle zero cases as well.
3872	if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3873	return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3874
3875	return false;
3876	}
3877
3878	/// Determine which of the bits specified in Mask are known to be either zero or
3879	/// one and return them in the Known.
3880	void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3881	KnownBits &Known,
3882	const APInt &DemandedElts,
3883	const SelectionDAG &DAG,
3884	unsigned Depth) const {
3885	assert((Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
3886	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
3887	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
3888	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3889	"Should use MaskedValueIsZero if you don't know whether Op"
3890	" is a target node!");
3891	Known.resetAll();
3892	}
3893
3894	void TargetLowering::computeKnownBitsForTargetInstr(
3895	GISelValueTracking &Analysis, Register R, KnownBits &Known,
3896	const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3897	unsigned Depth) const {
3898	Known.resetAll();
3899	}
3900
3901	void TargetLowering::computeKnownFPClassForTargetInstr(
3902	GISelValueTracking &Analysis, Register R, KnownFPClass &Known,
3903	const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3904	unsigned Depth) const {
3905	Known.resetAll();
3906	}
3907
3908	void TargetLowering::computeKnownBitsForFrameIndex(
3909	const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3910	// The low bits are known zero if the pointer is aligned.
3911	Known.Zero.setLowBits(Log2(A: MF.getFrameInfo().getObjectAlign(ObjectIdx: FrameIdx)));
3912	}
3913
3914	Align TargetLowering::computeKnownAlignForTargetInstr(
3915	GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI,
3916	unsigned Depth) const {
3917	return Align (`1`);
3918	}
3919
3920	/// This method can be implemented by targets that want to expose additional
3921	/// information about sign bits to the DAG Combiner.
3922	unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3923	const APInt &,
3924	const SelectionDAG &,
3925	unsigned Depth) const {
3926	assert((Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
3927	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
3928	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
3929	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3930	"Should use ComputeNumSignBits if you don't know whether Op"
3931	" is a target node!");
3932	return `1`;
3933	}
3934
3935	unsigned TargetLowering::computeNumSignBitsForTargetInstr(
3936	GISelValueTracking &Analysis, Register R, const APInt &DemandedElts,
3937	const MachineRegisterInfo &MRI, unsigned Depth) const {
3938	return `1`;
3939	}
3940
3941	bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3942	SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3943	TargetLoweringOpt &TLO, unsigned Depth) const {
3944	assert((Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
3945	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
3946	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
3947	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3948	"Should use SimplifyDemandedVectorElts if you don't know whether Op"
3949	" is a target node!");
3950	return false;
3951	}
3952
3953	bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3954	SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3955	KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3956	assert((Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
3957	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
3958	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
3959	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3960	"Should use SimplifyDemandedBits if you don't know whether Op"
3961	" is a target node!");
3962	computeKnownBitsForTargetNode(Op, Known, DemandedElts, DAG: TLO.DAG, Depth);
3963	return false;
3964	}
3965
3966	SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3967	SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3968	SelectionDAG &DAG, unsigned Depth) const {
3969	assert(
3970	(Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
3971	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
3972	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
3973	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3974	"Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3975	" is a target node!");
3976	return SDValue ();
3977	}
3978
3979	SDValue
3980	TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3981	SDValue N1, MutableArrayRef<int> Mask,
3982	SelectionDAG &DAG) const {
3983	bool LegalMask = isShuffleMaskLegal(Mask, VT);
3984	if (!LegalMask) {
3985	std::swap(a&: N0, b&: N1);
3986	ShuffleVectorSDNode::commuteMask(Mask);
3987	LegalMask = isShuffleMaskLegal(Mask, VT);
3988	}
3989
3990	if (!LegalMask)
3991	return SDValue ();
3992
3993	return DAG.getVectorShuffle(VT, dl: DL, N1: N0, N2: N1, Mask);
3994	}
3995
3996	const Constant TargetLowering::getTargetConstantFromLoad(LoadSDNode) const {
3997	return nullptr;
3998	}
3999
4000	bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
4001	SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4002	bool PoisonOnly, unsigned Depth) const {
4003	assert(
4004	(Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
4005	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
4006	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
4007	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4008	"Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
4009	" is a target node!");
4010
4011	// If Op can't create undef/poison and none of its operands are undef/poison
4012	// then Op is never undef/poison.
4013	return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
4014	/ConsiderFlags/ true, Depth) &&
4015	all_of(Range: Op ->ops(), P: [&](SDValue V) {
4016	return DAG.isGuaranteedNotToBeUndefOrPoison(Op: V, PoisonOnly,
4017	Depth: Depth + `1`);
4018	});
4019	}
4020
4021	bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
4022	SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4023	bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
4024	assert((Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
4025	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
4026	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
4027	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4028	"Should use canCreateUndefOrPoison if you don't know whether Op"
4029	" is a target node!");
4030	// Be conservative and return true.
4031	return true;
4032	}
4033
4034	bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
4035	const APInt &DemandedElts,
4036	const SelectionDAG &DAG,
4037	bool SNaN,
4038	unsigned Depth) const {
4039	assert((Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
4040	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
4041	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
4042	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4043	"Should use isKnownNeverNaN if you don't know whether Op"
4044	" is a target node!");
4045	return false;
4046	}
4047
4048	bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
4049	const APInt &DemandedElts,
4050	APInt &UndefElts,
4051	const SelectionDAG &DAG,
4052	unsigned Depth) const {
4053	assert((Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
4054	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
4055	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
4056	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4057	"Should use isSplatValue if you don't know whether Op"
4058	" is a target node!");
4059	return false;
4060	}
4061
4062	// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
4063	// work with truncating build vectors and vectors with elements of less than
4064	// 8 bits.
4065	bool TargetLowering::isConstTrueVal(SDValue N) const {
4066	if (!N)
4067	return false;
4068
4069	unsigned EltWidth;
4070	APInt CVal;
4071	if (ConstantSDNode CN = isConstOrConstSplat(N, /AllowUndefs=/*false,
4072	/AllowTruncation=/true)) {
4073	CVal = CN->getAPIntValue();
4074	EltWidth = N.getValueType().getScalarSizeInBits();
4075	} else
4076	return false;
4077
4078	// If this is a truncating splat, truncate the splat value.
4079	// Otherwise, we may fail to match the expected values below.
4080	if (EltWidth < CVal.getBitWidth())
4081	CVal = CVal.trunc(width: EltWidth);
4082
4083	switch (getBooleanContents(Type: N.getValueType())) {
4084	case UndefinedBooleanContent:
4085	return CVal [`0`];
4086	case ZeroOrOneBooleanContent:
4087	return CVal.isOne();
4088	case ZeroOrNegativeOneBooleanContent:
4089	return CVal.isAllOnes();
4090	}
4091
4092	llvm_unreachable("Invalid boolean contents");
4093	}
4094
4095	bool TargetLowering::isConstFalseVal(SDValue N) const {
4096	if (!N)
4097	return false;
4098
4099	const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: N);
4100	if (!CN) {
4101	const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Val&: N);
4102	if (!BV)
4103	return false;
4104
4105	// Only interested in constant splats, we don't care about undef
4106	// elements in identifying boolean constants and getConstantSplatNode
4107	// returns NULL if all ops are undef;
4108	CN = BV->getConstantSplatNode();
4109	if (!CN)
4110	return false;
4111	}
4112
4113	if (getBooleanContents(Type: N ->getValueType(ResNo: `0`)) == UndefinedBooleanContent)
4114	return !CN->getAPIntValue()[`0`];
4115
4116	return CN->isZero();
4117	}
4118
4119	bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
4120	bool SExt) const {
4121	if (VT == MVT::i1)
4122	return N->isOne();
4123
4124	TargetLowering::BooleanContent Cnt = getBooleanContents(Type: VT);
4125	switch (Cnt) {
4126	case TargetLowering::ZeroOrOneBooleanContent:
4127	// An extended value of 1 is always true, unless its original type is i1,
4128	// in which case it will be sign extended to -1.
4129	return (N->isOne() && !SExt) \|\| (SExt && (N->getValueType(ResNo: `0`) != MVT::i1));
4130	case TargetLowering::UndefinedBooleanContent:
4131	case TargetLowering::ZeroOrNegativeOneBooleanContent:
4132	return N->isAllOnes() && SExt;
4133	}
4134	llvm_unreachable("Unexpected enumeration.");
4135	}
4136
4137	/// This helper function of SimplifySetCC tries to optimize the comparison when
4138	/// either operand of the SetCC node is a bitwise-and instruction.
4139	SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
4140	ISD::CondCode Cond, const SDLoc &DL,
4141	DAGCombinerInfo &DCI) const {
4142	if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
4143	std::swap(a&: N0, b&: N1);
4144
4145	SelectionDAG &DAG = DCI.DAG;
4146	EVT OpVT = N0.getValueType();
4147	if (N0.getOpcode() != ISD::AND \|\| !OpVT.isInteger() \|\|
4148	(Cond != ISD::SETEQ && Cond != ISD::SETNE))
4149	return SDValue ();
4150
4151	// (X & Y) != 0 --> zextOrTrunc(X & Y)
4152	// iff everything but LSB is known zero:
4153	if (Cond == ISD::SETNE && isNullConstant(V: N1) &&
4154	(getBooleanContents(Type: OpVT) == TargetLowering::UndefinedBooleanContent \|\|
4155	getBooleanContents(Type: OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
4156	unsigned NumEltBits = OpVT.getScalarSizeInBits();
4157	APInt UpperBits = APInt::getHighBitsSet(numBits: NumEltBits, hiBitsSet: NumEltBits - `1`);
4158	if (DAG.MaskedValueIsZero(Op: N0, Mask: UpperBits))
4159	return DAG.getBoolExtOrTrunc(Op: N0, SL: DL, VT, OpVT);
4160	}
4161
4162	// Try to eliminate a power-of-2 mask constant by converting to a signbit
4163	// test in a narrow type that we can truncate to with no cost. Examples:
4164	// (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
4165	// (i32 X & 32768) != 0 --> (trunc X to i16) < 0
4166	// TODO: This conservatively checks for type legality on the source and
4167	// destination types. That may inhibit optimizations, but it also
4168	// allows setcc->shift transforms that may be more beneficial.
4169	auto *AndC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: `1`));
4170	if (AndC && isNullConstant(V: N1) && AndC->getAPIntValue().isPowerOf2() &&
4171	isTypeLegal(VT: OpVT) && N0.hasOneUse()) {
4172	EVT NarrowVT = EVT::getIntegerVT(Context&: *DAG.getContext(),
4173	BitWidth: AndC->getAPIntValue().getActiveBits());
4174	if (isTruncateFree(FromVT: OpVT, ToVT: NarrowVT) && isTypeLegal(VT: NarrowVT)) {
4175	SDValue Trunc = DAG.getZExtOrTrunc(Op: N0.getOperand(i: `0`), DL, VT: NarrowVT);
4176	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT: NarrowVT);
4177	return DAG.getSetCC(DL, VT, LHS: Trunc, RHS: Zero,
4178	Cond: Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
4179	}
4180	}
4181
4182	// Match these patterns in any of their permutations:
4183	// (X & Y) == Y
4184	// (X & Y) != Y
4185	SDValue X, Y;
4186	if (N0.getOperand(i: `0`) == N1) {
4187	X = N0.getOperand(i: `1`);
4188	Y = N0.getOperand(i: `0`);
4189	} else if (N0.getOperand(i: `1`) == N1) {
4190	X = N0.getOperand(i: `0`);
4191	Y = N0.getOperand(i: `1`);
4192	} else {
4193	return SDValue ();
4194	}
4195
4196	// TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4197	// `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
4198	// its liable to create and infinite loop.
4199	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT: OpVT);
4200	if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
4201	DAG.isKnownToBeAPowerOfTwo(Val: Y)) {
4202	// Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4203	// Note that where Y is variable and is known to have at most one bit set
4204	// (for example, if it is Z & 1) we cannot do this; the expressions are not
4205	// equivalent when Y == 0.
4206	assert(OpVT.isInteger());
4207	Cond = ISD::getSetCCInverse(Operation: Cond, Type: OpVT);
4208	if (DCI.isBeforeLegalizeOps() \|\|
4209	isCondCodeLegal(CC: Cond, VT: N0.getSimpleValueType()))
4210	return DAG.getSetCC(DL, VT, LHS: N0, RHS: Zero, Cond);
4211	} else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
4212	// If the target supports an 'and-not' or 'and-complement' logic operation,
4213	// try to use that to make a comparison operation more efficient.
4214	// But don't do this transform if the mask is a single bit because there are
4215	// more efficient ways to deal with that case (for example, 'bt' on x86 or
4216	// 'rlwinm' on PPC).
4217
4218	// Bail out if the compare operand that we want to turn into a zero is
4219	// already a zero (otherwise, infinite loop).
4220	if (isNullConstant(V: Y))
4221	return SDValue ();
4222
4223	// Transform this into: ~X & Y == 0.
4224	SDValue NotX = DAG.getNOT(DL: SDLoc (X), Val: X, VT: OpVT);
4225	SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL: SDLoc (N0), VT: OpVT, N1: NotX, N2: Y);
4226	return DAG.getSetCC(DL, VT, LHS: NewAnd, RHS: Zero, Cond);
4227	}
4228
4229	return SDValue ();
4230	}
4231
4232	/// This helper function of SimplifySetCC tries to optimize the comparison when
4233	/// either operand of the SetCC node is a bitwise-or instruction.
4234	/// For now, this just transforms (X \| Y) ==/!= Y into X & ~Y ==/!= 0.
4235	SDValue TargetLowering::foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1,
4236	ISD::CondCode Cond, const SDLoc &DL,
4237	DAGCombinerInfo &DCI) const {
4238	if (N1.getOpcode() == ISD::OR && N0.getOpcode() != ISD::OR)
4239	std::swap(a&: N0, b&: N1);
4240
4241	SelectionDAG &DAG = DCI.DAG;
4242	EVT OpVT = N0.getValueType();
4243	if (!N0.hasOneUse() \|\| !OpVT.isInteger() \|\|
4244	(Cond != ISD::SETEQ && Cond != ISD::SETNE))
4245	return SDValue ();
4246
4247	// (X \| Y) == Y
4248	// (X \| Y) != Y
4249	SDValue X;
4250	if (sd_match(N: N0, P: m_Or(L: m_Value(N&: X), R: m_Specific(N: N1))) && hasAndNotCompare(Y: N1)) {
4251	// If the target supports an 'and-not' or 'and-complement' logic operation,
4252	// try to use that to make a comparison operation more efficient.
4253
4254	// Bail out if the compare operand that we want to turn into a zero is
4255	// already a zero (otherwise, infinite loop).
4256	if (isNullConstant(V: N1))
4257	return SDValue ();
4258
4259	// Transform this into: X & ~Y ==/!= 0.
4260	SDValue NotY = DAG.getNOT(DL: SDLoc (N1), Val: N1, VT: OpVT);
4261	SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL: SDLoc (N0), VT: OpVT, N1: X, N2: NotY);
4262	return DAG.getSetCC(DL, VT, LHS: NewAnd, RHS: DAG.getConstant(Val: `0`, DL, VT: OpVT), Cond);
4263	}
4264
4265	return SDValue ();
4266	}
4267
4268	/// There are multiple IR patterns that could be checking whether certain
4269	/// truncation of a signed number would be lossy or not. The pattern which is
4270	/// best at IR level, may not lower optimally. Thus, we want to unfold it.
4271	/// We are looking for the following pattern: (KeptBits is a constant)
4272	/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4273	/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4274	/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
4275	/// We will unfold it into the natural trunc+sext pattern:
4276	/// ((%x << C) a>> C) dstcond %x
4277	/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
4278	SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4279	EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4280	const SDLoc &DL) const {
4281	// We must be comparing with a constant.
4282	ConstantSDNode *C1;
4283	if (!(C1 = dyn_cast<ConstantSDNode>(Val&: N1)))
4284	return SDValue ();
4285
4286	// N0 should be: add %x, (1 << (KeptBits-1))
4287	if (N0 ->getOpcode() != ISD::ADD)
4288	return SDValue ();
4289
4290	// And we must be 'add'ing a constant.
4291	ConstantSDNode *C01;
4292	if (!(C01 = dyn_cast<ConstantSDNode>(Val: N0 ->getOperand(Num: `1`))))
4293	return SDValue ();
4294
4295	SDValue X = N0 ->getOperand(Num: `0`);
4296	EVT XVT = X.getValueType();
4297
4298	// Validate constants ...
4299
4300	APInt I1 = C1->getAPIntValue();
4301
4302	ISD::CondCode NewCond;
4303	if (Cond == ISD::CondCode::SETULT) {
4304	NewCond = ISD::CondCode::SETEQ;
4305	} else if (Cond == ISD::CondCode::SETULE) {
4306	NewCond = ISD::CondCode::SETEQ;
4307	// But need to 'canonicalize' the constant.
4308	I1 += `1`;
4309	} else if (Cond == ISD::CondCode::SETUGT) {
4310	NewCond = ISD::CondCode::SETNE;
4311	// But need to 'canonicalize' the constant.
4312	I1 += `1`;
4313	} else if (Cond == ISD::CondCode::SETUGE) {
4314	NewCond = ISD::CondCode::SETNE;
4315	} else
4316	return SDValue ();
4317
4318	APInt I01 = C01->getAPIntValue();
4319
4320	auto checkConstants = [&I1, &I01]() -> bool {
4321	// Both of them must be power-of-two, and the constant from setcc is bigger.
4322	return I1.ugt(RHS: I01) && I1.isPowerOf2() && I01.isPowerOf2();
4323	};
4324
4325	if (checkConstants ()) {
4326	// Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4327	} else {
4328	// What if we invert constants? (and the target predicate)
4329	I1.negate();
4330	I01.negate();
4331	assert(XVT.isInteger());
4332	NewCond = getSetCCInverse(Operation: NewCond, Type: XVT);
4333	if (!checkConstants ())
4334	return SDValue ();
4335	// Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4336	}
4337
4338	// They are power-of-two, so which bit is set?
4339	const unsigned KeptBits = I1.logBase2();
4340	const unsigned KeptBitsMinusOne = I01.logBase2();
4341
4342	// Magic!
4343	if (KeptBits != (KeptBitsMinusOne + `1`))
4344	return SDValue ();
4345	assert(KeptBits > `0` && KeptBits < XVT.getSizeInBits() && "unreachable");
4346
4347	// We don't want to do this in every single case.
4348	SelectionDAG &DAG = DCI.DAG;
4349	if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
4350	return SDValue ();
4351
4352	// Unfold into: sext_inreg(%x) cond %x
4353	// Where 'cond' will be either 'eq' or 'ne'.
4354	SDValue SExtInReg = DAG.getNode(
4355	Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: XVT, N1: X,
4356	N2: DAG.getValueType(EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: KeptBits)));
4357	return DAG.getSetCC(DL, VT: SCCVT, LHS: SExtInReg, RHS: X, Cond: NewCond);
4358	}
4359
4360	// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
4361	SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4362	EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4363	DAGCombinerInfo &DCI, const SDLoc &DL) const {
4364	assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() &&
4365	"Should be a comparison with 0.");
4366	assert((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
4367	"Valid only for [in]equality comparisons.");
4368
4369	unsigned NewShiftOpcode;
4370	SDValue X, C, Y;
4371
4372	SelectionDAG &DAG = DCI.DAG;
4373
4374	// Look for '(C l>>/<< Y)'.
4375	auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
4376	// The shift should be one-use.
4377	if (!V.hasOneUse())
4378	return false;
4379	unsigned OldShiftOpcode = V.getOpcode();
4380	switch (OldShiftOpcode) {
4381	case ISD::SHL:
4382	NewShiftOpcode = ISD::SRL;
4383	break;
4384	case ISD::SRL:
4385	NewShiftOpcode = ISD::SHL;
4386	break;
4387	default:
4388	return false; // must be a logical shift.
4389	}
4390	// We should be shifting a constant.
4391	// FIXME: best to use isConstantOrConstantVector().
4392	C = V.getOperand(i: `0`);
4393	ConstantSDNode *CC =
4394	isConstOrConstSplat(N: C, /AllowUndefs=/true, /AllowTruncation=/true);
4395	if (!CC)
4396	return false;
4397	Y = V.getOperand(i: `1`);
4398
4399	ConstantSDNode *XC =
4400	isConstOrConstSplat(N: X, /AllowUndefs=/true, /AllowTruncation=/true);
4401	return shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
4402	X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4403	};
4404
4405	// LHS of comparison should be an one-use 'and'.
4406	if (N0.getOpcode() != ISD::AND \|\| !N0.hasOneUse())
4407	return SDValue ();
4408
4409	X = N0.getOperand(i: `0`);
4410	SDValue Mask = N0.getOperand(i: `1`);
4411
4412	// 'and' is commutative!
4413	if (!Match (Mask)) {
4414	std::swap(a&: X, b&: Mask);
4415	if (!Match (Mask))
4416	return SDValue ();
4417	}
4418
4419	EVT VT = X.getValueType();
4420
4421	// Produce:
4422	// ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4423	SDValue T0 = DAG.getNode(Opcode: NewShiftOpcode, DL, VT, N1: X, N2: Y);
4424	SDValue T1 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: T0, N2: C);
4425	SDValue T2 = DAG.getSetCC(DL, VT: SCCVT, LHS: T1, RHS: N1C, Cond);
4426	return T2;
4427	}
4428
4429	/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4430	/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4431	/// handle the commuted versions of these patterns.
4432	SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4433	ISD::CondCode Cond, const SDLoc &DL,
4434	DAGCombinerInfo &DCI) const {
4435	unsigned BOpcode = N0.getOpcode();
4436	assert((BOpcode == ISD::ADD \|\| BOpcode == ISD::SUB \|\| BOpcode == ISD::XOR) &&
4437	"Unexpected binop");
4438	assert((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) && "Unexpected condcode");
4439
4440	// (X + Y) == X --> Y == 0
4441	// (X - Y) == X --> Y == 0
4442	// (X ^ Y) == X --> Y == 0
4443	SelectionDAG &DAG = DCI.DAG;
4444	EVT OpVT = N0.getValueType();
4445	SDValue X = N0.getOperand(i: `0`);
4446	SDValue Y = N0.getOperand(i: `1`);
4447	if (X == N1)
4448	return DAG.getSetCC(DL, VT, LHS: Y, RHS: DAG.getConstant(Val: `0`, DL, VT: OpVT), Cond);
4449
4450	if (Y != N1)
4451	return SDValue ();
4452
4453	// (X + Y) == Y --> X == 0
4454	// (X ^ Y) == Y --> X == 0
4455	if (BOpcode == ISD::ADD \|\| BOpcode == ISD::XOR)
4456	return DAG.getSetCC(DL, VT, LHS: X, RHS: DAG.getConstant(Val: `0`, DL, VT: OpVT), Cond);
4457
4458	// The shift would not be valid if the operands are boolean (i1).
4459	if (!N0.hasOneUse() \|\| OpVT.getScalarSizeInBits() == `1`)
4460	return SDValue ();
4461
4462	// (X - Y) == Y --> X == Y << 1
4463	SDValue One = DAG.getShiftAmountConstant(Val: `1`, VT: OpVT, DL);
4464	SDValue YShl1 = DAG.getNode(Opcode: ISD::SHL, DL, VT: N1.getValueType(), N1: Y, N2: One);
4465	if (!DCI.isCalledByLegalizer())
4466	DCI.AddToWorklist(N: YShl1.getNode());
4467	return DAG.getSetCC(DL, VT, LHS: X, RHS: YShl1, Cond);
4468	}
4469
4470	static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
4471	SDValue N0, const APInt &C1,
4472	ISD::CondCode Cond, const SDLoc &dl,
4473	SelectionDAG &DAG) {
4474	// Look through truncs that don't change the value of a ctpop.
4475	// FIXME: Add vector support? Need to be careful with setcc result type below.
4476	SDValue CTPOP = N0;
4477	if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4478	N0.getScalarValueSizeInBits() > Log2_32(Value: N0.getOperand(i: `0`).getScalarValueSizeInBits()))
4479	CTPOP = N0.getOperand(i: `0`);
4480
4481	if (CTPOP.getOpcode() != ISD::CTPOP \|\| !CTPOP.hasOneUse())
4482	return SDValue ();
4483
4484	EVT CTVT = CTPOP.getValueType();
4485	SDValue CTOp = CTPOP.getOperand(i: `0`);
4486
4487	// Expand a power-of-2-or-zero comparison based on ctpop:
4488	// (ctpop x) u< 2 -> (x & x-1) == 0
4489	// (ctpop x) u> 1 -> (x & x-1) != 0
4490	if (Cond == ISD::SETULT \|\| Cond == ISD::SETUGT) {
4491	// Keep the CTPOP if it is a cheap vector op.
4492	if (CTVT.isVector() && TLI.isCtpopFast(VT: CTVT))
4493	return SDValue ();
4494
4495	unsigned CostLimit = TLI.getCustomCtpopCost(VT: CTVT, Cond);
4496	if (C1.ugt(RHS: CostLimit + (Cond == ISD::SETULT)))
4497	return SDValue ();
4498	if (C1 == `0` && (Cond == ISD::SETULT))
4499	return SDValue (); // This is handled elsewhere.
4500
4501	unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4502
4503	SDValue NegOne = DAG.getAllOnesConstant(DL: dl, VT: CTVT);
4504	SDValue Result = CTOp;
4505	for (unsigned i = `0`; i < Passes; i++) {
4506	SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: CTVT, N1: Result, N2: NegOne);
4507	Result = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: CTVT, N1: Result, N2: Add);
4508	}
4509	ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
4510	return DAG.getSetCC(DL: dl, VT, LHS: Result, RHS: DAG.getConstant(Val: `0`, DL: dl, VT: CTVT), Cond: CC);
4511	}
4512
4513	// Expand a power-of-2 comparison based on ctpop
4514	if ((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) && C1 == `1`) {
4515	// Keep the CTPOP if it is cheap.
4516	if (TLI.isCtpopFast(VT: CTVT))
4517	return SDValue ();
4518
4519	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT: CTVT);
4520	SDValue NegOne = DAG.getAllOnesConstant(DL: dl, VT: CTVT);
4521	assert(CTVT.isInteger());
4522	SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: CTVT, N1: CTOp, N2: NegOne);
4523
4524	// Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4525	// check before emitting a potentially unnecessary op.
4526	if (DAG.isKnownNeverZero(Op: CTOp)) {
4527	// (ctpop x) == 1 --> (x & x-1) == 0
4528	// (ctpop x) != 1 --> (x & x-1) != 0
4529	SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: CTVT, N1: CTOp, N2: Add);
4530	SDValue RHS = DAG.getSetCC(DL: dl, VT, LHS: And, RHS: Zero, Cond);
4531	return RHS;
4532	}
4533
4534	// (ctpop x) == 1 --> (x ^ x-1) > x-1
4535	// (ctpop x) != 1 --> (x ^ x-1) <= x-1
4536	SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: CTVT, N1: CTOp, N2: Add);
4537	ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
4538	return DAG.getSetCC(DL: dl, VT, LHS: Xor, RHS: Add, Cond: CmpCond);
4539	}
4540
4541	return SDValue ();
4542	}
4543
4544	static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4545	ISD::CondCode Cond, const SDLoc &dl,
4546	SelectionDAG &DAG) {
4547	if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4548	return SDValue ();
4549
4550	auto C1 = isConstOrConstSplat(N: N1, /* AllowUndefs / true);
4551	if (!C1 \|\| !(C1->isZero() \|\| C1->isAllOnes()))
4552	return SDValue ();
4553
4554	auto getRotateSource = [](SDValue X) {
4555	if (X.getOpcode() == ISD::ROTL \|\| X.getOpcode() == ISD::ROTR)
4556	return X.getOperand(i: `0`);
4557	return SDValue ();
4558	};
4559
4560	// Peek through a rotated value compared against 0 or -1:
4561	// (rot X, Y) == 0/-1 --> X == 0/-1
4562	// (rot X, Y) != 0/-1 --> X != 0/-1
4563	if (SDValue R = getRotateSource (N0))
4564	return DAG.getSetCC(DL: dl, VT, LHS: R, RHS: N1, Cond);
4565
4566	// Peek through an 'or' of a rotated value compared against 0:
4567	// or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4568	// or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4569	//
4570	// TODO: Add the 'and' with -1 sibling.
4571	// TODO: Recurse through a series of 'or' ops to find the rotate.
4572	EVT OpVT = N0.getValueType();
4573	if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4574	if (SDValue R = getRotateSource (N0.getOperand(i: `0`))) {
4575	SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: R, N2: N0.getOperand(i: `1`));
4576	return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4577	}
4578	if (SDValue R = getRotateSource (N0.getOperand(i: `1`))) {
4579	SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: R, N2: N0.getOperand(i: `0`));
4580	return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4581	}
4582	}
4583
4584	return SDValue ();
4585	}
4586
4587	static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
4588	ISD::CondCode Cond, const SDLoc &dl,
4589	SelectionDAG &DAG) {
4590	// If we are testing for all-bits-clear, we might be able to do that with
4591	// less shifting since bit-order does not matter.
4592	if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4593	return SDValue ();
4594
4595	auto C1 = isConstOrConstSplat(N: N1, /* AllowUndefs / true);
4596	if (!C1 \|\| !C1->isZero())
4597	return SDValue ();
4598
4599	if (!N0.hasOneUse() \|\|
4600	(N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4601	return SDValue ();
4602
4603	unsigned BitWidth = N0.getScalarValueSizeInBits();
4604	auto *ShAmtC = isConstOrConstSplat(N: N0.getOperand(i: `2`));
4605	if (!ShAmtC)
4606	return SDValue ();
4607
4608	uint64_t ShAmt = ShAmtC->getAPIntValue().urem(RHS: BitWidth);
4609	if (ShAmt == `0`)
4610	return SDValue ();
4611
4612	// Canonicalize fshr as fshl to reduce pattern-matching.
4613	if (N0.getOpcode() == ISD::FSHR)
4614	ShAmt = BitWidth - ShAmt;
4615
4616	// Match an 'or' with a specific operand 'Other' in either commuted variant.
4617	SDValue X, Y;
4618	auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4619	if (Or.getOpcode() != ISD::OR \|\| !Or.hasOneUse())
4620	return false;
4621	if (Or.getOperand(i: `0`) == Other) {
4622	X = Or.getOperand(i: `0`);
4623	Y = Or.getOperand(i: `1`);
4624	return true;
4625	}
4626	if (Or.getOperand(i: `1`) == Other) {
4627	X = Or.getOperand(i: `1`);
4628	Y = Or.getOperand(i: `0`);
4629	return true;
4630	}
4631	return false;
4632	};
4633
4634	EVT OpVT = N0.getValueType();
4635	EVT ShAmtVT = N0.getOperand(i: `2`).getValueType();
4636	SDValue F0 = N0.getOperand(i: `0`);
4637	SDValue F1 = N0.getOperand(i: `1`);
4638	if (matchOr (F0, F1)) {
4639	// fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4640	SDValue NewShAmt = DAG.getConstant(Val: ShAmt, DL: dl, VT: ShAmtVT);
4641	SDValue Shift = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: OpVT, N1: Y, N2: NewShAmt);
4642	SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: Shift, N2: X);
4643	return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4644	}
4645	if (matchOr (F1, F0)) {
4646	// fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4647	SDValue NewShAmt = DAG.getConstant(Val: BitWidth - ShAmt, DL: dl, VT: ShAmtVT);
4648	SDValue Shift = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: OpVT, N1: Y, N2: NewShAmt);
4649	SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: Shift, N2: X);
4650	return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4651	}
4652
4653	return SDValue ();
4654	}
4655
4656	/// Try to simplify a setcc built with the specified operands and cc. If it is
4657	/// unable to simplify it, return a null SDValue.
4658	SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4659	ISD::CondCode Cond, bool foldBooleans,
4660	DAGCombinerInfo &DCI,
4661	const SDLoc &dl) const {
4662	SelectionDAG &DAG = DCI.DAG;
4663	const DataLayout &Layout = DAG.getDataLayout();
4664	EVT OpVT = N0.getValueType();
4665	AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4666
4667	// Constant fold or commute setcc.
4668	if (SDValue Fold = DAG.FoldSetCC(VT, N1: N0, N2: N1, Cond, dl))
4669	return Fold;
4670
4671	bool N0ConstOrSplat =
4672	isConstOrConstSplat(N: N0, /AllowUndefs/ false, /AllowTruncate/ AllowTruncation: true);
4673	bool N1ConstOrSplat =
4674	isConstOrConstSplat(N: N1, /AllowUndefs/ false, /AllowTruncate/ AllowTruncation: true);
4675
4676	// Canonicalize toward having the constant on the RHS.
4677	// TODO: Handle non-splat vector constants. All undef causes trouble.
4678	// FIXME: We can't yet fold constant scalable vector splats, so avoid an
4679	// infinite loop here when we encounter one.
4680	ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Operation: Cond);
4681	if (N0ConstOrSplat && !N1ConstOrSplat &&
4682	(DCI.isBeforeLegalizeOps() \|\|
4683	isCondCodeLegal(CC: SwappedCC, VT: N0.getSimpleValueType())))
4684	return DAG.getSetCC(DL: dl, VT, LHS: N1, RHS: N0, Cond: SwappedCC);
4685
4686	// If we have a subtract with the same 2 non-constant operands as this setcc
4687	// -- but in reverse order -- then try to commute the operands of this setcc
4688	// to match. A matching pair of setcc (cmp) and sub may be combined into 1
4689	// instruction on some targets.
4690	if (!N0ConstOrSplat && !N1ConstOrSplat &&
4691	(DCI.isBeforeLegalizeOps() \|\|
4692	isCondCodeLegal(CC: SwappedCC, VT: N0.getSimpleValueType())) &&
4693	DAG.doesNodeExist(Opcode: ISD::SUB, VTList: DAG.getVTList(VT: OpVT), Ops: {N1, N0}) &&
4694	!DAG.doesNodeExist(Opcode: ISD::SUB, VTList: DAG.getVTList(VT: OpVT), Ops: {N0, N1}))
4695	return DAG.getSetCC(DL: dl, VT, LHS: N1, RHS: N0, Cond: SwappedCC);
4696
4697	if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4698	return V;
4699
4700	if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4701	return V;
4702
4703	if (auto *N1C = isConstOrConstSplat(N: N1)) {
4704	const APInt &C1 = N1C->getAPIntValue();
4705
4706	// Optimize some CTPOP cases.
4707	if (SDValue V = simplifySetCCWithCTPOP(TLI: *this, VT, N0, C1, Cond, dl, DAG))
4708	return V;
4709
4710	// For equality to 0 of a no-wrap multiply, decompose and test each op:
4711	// X * Y == 0 --> (X == 0) || (Y == 0)
4712	// X * Y != 0 --> (X != 0) && (Y != 0)
4713	// TODO: This bails out if minsize is set, but if the target doesn't have a
4714	// single instruction multiply for this type, it would likely be
4715	// smaller to decompose.
4716	if (C1.isZero() && (Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
4717	N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4718	(N0 ->getFlags().hasNoUnsignedWrap() \|\|
4719	N0 ->getFlags().hasNoSignedWrap()) &&
4720	!Attr.hasFnAttr(Kind: Attribute::MinSize)) {
4721	SDValue IsXZero = DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`), RHS: N1, Cond);
4722	SDValue IsYZero = DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `1`), RHS: N1, Cond);
4723	unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4724	return DAG.getNode(Opcode: LogicOp, DL: dl, VT, N1: IsXZero, N2: IsYZero);
4725	}
4726
4727	// If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4728	// equality comparison, then we're just comparing whether X itself is
4729	// zero.
4730	if (N0.getOpcode() == ISD::SRL && (C1.isZero() \|\| C1.isOne()) &&
4731	N0.getOperand(i: `0`).getOpcode() == ISD::CTLZ &&
4732	llvm::has_single_bit<uint32_t>(Value: N0.getScalarValueSizeInBits())) {
4733	if (ConstantSDNode *ShAmt = isConstOrConstSplat(N: N0.getOperand(i: `1`))) {
4734	if ((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
4735	ShAmt->getAPIntValue() == Log2_32(Value: N0.getScalarValueSizeInBits())) {
4736	if ((C1 == `0`) == (Cond == ISD::SETEQ)) {
4737	// (srl (ctlz x), 5) == 0 -> X != 0
4738	// (srl (ctlz x), 5) != 1 -> X != 0
4739	Cond = ISD::SETNE;
4740	} else {
4741	// (srl (ctlz x), 5) != 0 -> X == 0
4742	// (srl (ctlz x), 5) == 1 -> X == 0
4743	Cond = ISD::SETEQ;
4744	}
4745	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT: N0.getValueType());
4746	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`).getOperand(i: `0`), RHS: Zero,
4747	Cond);
4748	}
4749	}
4750	}
4751	}
4752
4753	// FIXME: Support vectors.
4754	if (auto *N1C = dyn_cast<ConstantSDNode>(Val: N1.getNode())) {
4755	const APInt &C1 = N1C->getAPIntValue();
4756
4757	// (zext x) == C --> x == (trunc C)
4758	// (sext x) == C --> x == (trunc C)
4759	if ((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
4760	DCI.isBeforeLegalize() && N0 ->hasOneUse()) {
4761	unsigned MinBits = N0.getValueSizeInBits();
4762	SDValue PreExt;
4763	bool Signed = false;
4764	if (N0 ->getOpcode() == ISD::ZERO_EXTEND) {
4765	// ZExt
4766	MinBits = N0 ->getOperand(Num: `0`).getValueSizeInBits();
4767	PreExt = N0 ->getOperand(Num: `0`);
4768	} else if (N0 ->getOpcode() == ISD::AND) {
4769	// DAGCombine turns costly ZExts into ANDs
4770	if (auto *C = dyn_cast<ConstantSDNode>(Val: N0 ->getOperand(Num: `1`)))
4771	if ((C->getAPIntValue()+`1`).isPowerOf2()) {
4772	MinBits = C->getAPIntValue().countr_one();
4773	PreExt = N0 ->getOperand(Num: `0`);
4774	}
4775	} else if (N0 ->getOpcode() == ISD::SIGN_EXTEND) {
4776	// SExt
4777	MinBits = N0 ->getOperand(Num: `0`).getValueSizeInBits();
4778	PreExt = N0 ->getOperand(Num: `0`);
4779	Signed = true;
4780	} else if (auto *LN0 = dyn_cast<LoadSDNode>(Val&: N0)) {
4781	// ZEXTLOAD / SEXTLOAD
4782	if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4783	MinBits = LN0->getMemoryVT().getSizeInBits();
4784	PreExt = N0;
4785	} else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4786	Signed = true;
4787	MinBits = LN0->getMemoryVT().getSizeInBits();
4788	PreExt = N0;
4789	}
4790	}
4791
4792	// Figure out how many bits we need to preserve this constant.
4793	unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4794
4795	// Make sure we're not losing bits from the constant.
4796	if (MinBits > `0` &&
4797	MinBits < C1.getBitWidth() &&
4798	MinBits >= ReqdBits) {
4799	EVT MinVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: MinBits);
4800	if (isTypeDesirableForOp(ISD::SETCC, VT: MinVT)) {
4801	// Will get folded away.
4802	SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MinVT, Operand: PreExt);
4803	if (MinBits == `1` && C1 == `1`)
4804	// Invert the condition.
4805	return DAG.getSetCC(DL: dl, VT, LHS: Trunc, RHS: DAG.getConstant(Val: `0`, DL: dl, VT: MVT::i1),
4806	Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4807	SDValue C = DAG.getConstant(Val: C1.trunc(width: MinBits), DL: dl, VT: MinVT);
4808	return DAG.getSetCC(DL: dl, VT, LHS: Trunc, RHS: C, Cond);
4809	}
4810
4811	// If truncating the setcc operands is not desirable, we can still
4812	// simplify the expression in some cases:
4813	// setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4814	// setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4815	// setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4816	// setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4817	// setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4818	// setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4819	SDValue TopSetCC = N0 ->getOperand(Num: `0`);
4820	unsigned N0Opc = N0 ->getOpcode();
4821	bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4822	if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4823	TopSetCC.getOpcode() == ISD::SETCC &&
4824	(N0Opc == ISD::ZERO_EXTEND \|\| N0Opc == ISD::SIGN_EXTEND) &&
4825	(isConstFalseVal(N: N1) \|\|
4826	isExtendedTrueVal(N: N1C, VT: N0 ->getValueType(ResNo: `0`), SExt))) {
4827
4828	bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) \|\|
4829	(!N1C->isZero() && Cond == ISD::SETNE);
4830
4831	if (!Inverse)
4832	return TopSetCC;
4833
4834	ISD::CondCode InvCond = ISD::getSetCCInverse(
4835	Operation: cast<CondCodeSDNode>(Val: TopSetCC.getOperand(i: `2`))->get(),
4836	Type: TopSetCC.getOperand(i: `0`).getValueType());
4837	return DAG.getSetCC(DL: dl, VT, LHS: TopSetCC.getOperand(i: `0`),
4838	RHS: TopSetCC.getOperand(i: `1`),
4839	Cond: InvCond);
4840	}
4841	}
4842	}
4843
4844	// If the LHS is '(and load, const)', the RHS is 0, the test is for
4845	// equality or unsigned, and all 1 bits of the const are in the same
4846	// partial word, see if we can shorten the load.
4847	if (DCI.isBeforeLegalize() &&
4848	!ISD::isSignedIntSetCC(Code: Cond) &&
4849	N0.getOpcode() == ISD::AND && C1 == `0` &&
4850	N0.getNode()->hasOneUse() &&
4851	isa<LoadSDNode>(Val: N0.getOperand(i: `0`)) &&
4852	N0.getOperand(i: `0`).getNode()->hasOneUse() &&
4853	isa<ConstantSDNode>(Val: N0.getOperand(i: `1`))) {
4854	auto *Lod = cast<LoadSDNode>(Val: N0.getOperand(i: `0`));
4855	APInt bestMask;
4856	unsigned bestWidth = `0`, bestOffset = `0`;
4857	if (Lod->isSimple() && Lod->isUnindexed() &&
4858	(Lod->getMemoryVT().isByteSized() \|\|
4859	isPaddedAtMostSignificantBitsWhenStored(VT: Lod->getMemoryVT()))) {
4860	unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4861	unsigned origWidth = N0.getValueSizeInBits();
4862	unsigned maskWidth = origWidth;
4863	// We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4864	// 8 bits, but have to be careful...
4865	if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4866	origWidth = Lod->getMemoryVT().getSizeInBits();
4867	const APInt &Mask = N0.getConstantOperandAPInt(i: `1`);
4868	// Only consider power-of-2 widths (and at least one byte) as candidates
4869	// for the narrowed load.
4870	for (unsigned width = `8`; width < origWidth; width *= `2`) {
4871	EVT newVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: width);
4872	APInt newMask = APInt::getLowBitsSet(numBits: maskWidth, loBitsSet: width);
4873	// Avoid accessing any padding here for now (we could use memWidth
4874	// instead of origWidth here otherwise).
4875	unsigned maxOffset = origWidth - width;
4876	for (unsigned offset = `0`; offset <= maxOffset; offset += `8`) {
4877	if (Mask.isSubsetOf(RHS: newMask)) {
4878	unsigned ptrOffset =
4879	Layout.isLittleEndian() ? offset : memWidth - width - offset;
4880	unsigned IsFast = `0`;
4881	assert((ptrOffset % `8`) == `0` && "Non-Bytealigned pointer offset");
4882	Align NewAlign = commonAlignment(A: Lod->getAlign(), Offset: ptrOffset / `8`);
4883	if (shouldReduceLoadWidth(Load: Lod, ExtTy: ISD::NON_EXTLOAD, NewVT: newVT,
4884	ByteOffset: ptrOffset / `8`) &&
4885	allowsMemoryAccess(
4886	Context&: *DAG.getContext(), DL: Layout, VT: newVT, AddrSpace: Lod->getAddressSpace(),
4887	Alignment: NewAlign, Flags: Lod->getMemOperand()->getFlags(), Fast: &IsFast) &&
4888	IsFast) {
4889	bestOffset = ptrOffset / `8`;
4890	bestMask = Mask.lshr(shiftAmt: offset);
4891	bestWidth = width;
4892	break;
4893	}
4894	}
4895	newMask <<= `8`;
4896	}
4897	if (bestWidth)
4898	break;
4899	}
4900	}
4901	if (bestWidth) {
4902	EVT newVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: bestWidth);
4903	SDValue Ptr = Lod->getBasePtr();
4904	if (bestOffset != `0`)
4905	Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: bestOffset));
4906	SDValue NewLoad =
4907	DAG.getLoad(VT: newVT, dl, Chain: Lod->getChain(), Ptr,
4908	PtrInfo: Lod->getPointerInfo().getWithOffset(O: bestOffset),
4909	Alignment: Lod->getBaseAlign());
4910	SDValue And =
4911	DAG.getNode(Opcode: ISD::AND, DL: dl, VT: newVT, N1: NewLoad,
4912	N2: DAG.getConstant(Val: bestMask.trunc(width: bestWidth), DL: dl, VT: newVT));
4913	return DAG.getSetCC(DL: dl, VT, LHS: And, RHS: DAG.getConstant(Val: `0LL`, DL: dl, VT: newVT), Cond);
4914	}
4915	}
4916
4917	// If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4918	if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4919	unsigned InSize = N0.getOperand(i: `0`).getValueSizeInBits();
4920
4921	// If the comparison constant has bits in the upper part, the
4922	// zero-extended value could never match.
4923	if (C1.intersects(RHS: APInt::getHighBitsSet(numBits: C1.getBitWidth(),
4924	hiBitsSet: C1.getBitWidth() - InSize))) {
4925	switch (Cond) {
4926	case ISD::SETUGT:
4927	case ISD::SETUGE:
4928	case ISD::SETEQ:
4929	return DAG.getConstant(Val: `0`, DL: dl, VT);
4930	case ISD::SETULT:
4931	case ISD::SETULE:
4932	case ISD::SETNE:
4933	return DAG.getConstant(Val: `1`, DL: dl, VT);
4934	case ISD::SETGT:
4935	case ISD::SETGE:
4936	// True if the sign bit of C1 is set.
4937	return DAG.getConstant(Val: C1.isNegative(), DL: dl, VT);
4938	case ISD::SETLT:
4939	case ISD::SETLE:
4940	// True if the sign bit of C1 isn't set.
4941	return DAG.getConstant(Val: C1.isNonNegative(), DL: dl, VT);
4942	default:
4943	break;
4944	}
4945	}
4946
4947	// Otherwise, we can perform the comparison with the low bits.
4948	switch (Cond) {
4949	case ISD::SETEQ:
4950	case ISD::SETNE:
4951	case ISD::SETUGT:
4952	case ISD::SETUGE:
4953	case ISD::SETULT:
4954	case ISD::SETULE: {
4955	EVT newVT = N0.getOperand(i: `0`).getValueType();
4956	// FIXME: Should use isNarrowingProfitable.
4957	if (DCI.isBeforeLegalizeOps() \|\|
4958	(isOperationLegal(Op: ISD::SETCC, VT: newVT) &&
4959	isCondCodeLegal(CC: Cond, VT: newVT.getSimpleVT()) &&
4960	isTypeDesirableForOp(ISD::SETCC, VT: newVT))) {
4961	EVT NewSetCCVT = getSetCCResultType(DL: Layout, Context&: *DAG.getContext(), VT: newVT);
4962	SDValue NewConst = DAG.getConstant(Val: C1.trunc(width: InSize), DL: dl, VT: newVT);
4963
4964	SDValue NewSetCC = DAG.getSetCC(DL: dl, VT: NewSetCCVT, LHS: N0.getOperand(i: `0`),
4965	RHS: NewConst, Cond);
4966	return DAG.getBoolExtOrTrunc(Op: NewSetCC, SL: dl, VT, OpVT: N0.getValueType());
4967	}
4968	break;
4969	}
4970	default:
4971	break; // todo, be more careful with signed comparisons
4972	}
4973	} else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4974	(Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
4975	!isSExtCheaperThanZExt(FromTy: cast<VTSDNode>(Val: N0.getOperand(i: `1`))->getVT(),
4976	ToTy: OpVT)) {
4977	EVT ExtSrcTy = cast<VTSDNode>(Val: N0.getOperand(i: `1`))->getVT();
4978	unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4979	EVT ExtDstTy = N0.getValueType();
4980	unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4981
4982	// If the constant doesn't fit into the number of bits for the source of
4983	// the sign extension, it is impossible for both sides to be equal.
4984	if (C1.getSignificantBits() > ExtSrcTyBits)
4985	return DAG.getBoolConstant(V: Cond == ISD::SETNE, DL: dl, VT, OpVT);
4986
4987	assert(ExtDstTy == N0.getOperand(`0`).getValueType() &&
4988	ExtDstTy != ExtSrcTy && "Unexpected types!");
4989	APInt Imm = APInt::getLowBitsSet(numBits: ExtDstTyBits, loBitsSet: ExtSrcTyBits);
4990	SDValue ZextOp = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ExtDstTy, N1: N0.getOperand(i: `0`),
4991	N2: DAG.getConstant(Val: Imm, DL: dl, VT: ExtDstTy));
4992	if (!DCI.isCalledByLegalizer())
4993	DCI.AddToWorklist(N: ZextOp.getNode());
4994	// Otherwise, make this a use of a zext.
4995	return DAG.getSetCC(DL: dl, VT, LHS: ZextOp,
4996	RHS: DAG.getConstant(Val: C1 & Imm, DL: dl, VT: ExtDstTy), Cond);
4997	} else if ((N1C->isZero() \|\| N1C->isOne()) &&
4998	(Cond == ISD::SETEQ \|\| Cond == ISD::SETNE)) {
4999	// SETCC (X), [0\|1], [EQ\|NE] -> X if X is known 0/1. i1 types are
5000	// excluded as they are handled below whilst checking for foldBooleans.
5001	if ((N0.getOpcode() == ISD::SETCC \|\| VT.getScalarType() != MVT::i1) &&
5002	isTypeLegal(VT) && VT.bitsLE(VT: N0.getValueType()) &&
5003	(N0.getValueType() == MVT::i1 \|\|
5004	getBooleanContents(Type: N0.getValueType()) == ZeroOrOneBooleanContent) &&
5005	DAG.MaskedValueIsZero(
5006	Op: N0, Mask: APInt::getBitsSetFrom(numBits: N0.getValueSizeInBits(), loBit: `1`))) {
5007	bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
5008	if (TrueWhenTrue)
5009	return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: N0);
5010	// Invert the condition.
5011	if (N0.getOpcode() == ISD::SETCC) {
5012	ISD::CondCode CC = cast<CondCodeSDNode>(Val: N0.getOperand(i: `2`))->get();
5013	CC = ISD::getSetCCInverse(Operation: CC, Type: N0.getOperand(i: `0`).getValueType());
5014	if (DCI.isBeforeLegalizeOps() \|\|
5015	isCondCodeLegal(CC, VT: N0.getOperand(i: `0`).getSimpleValueType()))
5016	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`), RHS: N0.getOperand(i: `1`), Cond: CC);
5017	}
5018	}
5019
5020	if ((N0.getOpcode() == ISD::XOR \|\|
5021	(N0.getOpcode() == ISD::AND &&
5022	N0.getOperand(i: `0`).getOpcode() == ISD::XOR &&
5023	N0.getOperand(i: `1`) == N0.getOperand(i: `0`).getOperand(i: `1`))) &&
5024	isOneConstant(V: N0.getOperand(i: `1`))) {
5025	// If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
5026	// can only do this if the top bits are known zero.
5027	unsigned BitWidth = N0.getValueSizeInBits();
5028	if (DAG.MaskedValueIsZero(Op: N0,
5029	Mask: APInt::getHighBitsSet(numBits: BitWidth,
5030	hiBitsSet: BitWidth-`1`))) {
5031	// Okay, get the un-inverted input value.
5032	SDValue Val;
5033	if (N0.getOpcode() == ISD::XOR) {
5034	Val = N0.getOperand(i: `0`);
5035	} else {
5036	assert(N0.getOpcode() == ISD::AND &&
5037	N0.getOperand(`0`).getOpcode() == ISD::XOR);
5038	// ((X^1)&1)^1 -> X & 1
5039	Val = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: N0.getValueType(),
5040	N1: N0.getOperand(i: `0`).getOperand(i: `0`),
5041	N2: N0.getOperand(i: `1`));
5042	}
5043
5044	return DAG.getSetCC(DL: dl, VT, LHS: Val, RHS: N1,
5045	Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5046	}
5047	} else if (N1C->isOne()) {
5048	SDValue Op0 = N0;
5049	if (Op0.getOpcode() == ISD::TRUNCATE)
5050	Op0 = Op0.getOperand(i: `0`);
5051
5052	if ((Op0.getOpcode() == ISD::XOR) &&
5053	Op0.getOperand(i: `0`).getOpcode() == ISD::SETCC &&
5054	Op0.getOperand(i: `1`).getOpcode() == ISD::SETCC) {
5055	SDValue XorLHS = Op0.getOperand(i: `0`);
5056	SDValue XorRHS = Op0.getOperand(i: `1`);
5057	// Ensure that the input setccs return an i1 type or 0/1 value.
5058	if (Op0.getValueType() == MVT::i1 \|\|
5059	(getBooleanContents(Type: XorLHS.getOperand(i: `0`).getValueType()) ==
5060	ZeroOrOneBooleanContent &&
5061	getBooleanContents(Type: XorRHS.getOperand(i: `0`).getValueType()) ==
5062	ZeroOrOneBooleanContent)) {
5063	// (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
5064	Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
5065	return DAG.getSetCC(DL: dl, VT, LHS: XorLHS, RHS: XorRHS, Cond);
5066	}
5067	}
5068	if (Op0.getOpcode() == ISD::AND && isOneConstant(V: Op0.getOperand(i: `1`))) {
5069	// If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
5070	if (Op0.getValueType().bitsGT(VT))
5071	Op0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
5072	N1: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Op0.getOperand(i: `0`)),
5073	N2: DAG.getConstant(Val: `1`, DL: dl, VT));
5074	else if (Op0.getValueType().bitsLT(VT))
5075	Op0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
5076	N1: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: Op0.getOperand(i: `0`)),
5077	N2: DAG.getConstant(Val: `1`, DL: dl, VT));
5078
5079	return DAG.getSetCC(DL: dl, VT, LHS: Op0,
5080	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: Op0.getValueType()),
5081	Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5082	}
5083	if (Op0.getOpcode() == ISD::AssertZext &&
5084	cast<VTSDNode>(Val: Op0.getOperand(i: `1`))->getVT() == MVT::i1)
5085	return DAG.getSetCC(DL: dl, VT, LHS: Op0,
5086	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: Op0.getValueType()),
5087	Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5088	}
5089	}
5090
5091	// Given:
5092	// icmp eq/ne (urem %x, %y), 0
5093	// Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
5094	// icmp eq/ne %x, 0
5095	if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
5096	(Cond == ISD::SETEQ \|\| Cond == ISD::SETNE)) {
5097	KnownBits XKnown = DAG.computeKnownBits(Op: N0.getOperand(i: `0`));
5098	KnownBits YKnown = DAG.computeKnownBits(Op: N0.getOperand(i: `1`));
5099	if (XKnown.countMaxPopulation() == `1` && YKnown.countMinPopulation() >= `2`)
5100	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`), RHS: N1, Cond);
5101	}
5102
5103	// Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
5104	// and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
5105	if ((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
5106	N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(Val: N0.getOperand(i: `1`)) &&
5107	N0.getConstantOperandAPInt(i: `1`) == OpVT.getScalarSizeInBits() - `1` &&
5108	N1C->isAllOnes()) {
5109	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`),
5110	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: OpVT),
5111	Cond: Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
5112	}
5113
5114	if (SDValue V =
5115	optimizeSetCCOfSignedTruncationCheck(SCCVT: VT, N0, N1, Cond, DCI, DL: dl))
5116	return V;
5117	}
5118
5119	// These simplifications apply to splat vectors as well.
5120	// TODO: Handle more splat vector cases.
5121	if (auto *N1C = isConstOrConstSplat(N: N1)) {
5122	const APInt &C1 = N1C->getAPIntValue();
5123
5124	APInt MinVal, MaxVal;
5125	unsigned OperandBitSize = N1C->getValueType(ResNo: `0`).getScalarSizeInBits();
5126	if (ISD::isSignedIntSetCC(Code: Cond)) {
5127	MinVal = APInt::getSignedMinValue(numBits: OperandBitSize);
5128	MaxVal = APInt::getSignedMaxValue(numBits: OperandBitSize);
5129	} else {
5130	MinVal = APInt::getMinValue(numBits: OperandBitSize);
5131	MaxVal = APInt::getMaxValue(numBits: OperandBitSize);
5132	}
5133
5134	// Canonicalize GE/LE comparisons to use GT/LT comparisons.
5135	if (Cond == ISD::SETGE \|\| Cond == ISD::SETUGE) {
5136	// X >= MIN --> true
5137	if (C1 == MinVal)
5138	return DAG.getBoolConstant(V: true, DL: dl, VT, OpVT);
5139
5140	if (!VT.isVector()) { // TODO: Support this for vectors.
5141	// X >= C0 --> X > (C0 - 1)
5142	APInt C = C1 - `1`;
5143	ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
5144	if ((DCI.isBeforeLegalizeOps() \|\|
5145	isCondCodeLegal(CC: NewCC, VT: OpVT.getSimpleVT())) &&
5146	(!N1C->isOpaque() \|\| (C.getBitWidth() <= `64` &&
5147	isLegalICmpImmediate(C.getSExtValue())))) {
5148	return DAG.getSetCC(DL: dl, VT, LHS: N0,
5149	RHS: DAG.getConstant(Val: C, DL: dl, VT: N1.getValueType()),
5150	Cond: NewCC);
5151	}
5152	}
5153	}
5154
5155	if (Cond == ISD::SETLE \|\| Cond == ISD::SETULE) {
5156	// X <= MAX --> true
5157	if (C1 == MaxVal)
5158	return DAG.getBoolConstant(V: true, DL: dl, VT, OpVT);
5159
5160	// X <= C0 --> X < (C0 + 1)
5161	if (!VT.isVector()) { // TODO: Support this for vectors.
5162	APInt C = C1 + `1`;
5163	ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
5164	if ((DCI.isBeforeLegalizeOps() \|\|
5165	isCondCodeLegal(CC: NewCC, VT: OpVT.getSimpleVT())) &&
5166	(!N1C->isOpaque() \|\| (C.getBitWidth() <= `64` &&
5167	isLegalICmpImmediate(C.getSExtValue())))) {
5168	return DAG.getSetCC(DL: dl, VT, LHS: N0,
5169	RHS: DAG.getConstant(Val: C, DL: dl, VT: N1.getValueType()),
5170	Cond: NewCC);
5171	}
5172	}
5173	}
5174
5175	if (Cond == ISD::SETLT \|\| Cond == ISD::SETULT) {
5176	if (C1 == MinVal)
5177	return DAG.getBoolConstant(V: false, DL: dl, VT, OpVT); // X < MIN --> false
5178
5179	// TODO: Support this for vectors after legalize ops.
5180	if (!VT.isVector() \|\| DCI.isBeforeLegalizeOps()) {
5181	// Canonicalize setlt X, Max --> setne X, Max
5182	if (C1 == MaxVal)
5183	return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: ISD::SETNE);
5184
5185	// If we have setult X, 1, turn it into seteq X, 0
5186	if (C1 == MinVal +`1`)
5187	return DAG.getSetCC(DL: dl, VT, LHS: N0,
5188	RHS: DAG.getConstant(Val: MinVal, DL: dl, VT: N0.getValueType()),
5189	Cond: ISD::SETEQ);
5190	}
5191	}
5192
5193	if (Cond == ISD::SETGT \|\| Cond == ISD::SETUGT) {
5194	if (C1 == MaxVal)
5195	return DAG.getBoolConstant(V: false, DL: dl, VT, OpVT); // X > MAX --> false
5196
5197	// TODO: Support this for vectors after legalize ops.
5198	if (!VT.isVector() \|\| DCI.isBeforeLegalizeOps()) {
5199	// Canonicalize setgt X, Min --> setne X, Min
5200	if (C1 == MinVal)
5201	return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: ISD::SETNE);
5202
5203	// If we have setugt X, Max-1, turn it into seteq X, Max
5204	if (C1 == MaxVal -`1`)
5205	return DAG.getSetCC(DL: dl, VT, LHS: N0,
5206	RHS: DAG.getConstant(Val: MaxVal, DL: dl, VT: N0.getValueType()),
5207	Cond: ISD::SETEQ);
5208	}
5209	}
5210
5211	if (Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) {
5212	// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5213	if (C1.isZero())
5214	if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5215	SCCVT: VT, N0, N1C: N1, Cond, DCI, DL: dl))
5216	return CC;
5217
5218	// For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5219	// For example, when high 32-bits of i64 X are known clear:
5220	// all bits clear: (X \| (Y<<32)) == 0 --> (X \| Y) == 0
5221	// all bits set: (X \| (Y<<32)) == -1 --> (X & Y) == -1
5222	bool CmpZero = N1C->isZero();
5223	bool CmpNegOne = N1C->isAllOnes();
5224	if ((CmpZero \|\| CmpNegOne) && N0.hasOneUse()) {
5225	// Match or(lo,shl(hi,bw/2)) pattern.
5226	auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5227	unsigned EltBits = V.getScalarValueSizeInBits();
5228	if (V.getOpcode() != ISD::OR \|\| (EltBits % `2`) != `0`)
5229	return false;
5230	SDValue LHS = V.getOperand(i: `0`);
5231	SDValue RHS = V.getOperand(i: `1`);
5232	APInt HiBits = APInt::getHighBitsSet(numBits: EltBits, hiBitsSet: EltBits / `2`);
5233	// Unshifted element must have zero upperbits.
5234	if (RHS.getOpcode() == ISD::SHL &&
5235	isa<ConstantSDNode>(Val: RHS.getOperand(i: `1`)) &&
5236	RHS.getConstantOperandAPInt(i: `1`) == (EltBits / `2`) &&
5237	DAG.MaskedValueIsZero(Op: LHS, Mask: HiBits)) {
5238	Lo = LHS;
5239	Hi = RHS.getOperand(i: `0`);
5240	return true;
5241	}
5242	if (LHS.getOpcode() == ISD::SHL &&
5243	isa<ConstantSDNode>(Val: LHS.getOperand(i: `1`)) &&
5244	LHS.getConstantOperandAPInt(i: `1`) == (EltBits / `2`) &&
5245	DAG.MaskedValueIsZero(Op: RHS, Mask: HiBits)) {
5246	Lo = RHS;
5247	Hi = LHS.getOperand(i: `0`);
5248	return true;
5249	}
5250	return false;
5251	};
5252
5253	auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5254	unsigned EltBits = N0.getScalarValueSizeInBits();
5255	unsigned HalfBits = EltBits / `2`;
5256	APInt HiBits = APInt::getHighBitsSet(numBits: EltBits, hiBitsSet: HalfBits);
5257	SDValue LoBits = DAG.getConstant(Val: ~HiBits, DL: dl, VT: OpVT);
5258	SDValue HiMask = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1: Hi, N2: LoBits);
5259	SDValue NewN0 =
5260	DAG.getNode(Opcode: CmpZero ? ISD::OR : ISD::AND, DL: dl, VT: OpVT, N1: Lo, N2: HiMask);
5261	SDValue NewN1 = CmpZero ? DAG.getConstant(Val: `0`, DL: dl, VT: OpVT) : LoBits;
5262	return DAG.getSetCC(DL: dl, VT, LHS: NewN0, RHS: NewN1, Cond);
5263	};
5264
5265	SDValue Lo, Hi;
5266	if (IsConcat (N0, Lo, Hi))
5267	return MergeConcat (Lo, Hi);
5268
5269	if (N0.getOpcode() == ISD::AND \|\| N0.getOpcode() == ISD::OR) {
5270	SDValue Lo0, Lo1, Hi0, Hi1;
5271	if (IsConcat (N0.getOperand(i: `0`), Lo0, Hi0) &&
5272	IsConcat (N0.getOperand(i: `1`), Lo1, Hi1)) {
5273	return MergeConcat (DAG.getNode(Opcode: N0.getOpcode(), DL: dl, VT: OpVT, N1: Lo0, N2: Lo1),
5274	DAG.getNode(Opcode: N0.getOpcode(), DL: dl, VT: OpVT, N1: Hi0, N2: Hi1));
5275	}
5276	}
5277	}
5278	}
5279
5280	// If we have "setcc X, C0", check to see if we can shrink the immediate
5281	// by changing cc.
5282	// TODO: Support this for vectors after legalize ops.
5283	if (!VT.isVector() \|\| DCI.isBeforeLegalizeOps()) {
5284	// SETUGT X, SINTMAX -> SETLT X, 0
5285	// SETUGE X, SINTMIN -> SETLT X, 0
5286	if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) \|\|
5287	(Cond == ISD::SETUGE && C1.isMinSignedValue()))
5288	return DAG.getSetCC(DL: dl, VT, LHS: N0,
5289	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: N1.getValueType()),
5290	Cond: ISD::SETLT);
5291
5292	// SETULT X, SINTMIN -> SETGT X, -1
5293	// SETULE X, SINTMAX -> SETGT X, -1
5294	if ((Cond == ISD::SETULT && C1.isMinSignedValue()) \|\|
5295	(Cond == ISD::SETULE && C1.isMaxSignedValue()))
5296	return DAG.getSetCC(DL: dl, VT, LHS: N0,
5297	RHS: DAG.getAllOnesConstant(DL: dl, VT: N1.getValueType()),
5298	Cond: ISD::SETGT);
5299	}
5300	}
5301
5302	// Back to non-vector simplifications.
5303	// TODO: Can we do these for vector splats?
5304	if (auto *N1C = dyn_cast<ConstantSDNode>(Val: N1.getNode())) {
5305	const APInt &C1 = N1C->getAPIntValue();
5306	EVT ShValTy = N0.getValueType();
5307
5308	// Fold bit comparisons when we can. This will result in an
5309	// incorrect value when boolean false is negative one, unless
5310	// the bitsize is 1 in which case the false value is the same
5311	// in practice regardless of the representation.
5312	if ((VT.getSizeInBits() == `1` \|\|
5313	getBooleanContents(Type: N0.getValueType()) == ZeroOrOneBooleanContent) &&
5314	(Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
5315	(VT == ShValTy \|\| (isTypeLegal(VT) && VT.bitsLE(VT: ShValTy))) &&
5316	N0.getOpcode() == ISD::AND) {
5317	if (auto *AndRHS = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: `1`))) {
5318	if (Cond == ISD::SETNE && C1 == `0`) {// (X & 8) != 0 --> (X & 8) >> 3
5319	// Perform the xform if the AND RHS is a single bit.
5320	unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5321	if (AndRHS->getAPIntValue().isPowerOf2() &&
5322	!shouldAvoidTransformToShift(VT: ShValTy, Amount: ShCt)) {
5323	return DAG.getNode(
5324	Opcode: ISD::TRUNCATE, DL: dl, VT,
5325	Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5326	N2: DAG.getShiftAmountConstant(Val: ShCt, VT: ShValTy, DL: dl)));
5327	}
5328	} else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5329	// (X & 8) == 8 --> (X & 8) >> 3
5330	// Perform the xform if C1 is a single bit.
5331	unsigned ShCt = C1.logBase2();
5332	if (C1.isPowerOf2() && !shouldAvoidTransformToShift(VT: ShValTy, Amount: ShCt)) {
5333	return DAG.getNode(
5334	Opcode: ISD::TRUNCATE, DL: dl, VT,
5335	Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5336	N2: DAG.getShiftAmountConstant(Val: ShCt, VT: ShValTy, DL: dl)));
5337	}
5338	}
5339	}
5340	}
5341
5342	if (C1.getSignificantBits() <= `64` &&
5343	!isLegalICmpImmediate(C1.getSExtValue())) {
5344	// (X & -256) == 256 -> (X >> 8) == 1
5345	if ((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
5346	N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5347	if (auto *AndRHS = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: `1`))) {
5348	const APInt &AndRHSC = AndRHS->getAPIntValue();
5349	if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(RHS: AndRHSC)) {
5350	unsigned ShiftBits = AndRHSC.countr_zero();
5351	if (!shouldAvoidTransformToShift(VT: ShValTy, Amount: ShiftBits)) {
5352	SDValue Shift = DAG.getNode(
5353	Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0.getOperand(i: `0`),
5354	N2: DAG.getShiftAmountConstant(Val: ShiftBits, VT: ShValTy, DL: dl));
5355	SDValue CmpRHS = DAG.getConstant(Val: C1.lshr(shiftAmt: ShiftBits), DL: dl, VT: ShValTy);
5356	return DAG.getSetCC(DL: dl, VT, LHS: Shift, RHS: CmpRHS, Cond);
5357	}
5358	}
5359	}
5360	} else if (Cond == ISD::SETULT \|\| Cond == ISD::SETUGE \|\|
5361	Cond == ISD::SETULE \|\| Cond == ISD::SETUGT) {
5362	bool AdjOne = (Cond == ISD::SETULE \|\| Cond == ISD::SETUGT);
5363	// X < 0x100000000 -> (X >> 32) < 1
5364	// X >= 0x100000000 -> (X >> 32) >= 1
5365	// X <= 0x0ffffffff -> (X >> 32) < 1
5366	// X > 0x0ffffffff -> (X >> 32) >= 1
5367	unsigned ShiftBits;
5368	APInt NewC = C1;
5369	ISD::CondCode NewCond = Cond;
5370	if (AdjOne) {
5371	ShiftBits = C1.countr_one();
5372	NewC = NewC + `1`;
5373	NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5374	} else {
5375	ShiftBits = C1.countr_zero();
5376	}
5377	NewC.lshrInPlace(ShiftAmt: ShiftBits);
5378	if (ShiftBits && NewC.getSignificantBits() <= `64` &&
5379	isLegalICmpImmediate(NewC.getSExtValue()) &&
5380	!shouldAvoidTransformToShift(VT: ShValTy, Amount: ShiftBits)) {
5381	SDValue Shift =
5382	DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5383	N2: DAG.getShiftAmountConstant(Val: ShiftBits, VT: ShValTy, DL: dl));
5384	SDValue CmpRHS = DAG.getConstant(Val: NewC, DL: dl, VT: ShValTy);
5385	return DAG.getSetCC(DL: dl, VT, LHS: Shift, RHS: CmpRHS, Cond: NewCond);
5386	}
5387	}
5388	}
5389	}
5390
5391	if (!isa<ConstantFPSDNode>(Val: N0) && isa<ConstantFPSDNode>(Val: N1)) {
5392	auto *CFP = cast<ConstantFPSDNode>(Val&: N1);
5393	assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5394
5395	// Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5396	// constant if knowing that the operand is non-nan is enough. We prefer to
5397	// have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5398	// materialize 0.0.
5399	if (Cond == ISD::SETO \|\| Cond == ISD::SETUO)
5400	return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N0, Cond);
5401
5402	// setcc (fneg x), C -> setcc swap(pred) x, -C
5403	if (N0.getOpcode() == ISD::FNEG) {
5404	ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Operation: Cond);
5405	if (DCI.isBeforeLegalizeOps() \|\|
5406	isCondCodeLegal(CC: SwapCond, VT: N0.getSimpleValueType())) {
5407	SDValue NegN1 = DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT: N0.getValueType(), Operand: N1);
5408	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`), RHS: NegN1, Cond: SwapCond);
5409	}
5410	}
5411
5412	// setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5413	if (isOperationLegalOrCustom(Op: ISD::IS_FPCLASS, VT: N0.getValueType()) &&
5414	!isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(ResNo: `0`))) {
5415	bool IsFabs = N0.getOpcode() == ISD::FABS;
5416	SDValue Op = IsFabs ? N0.getOperand(i: `0`) : N0;
5417	if ((Cond == ISD::SETOEQ \|\| Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5418	FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5419	: (IsFabs ? fcInf : fcPosInf);
5420	if (Cond == ISD::SETUEQ)
5421	Flag \|= fcNan;
5422	return DAG.getNode(Opcode: ISD::IS_FPCLASS, DL: dl, VT, N1: Op,
5423	N2: DAG.getTargetConstant(Val: Flag, DL: dl, VT: MVT::i32));
5424	}
5425	}
5426
5427	// If the condition is not legal, see if we can find an equivalent one
5428	// which is legal.
5429	if (!isCondCodeLegal(CC: Cond, VT: N0.getSimpleValueType())) {
5430	// If the comparison was an awkward floating-point == or != and one of
5431	// the comparison operands is infinity or negative infinity, convert the
5432	// condition to a less-awkward <= or >=.
5433	if (CFP->getValueAPF().isInfinity()) {
5434	bool IsNegInf = CFP->getValueAPF().isNegative();
5435	ISD::CondCode NewCond = ISD::SETCC_INVALID;
5436	switch (Cond) {
5437	case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5438	case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5439	case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5440	case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5441	default: break;
5442	}
5443	if (NewCond != ISD::SETCC_INVALID &&
5444	isCondCodeLegal(CC: NewCond, VT: N0.getSimpleValueType()))
5445	return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: NewCond);
5446	}
5447	}
5448	}
5449
5450	if (N0 == N1) {
5451	// The sext(setcc()) => setcc() optimization relies on the appropriate
5452	// constant being emitted.
5453	assert(!N0.getValueType().isInteger() &&
5454	"Integer types should be handled by FoldSetCC");
5455
5456	bool EqTrue = ISD::isTrueWhenEqual(Cond);
5457	unsigned UOF = ISD::getUnorderedFlavor(Cond);
5458	if (UOF == `2`) // FP operators that are undefined on NaNs.
5459	return DAG.getBoolConstant(V: EqTrue, DL: dl, VT, OpVT);
5460	if (UOF == unsigned(EqTrue))
5461	return DAG.getBoolConstant(V: EqTrue, DL: dl, VT, OpVT);
5462	// Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5463	// if it is not already.
5464	ISD::CondCode NewCond = UOF == `0` ? ISD::SETO : ISD::SETUO;
5465	if (NewCond != Cond &&
5466	(DCI.isBeforeLegalizeOps() \|\|
5467	isCondCodeLegal(CC: NewCond, VT: N0.getSimpleValueType())))
5468	return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: NewCond);
5469	}
5470
5471	// ~X > ~Y --> Y > X
5472	// ~X < ~Y --> Y < X
5473	// ~X < C --> X > ~C
5474	// ~X > C --> X < ~C
5475	if ((isSignedIntSetCC(Code: Cond) \|\| isUnsignedIntSetCC(Code: Cond)) &&
5476	N0.getValueType().isInteger()) {
5477	if (isBitwiseNot(V: N0)) {
5478	if (isBitwiseNot(V: N1))
5479	return DAG.getSetCC(DL: dl, VT, LHS: N1.getOperand(i: `0`), RHS: N0.getOperand(i: `0`), Cond);
5480
5481	if (DAG.isConstantIntBuildVectorOrConstantInt(N: N1) &&
5482	!DAG.isConstantIntBuildVectorOrConstantInt(N: N0.getOperand(i: `0`))) {
5483	SDValue Not = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5484	return DAG.getSetCC(DL: dl, VT, LHS: Not, RHS: N0.getOperand(i: `0`), Cond);
5485	}
5486	}
5487	}
5488
5489	if ((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
5490	N0.getValueType().isInteger()) {
5491	if (N0.getOpcode() == ISD::ADD \|\| N0.getOpcode() == ISD::SUB \|\|
5492	N0.getOpcode() == ISD::XOR) {
5493	// Simplify (X+Y) == (X+Z) --> Y == Z
5494	if (N0.getOpcode() == N1.getOpcode()) {
5495	if (N0.getOperand(i: `0`) == N1.getOperand(i: `0`))
5496	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `1`), RHS: N1.getOperand(i: `1`), Cond);
5497	if (N0.getOperand(i: `1`) == N1.getOperand(i: `1`))
5498	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`), RHS: N1.getOperand(i: `0`), Cond);
5499	if (isCommutativeBinOp(Opcode: N0.getOpcode())) {
5500	// If X op Y == Y op X, try other combinations.
5501	if (N0.getOperand(i: `0`) == N1.getOperand(i: `1`))
5502	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `1`), RHS: N1.getOperand(i: `0`),
5503	Cond);
5504	if (N0.getOperand(i: `1`) == N1.getOperand(i: `0`))
5505	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`), RHS: N1.getOperand(i: `1`),
5506	Cond);
5507	}
5508	}
5509
5510	// If RHS is a legal immediate value for a compare instruction, we need
5511	// to be careful about increasing register pressure needlessly.
5512	bool LegalRHSImm = false;
5513
5514	if (auto *RHSC = dyn_cast<ConstantSDNode>(Val&: N1)) {
5515	if (auto *LHSR = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: `1`))) {
5516	// Turn (X+C1) == C2 --> X == C2-C1
5517	if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5518	return DAG.getSetCC(
5519	DL: dl, VT, LHS: N0.getOperand(i: `0`),
5520	RHS: DAG.getConstant(Val: RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5521	DL: dl, VT: N0.getValueType()),
5522	Cond);
5523
5524	// Turn (X^C1) == C2 --> X == C1^C2
5525	if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5526	return DAG.getSetCC(
5527	DL: dl, VT, LHS: N0.getOperand(i: `0`),
5528	RHS: DAG.getConstant(Val: LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5529	DL: dl, VT: N0.getValueType()),
5530	Cond);
5531	}
5532
5533	// Turn (C1-X) == C2 --> X == C1-C2
5534	if (auto *SUBC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: `0`)))
5535	if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5536	return DAG.getSetCC(
5537	DL: dl, VT, LHS: N0.getOperand(i: `1`),
5538	RHS: DAG.getConstant(Val: SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5539	DL: dl, VT: N0.getValueType()),
5540	Cond);
5541
5542	// Could RHSC fold directly into a compare?
5543	if (RHSC->getValueType(ResNo: `0`).getSizeInBits() <= `64`)
5544	LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5545	}
5546
5547	// (X+Y) == X --> Y == 0 and similar folds.
5548	// Don't do this if X is an immediate that can fold into a cmp
5549	// instruction and X+Y has other uses. It could be an induction variable
5550	// chain, and the transform would increase register pressure.
5551	if (!LegalRHSImm \|\| N0.hasOneUse())
5552	if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, DL: dl, DCI))
5553	return V;
5554	}
5555
5556	if (N1.getOpcode() == ISD::ADD \|\| N1.getOpcode() == ISD::SUB \|\|
5557	N1.getOpcode() == ISD::XOR)
5558	if (SDValue V = foldSetCCWithBinOp(VT, N0: N1, N1: N0, Cond, DL: dl, DCI))
5559	return V;
5560
5561	if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, DL: dl, DCI))
5562	return V;
5563
5564	if (SDValue V = foldSetCCWithOr(VT, N0, N1, Cond, DL: dl, DCI))
5565	return V;
5566	}
5567
5568	// Fold remainder of division by a constant.
5569	if ((N0.getOpcode() == ISD::UREM \|\| N0.getOpcode() == ISD::SREM) &&
5570	N0.hasOneUse() && (Cond == ISD::SETEQ \|\| Cond == ISD::SETNE)) {
5571	// When division is cheap or optimizing for minimum size,
5572	// fall through to DIVREM creation by skipping this fold.
5573	if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Kind: Attribute::MinSize)) {
5574	if (N0.getOpcode() == ISD::UREM) {
5575	if (SDValue Folded = buildUREMEqFold(SETCCVT: VT, REMNode: N0, CompTargetNode: N1, Cond, DCI, DL: dl))
5576	return Folded;
5577	} else if (N0.getOpcode() == ISD::SREM) {
5578	if (SDValue Folded = buildSREMEqFold(SETCCVT: VT, REMNode: N0, CompTargetNode: N1, Cond, DCI, DL: dl))
5579	return Folded;
5580	}
5581	}
5582	}
5583
5584	// Fold away ALL boolean setcc's.
5585	if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5586	SDValue Temp;
5587	switch (Cond) {
5588	default: llvm_unreachable("Unknown integer setcc!");
5589	case ISD::SETEQ: // X == Y -> ~(X^Y)
5590	Temp = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OpVT, N1: N0, N2: N1);
5591	N0 = DAG.getNOT(DL: dl, Val: Temp, VT: OpVT);
5592	if (!DCI.isCalledByLegalizer())
5593	DCI.AddToWorklist(N: Temp.getNode());
5594	break;
5595	case ISD::SETNE: // X != Y --> (X^Y)
5596	N0 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OpVT, N1: N0, N2: N1);
5597	break;
5598	case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5599	case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5600	Temp = DAG.getNOT(DL: dl, Val: N0, VT: OpVT);
5601	N0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1, N2: Temp);
5602	if (!DCI.isCalledByLegalizer())
5603	DCI.AddToWorklist(N: Temp.getNode());
5604	break;
5605	case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5606	case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5607	Temp = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5608	N0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1: N0, N2: Temp);
5609	if (!DCI.isCalledByLegalizer())
5610	DCI.AddToWorklist(N: Temp.getNode());
5611	break;
5612	case ISD::SETULE: // X <=u Y --> X == 0 \| Y == 1 --> ~X \| Y
5613	case ISD::SETGE: // X >=s Y --> X == 0 \| Y == 1 --> ~X \| Y
5614	Temp = DAG.getNOT(DL: dl, Val: N0, VT: OpVT);
5615	N0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1, N2: Temp);
5616	if (!DCI.isCalledByLegalizer())
5617	DCI.AddToWorklist(N: Temp.getNode());
5618	break;
5619	case ISD::SETUGE: // X >=u Y --> X == 1 \| Y == 0 --> ~Y \| X
5620	case ISD::SETLE: // X <=s Y --> X == 1 \| Y == 0 --> ~Y \| X
5621	Temp = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5622	N0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: N0, N2: Temp);
5623	break;
5624	}
5625	if (VT.getScalarType() != MVT::i1) {
5626	if (!DCI.isCalledByLegalizer())
5627	DCI.AddToWorklist(N: N0.getNode());
5628	// FIXME: If running after legalize, we probably can't do this.
5629	ISD::NodeType ExtendCode = getExtendForContent(Content: getBooleanContents(Type: OpVT));
5630	N0 = DAG.getNode(Opcode: ExtendCode, DL: dl, VT, Operand: N0);
5631	}
5632	return N0;
5633	}
5634
5635	// Could not fold it.
5636	return SDValue ();
5637	}
5638
5639	/// Returns true (and the GlobalValue and the offset) if the node is a
5640	/// GlobalAddress + offset.
5641	bool TargetLowering::isGAPlusOffset(SDNode WN, const* GlobalValue *&GA,
5642	int64_t &Offset) const {
5643
5644	SDNode *N = unwrapAddress(N: SDValue (WN, `0`)).getNode();
5645
5646	if (auto *GASD = dyn_cast<GlobalAddressSDNode>(Val: N)) {
5647	GA = GASD->getGlobal();
5648	Offset += GASD->getOffset();
5649	return true;
5650	}
5651
5652	if (N->getOpcode() == ISD::ADD) {
5653	SDValue N1 = N->getOperand(Num: `0`);
5654	SDValue N2 = N->getOperand(Num: `1`);
5655	if (isGAPlusOffset(WN: N1.getNode(), GA, Offset)) {
5656	if (auto *V = dyn_cast<ConstantSDNode>(Val&: N2)) {
5657	Offset += V->getSExtValue();
5658	return true;
5659	}
5660	} else if (isGAPlusOffset(WN: N2.getNode(), GA, Offset)) {
5661	if (auto *V = dyn_cast<ConstantSDNode>(Val&: N1)) {
5662	Offset += V->getSExtValue();
5663	return true;
5664	}
5665	}
5666	}
5667
5668	return false;
5669	}
5670
5671	SDValue TargetLowering::PerformDAGCombine(SDNode *N,
5672	DAGCombinerInfo &DCI) const {
5673	// Default implementation: no optimization.
5674	return SDValue ();
5675	}
5676
5677	//===----------------------------------------------------------------------===//
5678	// Inline Assembler Implementation Methods
5679	//===----------------------------------------------------------------------===//
5680
5681	TargetLowering::ConstraintType
5682	TargetLowering::getConstraintType(StringRef Constraint) const {
5683	unsigned S = Constraint.size();
5684
5685	if (S == `1`) {
5686	switch (Constraint [`0`]) {
5687	default: break;
5688	case `'r'`:
5689	return C_RegisterClass;
5690	case `'m'`: // memory
5691	case `'o'`: // offsetable
5692	case `'V'`: // not offsetable
5693	return C_Memory;
5694	case `'p'`: // Address.
5695	return C_Address;
5696	case `'n'`: // Simple Integer
5697	case `'E'`: // Floating Point Constant
5698	case `'F'`: // Floating Point Constant
5699	return C_Immediate;
5700	case `'i'`: // Simple Integer or Relocatable Constant
5701	case `'s'`: // Relocatable Constant
5702	case `'X'`: // Allow ANY value.
5703	case `'I'`: // Target registers.
5704	case `'J'`:
5705	case `'K'`:
5706	case `'L'`:
5707	case `'M'`:
5708	case `'N'`:
5709	case `'O'`:
5710	case `'P'`:
5711	case `'<'`:
5712	case `'>'`:
5713	return C_Other;
5714	}
5715	}
5716
5717	if (S > `1` && Constraint [`0`] == `'{'` && Constraint [S - `1`] == `'}'`) {
5718	if (S == `8` && Constraint.substr(Start: `1`, N: `6`) == "memory") // "{memory}"
5719	return C_Memory;
5720	return C_Register;
5721	}
5722	return C_Unknown;
5723	}
5724
5725	/// Try to replace an X constraint, which matches anything, with another that
5726	/// has more specific requirements based on the type of the corresponding
5727	/// operand.
5728	const char TargetLowering::LowerXConstraint(EVT ConstraintVT) const* {
5729	if (ConstraintVT.isInteger())
5730	return "r";
5731	if (ConstraintVT.isFloatingPoint())
5732	return "f"; // works for many targets
5733	return nullptr;
5734	}
5735
5736	SDValue TargetLowering::LowerAsmOutputForConstraint(
5737	SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5738	const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5739	return SDValue ();
5740	}
5741
5742	/// Lower the specified operand into the Ops vector.
5743	/// If it is invalid, don't add anything to Ops.
5744	void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
5745	StringRef Constraint,
5746	std::vector<SDValue> &Ops,
5747	SelectionDAG &DAG) const {
5748
5749	if (Constraint.size() > `1`)
5750	return;
5751
5752	char ConstraintLetter = Constraint [`0`];
5753	switch (ConstraintLetter) {
5754	default: break;
5755	case `'X'`: // Allows any operand
5756	case `'i'`: // Simple Integer or Relocatable Constant
5757	case `'n'`: // Simple Integer
5758	case `'s'`: { // Relocatable Constant
5759
5760	ConstantSDNode *C;
5761	uint64_t Offset = `0`;
5762
5763	// Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5764	// etc., since getelementpointer is variadic. We can't use
5765	// SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5766	// while in this case the GA may be furthest from the root node which is
5767	// likely an ISD::ADD.
5768	while (true) {
5769	if ((C = dyn_cast<ConstantSDNode>(Val&: Op)) && ConstraintLetter != `'s'`) {
5770	// gcc prints these as sign extended. Sign extend value to 64 bits
5771	// now; without this it would get ZExt'd later in
5772	// ScheduleDAGSDNodes::EmitNode, which is very generic.
5773	bool IsBool = C->getConstantIntValue()->getBitWidth() == `1`;
5774	BooleanContent BCont = getBooleanContents(Type: MVT::i64);
5775	ISD::NodeType ExtOpc =
5776	IsBool ? getExtendForContent(Content: BCont) : ISD::SIGN_EXTEND;
5777	int64_t ExtVal =
5778	ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5779	Ops.push_back(
5780	x: DAG.getTargetConstant(Val: Offset + ExtVal, DL: SDLoc (C), VT: MVT::i64));
5781	return;
5782	}
5783	if (ConstraintLetter != `'n'`) {
5784	if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Val&: Op)) {
5785	Ops.push_back(x: DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL: SDLoc (Op),
5786	VT: GA->getValueType(ResNo: `0`),
5787	offset: Offset + GA->getOffset()));
5788	return;
5789	}
5790	if (const auto *BA = dyn_cast<BlockAddressSDNode>(Val&: Op)) {
5791	Ops.push_back(x: DAG.getTargetBlockAddress(
5792	BA: BA->getBlockAddress(), VT: BA->getValueType(ResNo: `0`),
5793	Offset: Offset + BA->getOffset(), TargetFlags: BA->getTargetFlags()));
5794	return;
5795	}
5796	if (isa<BasicBlockSDNode>(Val: Op)) {
5797	Ops.push_back(x: Op);
5798	return;
5799	}
5800	}
5801	const unsigned OpCode = Op.getOpcode();
5802	if (OpCode == ISD::ADD \|\| OpCode == ISD::SUB) {
5803	if ((C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `0`))))
5804	Op = Op.getOperand(i: `1`);
5805	// Subtraction is not commutative.
5806	else if (OpCode == ISD::ADD &&
5807	(C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `1`))))
5808	Op = Op.getOperand(i: `0`);
5809	else
5810	return;
5811	Offset += (OpCode == ISD::ADD ? `1` : -`1`) * C->getSExtValue();
5812	continue;
5813	}
5814	return;
5815	}
5816	break;
5817	}
5818	}
5819	}
5820
5821	void TargetLowering::CollectTargetIntrinsicOperands(
5822	const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
5823	}
5824
5825	std::pair<unsigned, const TargetRegisterClass *>
5826	TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
5827	StringRef Constraint,
5828	MVT VT) const {
5829	if (!Constraint.starts_with(Prefix: "{"))
5830	return std::make_pair(x: `0u`, y: static_cast<TargetRegisterClass >(nullptr*));
5831	assert(*(Constraint.end() - `1`) == `'}'` && "Not a brace enclosed constraint?");
5832
5833	// Remove the braces from around the name.
5834	StringRef RegName(Constraint.data() + `1`, Constraint.size() - `2`);
5835
5836	std::pair<unsigned, const TargetRegisterClass *> R =
5837	std::make_pair(x: `0u`, y: static_cast<const TargetRegisterClass >(nullptr*));
5838
5839	// Figure out which register class contains this reg.
5840	for (const TargetRegisterClass *RC : RI->regclasses()) {
5841	// If none of the value types for this register class are valid, we
5842	// can't use it. For example, 64-bit reg classes on 32-bit targets.
5843	if (!isLegalRC(TRI: RI, RC: RC))
5844	continue;
5845
5846	for (const MCPhysReg &PR : *RC) {
5847	if (RegName.equals_insensitive(RHS: RI->getRegAsmName(Reg: PR))) {
5848	std::pair<unsigned, const TargetRegisterClass *> S =
5849	std::make_pair(x: PR, y&: RC);
5850
5851	// If this register class has the requested value type, return it,
5852	// otherwise keep searching and return the first class found
5853	// if no other is found which explicitly has the requested type.
5854	if (RI->isTypeLegalForClass(RC: *RC, T: VT))
5855	return S;
5856	if (!R.second)
5857	R = S;
5858	}
5859	}
5860	}
5861
5862	return R;
5863	}
5864
5865	//===----------------------------------------------------------------------===//
5866	// Constraint Selection.
5867
5868	/// Return true of this is an input operand that is a matching constraint like
5869	/// "4".
5870	bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
5871	assert(!ConstraintCode.empty() && "No known constraint!");
5872	return isdigit(static_cast<unsigned char>(ConstraintCode [`0`]));
5873	}
5874
5875	/// If this is an input matching constraint, this method returns the output
5876	/// operand it matches.
5877	unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
5878	assert(!ConstraintCode.empty() && "No known constraint!");
5879	return atoi(nptr: ConstraintCode.c_str());
5880	}
5881
5882	/// Split up the constraint string from the inline assembly value into the
5883	/// specific constraints and their prefixes, and also tie in the associated
5884	/// operand values.
5885	/// If this returns an empty vector, and if the constraint string itself
5886	/// isn't empty, there was an error parsing.
5887	TargetLowering::AsmOperandInfoVector
5888	TargetLowering::ParseConstraints(const DataLayout &DL,
5889	const TargetRegisterInfo *TRI,
5890	const CallBase &Call) const {
5891	/// Information about all of the constraints.
5892	AsmOperandInfoVector ConstraintOperands;
5893	const InlineAsm *IA = cast<InlineAsm>(Val: Call.getCalledOperand());
5894	unsigned maCount = `0`; // Largest number of multiple alternative constraints.
5895
5896	// Do a prepass over the constraints, canonicalizing them, and building up the
5897	// ConstraintOperands list.
5898	unsigned ArgNo = `0`; // ArgNo - The argument of the CallInst.
5899	unsigned ResNo = `0`; // ResNo - The result number of the next output.
5900	unsigned LabelNo = `0`; // LabelNo - CallBr indirect dest number.
5901
5902	for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
5903	ConstraintOperands.emplace_back(args: std::move(CI));
5904	AsmOperandInfo &OpInfo = ConstraintOperands.back();
5905
5906	// Update multiple alternative constraint count.
5907	if (OpInfo.multipleAlternatives.size() > maCount)
5908	maCount = OpInfo.multipleAlternatives.size();
5909
5910	OpInfo.ConstraintVT = MVT::Other;
5911
5912	// Compute the value type for each operand.
5913	switch (OpInfo.Type) {
5914	case InlineAsm::isOutput:
5915	// Indirect outputs just consume an argument.
5916	if (OpInfo.isIndirect) {
5917	OpInfo.CallOperandVal = Call.getArgOperand(i: ArgNo);
5918	break;
5919	}
5920
5921	// The return value of the call is this value. As such, there is no
5922	// corresponding argument.
5923	assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
5924	if (auto *STy = dyn_cast<StructType>(Val: Call.getType())) {
5925	OpInfo.ConstraintVT =
5926	getAsmOperandValueType(DL, Ty: STy->getElementType(N: ResNo))
5927	.getSimpleVT();
5928	} else {
5929	assert(ResNo == `0` && "Asm only has one result!");
5930	OpInfo.ConstraintVT =
5931	getAsmOperandValueType(DL, Ty: Call.getType()).getSimpleVT();
5932	}
5933	++ResNo;
5934	break;
5935	case InlineAsm::isInput:
5936	OpInfo.CallOperandVal = Call.getArgOperand(i: ArgNo);
5937	break;
5938	case InlineAsm::isLabel:
5939	OpInfo.CallOperandVal = cast<CallBrInst>(Val: &Call)->getIndirectDest(i: LabelNo);
5940	++LabelNo;
5941	continue;
5942	case InlineAsm::isClobber:
5943	// Nothing to do.
5944	break;
5945	}
5946
5947	if (OpInfo.CallOperandVal) {
5948	llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
5949	if (OpInfo.isIndirect) {
5950	OpTy = Call.getParamElementType(ArgNo);
5951	assert(OpTy && "Indirect operand must have elementtype attribute");
5952	}
5953
5954	// Look for vector wrapped in a struct. e.g. { <16 x i8> }.
5955	if (StructType *STy = dyn_cast<StructType>(Val: OpTy))
5956	if (STy->getNumElements() == `1`)
5957	OpTy = STy->getElementType(N: `0`);
5958
5959	// If OpTy is not a single value, it may be a struct/union that we
5960	// can tile with integers.
5961	if (!OpTy->isSingleValueType() && OpTy->isSized()) {
5962	unsigned BitSize = DL.getTypeSizeInBits(Ty: OpTy);
5963	switch (BitSize) {
5964	default: break;
5965	case `1`:
5966	case `8`:
5967	case `16`:
5968	case `32`:
5969	case `64`:
5970	case `128`:
5971	OpTy = IntegerType::get(C&: OpTy->getContext(), NumBits: BitSize);
5972	break;
5973	}
5974	}
5975
5976	EVT VT = getAsmOperandValueType(DL, Ty: OpTy, AllowUnknown: true);
5977	OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
5978	ArgNo++;
5979	}
5980	}
5981
5982	// If we have multiple alternative constraints, select the best alternative.
5983	if (!ConstraintOperands.empty()) {
5984	if (maCount) {
5985	unsigned bestMAIndex = `0`;
5986	int bestWeight = -`1`;
5987	// weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
5988	int weight = -`1`;
5989	unsigned maIndex;
5990	// Compute the sums of the weights for each alternative, keeping track
5991	// of the best (highest weight) one so far.
5992	for (maIndex = `0`; maIndex < maCount; ++maIndex) {
5993	int weightSum = `0`;
5994	for (unsigned cIndex = `0`, eIndex = ConstraintOperands.size();
5995	cIndex != eIndex; ++cIndex) {
5996	AsmOperandInfo &OpInfo = ConstraintOperands [cIndex];
5997	if (OpInfo.Type == InlineAsm::isClobber)
5998	continue;
5999
6000	// If this is an output operand with a matching input operand,
6001	// look up the matching input. If their types mismatch, e.g. one
6002	// is an integer, the other is floating point, or their sizes are
6003	// different, flag it as an maCantMatch.
6004	if (OpInfo.hasMatchingInput()) {
6005	AsmOperandInfo &Input = ConstraintOperands [OpInfo.MatchingInput];
6006	if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6007	if ((OpInfo.ConstraintVT.isInteger() !=
6008	Input.ConstraintVT.isInteger()) \|\|
6009	(OpInfo.ConstraintVT.getSizeInBits() !=
6010	Input.ConstraintVT.getSizeInBits())) {
6011	weightSum = -`1`; // Can't match.
6012	break;
6013	}
6014	}
6015	}
6016	weight = getMultipleConstraintMatchWeight(info&: OpInfo, maIndex);
6017	if (weight == -`1`) {
6018	weightSum = -`1`;
6019	break;
6020	}
6021	weightSum += weight;
6022	}
6023	// Update best.
6024	if (weightSum > bestWeight) {
6025	bestWeight = weightSum;
6026	bestMAIndex = maIndex;
6027	}
6028	}
6029
6030	// Now select chosen alternative in each constraint.
6031	for (AsmOperandInfo &cInfo : ConstraintOperands)
6032	if (cInfo.Type != InlineAsm::isClobber)
6033	cInfo.selectAlternative(index: bestMAIndex);
6034	}
6035	}
6036
6037	// Check and hook up tied operands, choose constraint code to use.
6038	for (unsigned cIndex = `0`, eIndex = ConstraintOperands.size();
6039	cIndex != eIndex; ++cIndex) {
6040	AsmOperandInfo &OpInfo = ConstraintOperands [cIndex];
6041
6042	// If this is an output operand with a matching input operand, look up the
6043	// matching input. If their types mismatch, e.g. one is an integer, the
6044	// other is floating point, or their sizes are different, flag it as an
6045	// error.
6046	if (OpInfo.hasMatchingInput()) {
6047	AsmOperandInfo &Input = ConstraintOperands [OpInfo.MatchingInput];
6048
6049	if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6050	std::pair<unsigned, const TargetRegisterClass *> MatchRC =
6051	getRegForInlineAsmConstraint(RI: TRI, Constraint: OpInfo.ConstraintCode,
6052	VT: OpInfo.ConstraintVT);
6053	std::pair<unsigned, const TargetRegisterClass *> InputRC =
6054	getRegForInlineAsmConstraint(RI: TRI, Constraint: Input.ConstraintCode,
6055	VT: Input.ConstraintVT);
6056	const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() \|\|
6057	OpInfo.ConstraintVT.isFloatingPoint();
6058	const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() \|\|
6059	Input.ConstraintVT.isFloatingPoint();
6060	if ((OutOpIsIntOrFP != InOpIsIntOrFP) \|\|
6061	(MatchRC.second != InputRC.second)) {
6062	report_fatal_error(reason: "Unsupported asm: input constraint"
6063	" with a matching output constraint of"
6064	" incompatible type!");
6065	}
6066	}
6067	}
6068	}
6069
6070	return ConstraintOperands;
6071	}
6072
6073	/// Return a number indicating our preference for chosing a type of constraint
6074	/// over another, for the purpose of sorting them. Immediates are almost always
6075	/// preferrable (when they can be emitted). A higher return value means a
6076	/// stronger preference for one constraint type relative to another.
6077	/// FIXME: We should prefer registers over memory but doing so may lead to
6078	/// unrecoverable register exhaustion later.
6079	/// https://github.com/llvm/llvm-project/issues/20571
6080	static unsigned getConstraintPiority(TargetLowering::ConstraintType CT) {
6081	switch (CT) {
6082	case TargetLowering::C_Immediate:
6083	case TargetLowering::C_Other:
6084	return `4`;
6085	case TargetLowering::C_Memory:
6086	case TargetLowering::C_Address:
6087	return `3`;
6088	case TargetLowering::C_RegisterClass:
6089	return `2`;
6090	case TargetLowering::C_Register:
6091	return `1`;
6092	case TargetLowering::C_Unknown:
6093	return `0`;
6094	}
6095	llvm_unreachable("Invalid constraint type");
6096	}
6097
6098	/// Examine constraint type and operand type and determine a weight value.
6099	/// This object must already have been set up with the operand type
6100	/// and the current alternative constraint selected.
6101	TargetLowering::ConstraintWeight
6102	TargetLowering::getMultipleConstraintMatchWeight(
6103	AsmOperandInfo &info, int maIndex) const {
6104	InlineAsm::ConstraintCodeVector *rCodes;
6105	if (maIndex >= (int)info.multipleAlternatives.size())
6106	rCodes = &info.Codes;
6107	else
6108	rCodes = &info.multipleAlternatives [maIndex].Codes;
6109	ConstraintWeight BestWeight = CW_Invalid;
6110
6111	// Loop over the options, keeping track of the most general one.
6112	for (const std::string &rCode : *rCodes) {
6113	ConstraintWeight weight =
6114	getSingleConstraintMatchWeight(info, constraint: rCode.c_str());
6115	if (weight > BestWeight)
6116	BestWeight = weight;
6117	}
6118
6119	return BestWeight;
6120	}
6121
6122	/// Examine constraint type and operand type and determine a weight value.
6123	/// This object must already have been set up with the operand type
6124	/// and the current alternative constraint selected.
6125	TargetLowering::ConstraintWeight
6126	TargetLowering::getSingleConstraintMatchWeight(
6127	AsmOperandInfo &info, const char constraint) const* {
6128	ConstraintWeight weight = CW_Invalid;
6129	Value *CallOperandVal = info.CallOperandVal;
6130	// If we don't have a value, we can't do a match,
6131	// but allow it at the lowest weight.
6132	if (!CallOperandVal)
6133	return CW_Default;
6134	// Look at the constraint type.
6135	switch (*constraint) {
6136	case `'i'`: // immediate integer.
6137	case `'n'`: // immediate integer with a known value.
6138	if (isa<ConstantInt>(Val: CallOperandVal))
6139	weight = CW_Constant;
6140	break;
6141	case `'s'`: // non-explicit intregal immediate.
6142	if (isa<GlobalValue>(Val: CallOperandVal))
6143	weight = CW_Constant;
6144	break;
6145	case `'E'`: // immediate float if host format.
6146	case `'F'`: // immediate float.
6147	if (isa<ConstantFP>(Val: CallOperandVal))
6148	weight = CW_Constant;
6149	break;
6150	case `'<'`: // memory operand with autodecrement.
6151	case `'>'`: // memory operand with autoincrement.
6152	case `'m'`: // memory operand.
6153	case `'o'`: // offsettable memory operand
6154	case `'V'`: // non-offsettable memory operand
6155	weight = CW_Memory;
6156	break;
6157	case `'r'`: // general register.
6158	case `'g'`: // general register, memory operand or immediate integer.
6159	// note: Clang converts "g" to "imr".
6160	if (CallOperandVal->getType()->isIntegerTy())
6161	weight = CW_Register;
6162	break;
6163	case `'X'`: // any operand.
6164	default:
6165	weight = CW_Default;
6166	break;
6167	}
6168	return weight;
6169	}
6170
6171	/// If there are multiple different constraints that we could pick for this
6172	/// operand (e.g. "imr") try to pick the 'best' one.
6173	/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
6174	/// into seven classes:
6175	/// Register -> one specific register
6176	/// RegisterClass -> a group of regs
6177	/// Memory -> memory
6178	/// Address -> a symbolic memory reference
6179	/// Immediate -> immediate values
6180	/// Other -> magic values (such as "Flag Output Operands")
6181	/// Unknown -> something we don't recognize yet and can't handle
6182	/// Ideally, we would pick the most specific constraint possible: if we have
6183	/// something that fits into a register, we would pick it. The problem here
6184	/// is that if we have something that could either be in a register or in
6185	/// memory that use of the register could cause selection of other
6186	/// operands to fail: they might only succeed if we pick memory. Because of
6187	/// this the heuristic we use is:
6188	///
6189	/// 1) If there is an 'other' constraint, and if the operand is valid for
6190	/// that constraint, use it. This makes us take advantage of 'i'
6191	/// constraints when available.
6192	/// 2) Otherwise, pick the most general constraint present. This prefers
6193	/// 'm' over 'r', for example.
6194	///
6195	TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
6196	TargetLowering::AsmOperandInfo &OpInfo) const {
6197	ConstraintGroup Ret;
6198
6199	Ret.reserve(N: OpInfo.Codes.size());
6200	for (StringRef Code : OpInfo.Codes) {
6201	TargetLowering::ConstraintType CType = getConstraintType(Constraint: Code);
6202
6203	// Indirect 'other' or 'immediate' constraints are not allowed.
6204	if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory \|\|
6205	CType == TargetLowering::C_Register \|\|
6206	CType == TargetLowering::C_RegisterClass))
6207	continue;
6208
6209	// Things with matching constraints can only be registers, per gcc
6210	// documentation. This mainly affects "g" constraints.
6211	if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6212	continue;
6213
6214	Ret.emplace_back(Args&: Code, Args&: CType);
6215	}
6216
6217	llvm::stable_sort(Range&: Ret, C: [](ConstraintPair a, ConstraintPair b) {
6218	return getConstraintPiority(CT: a.second) > getConstraintPiority(CT: b.second);
6219	});
6220
6221	return Ret;
6222	}
6223
6224	/// If we have an immediate, see if we can lower it. Return true if we can,
6225	/// false otherwise.
6226	static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
6227	SDValue Op, SelectionDAG *DAG,
6228	const TargetLowering &TLI) {
6229
6230	assert((P.second == TargetLowering::C_Other \|\|
6231	P.second == TargetLowering::C_Immediate) &&
6232	"need immediate or other");
6233
6234	if (!Op.getNode())
6235	return false;
6236
6237	std::vector<SDValue> ResultOps;
6238	TLI.LowerAsmOperandForConstraint(Op, Constraint: P.first, Ops&: ResultOps, DAG&: *DAG);
6239	return !ResultOps.empty();
6240	}
6241
6242	/// Determines the constraint code and constraint type to use for the specific
6243	/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
6244	void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
6245	SDValue Op,
6246	SelectionDAG DAG) const* {
6247	assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
6248
6249	// Single-letter constraints ('r') are very common.
6250	if (OpInfo.Codes.size() == `1`) {
6251	OpInfo.ConstraintCode = OpInfo.Codes [`0`];
6252	OpInfo.ConstraintType = getConstraintType(Constraint: OpInfo.ConstraintCode);
6253	} else {
6254	ConstraintGroup G = getConstraintPreferences(OpInfo);
6255	if (G.empty())
6256	return;
6257
6258	unsigned BestIdx = `0`;
6259	for (const unsigned E = G.size();
6260	BestIdx < E && (G [BestIdx].second == TargetLowering::C_Other \|\|
6261	G [BestIdx].second == TargetLowering::C_Immediate);
6262	++BestIdx) {
6263	if (lowerImmediateIfPossible(P&: G [BestIdx], Op, DAG, TLI: *this))
6264	break;
6265	// If we're out of constraints, just pick the first one.
6266	if (BestIdx + `1` == E) {
6267	BestIdx = `0`;
6268	break;
6269	}
6270	}
6271
6272	OpInfo.ConstraintCode = G [BestIdx].first;
6273	OpInfo.ConstraintType = G [BestIdx].second;
6274	}
6275
6276	// 'X' matches anything.
6277	if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6278	// Constants are handled elsewhere. For Functions, the type here is the
6279	// type of the result, which is not what we want to look at; leave them
6280	// alone.
6281	Value *v = OpInfo.CallOperandVal;
6282	if (isa<ConstantInt>(Val: v) \|\| isa<Function>(Val: v)) {
6283	return;
6284	}
6285
6286	if (isa<BasicBlock>(Val: v) \|\| isa<BlockAddress>(Val: v)) {
6287	OpInfo.ConstraintCode = "i";
6288	return;
6289	}
6290
6291	// Otherwise, try to resolve it to something we know about by looking at
6292	// the actual operand type.
6293	if (const char *Repl = LowerXConstraint(ConstraintVT: OpInfo.ConstraintVT)) {
6294	OpInfo.ConstraintCode = Repl;
6295	OpInfo.ConstraintType = getConstraintType(Constraint: OpInfo.ConstraintCode);
6296	}
6297	}
6298	}
6299
6300	/// Given an exact SDIV by a constant, create a multiplication
6301	/// with the multiplicative inverse of the constant.
6302	/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6303	static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
6304	const SDLoc &dl, SelectionDAG &DAG,
6305	SmallVectorImpl<SDNode *> &Created) {
6306	SDValue Op0 = N->getOperand(Num: `0`);
6307	SDValue Op1 = N->getOperand(Num: `1`);
6308	EVT VT = N->getValueType(ResNo: `0`);
6309	EVT SVT = VT.getScalarType();
6310	EVT ShVT = TLI.getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6311	EVT ShSVT = ShVT.getScalarType();
6312
6313	bool UseSRA = false;
6314	SmallVector<SDValue, `16`> Shifts, Factors;
6315
6316	auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6317	if (C->isZero())
6318	return false;
6319	APInt Divisor = C->getAPIntValue();
6320	unsigned Shift = Divisor.countr_zero();
6321	if (Shift) {
6322	Divisor.ashrInPlace(ShiftAmt: Shift);
6323	UseSRA = true;
6324	}
6325	APInt Factor = Divisor.multiplicativeInverse();
6326	Shifts.push_back(Elt: DAG.getConstant(Val: Shift, DL: dl, VT: ShSVT));
6327	Factors.push_back(Elt: DAG.getConstant(Val: Factor, DL: dl, VT: SVT));
6328	return true;
6329	};
6330
6331	// Collect all magic values from the build vector.
6332	if (!ISD::matchUnaryPredicate(Op: Op1, Match: BuildSDIVPattern))
6333	return SDValue ();
6334
6335	SDValue Shift, Factor;
6336	if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6337	Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
6338	Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
6339	} else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6340	assert(Shifts.size() == `1` && Factors.size() == `1` &&
6341	"Expected matchUnaryPredicate to return one element for scalable "
6342	"vectors");
6343	Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts [`0`]);
6344	Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors [`0`]);
6345	} else {
6346	assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6347	Shift = Shifts [`0`];
6348	Factor = Factors [`0`];
6349	}
6350
6351	SDValue Res = Op0;
6352	if (UseSRA) {
6353	Res = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Res, N2: Shift, Flags: SDNodeFlags::Exact);
6354	Created.push_back(Elt: Res.getNode());
6355	}
6356
6357	return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Res, N2: Factor);
6358	}
6359
6360	/// Given an exact UDIV by a constant, create a multiplication
6361	/// with the multiplicative inverse of the constant.
6362	/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6363	static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
6364	const SDLoc &dl, SelectionDAG &DAG,
6365	SmallVectorImpl<SDNode *> &Created) {
6366	EVT VT = N->getValueType(ResNo: `0`);
6367	EVT SVT = VT.getScalarType();
6368	EVT ShVT = TLI.getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6369	EVT ShSVT = ShVT.getScalarType();
6370
6371	bool UseSRL = false;
6372	SmallVector<SDValue, `16`> Shifts, Factors;
6373
6374	auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6375	if (C->isZero())
6376	return false;
6377	APInt Divisor = C->getAPIntValue();
6378	unsigned Shift = Divisor.countr_zero();
6379	if (Shift) {
6380	Divisor.lshrInPlace(ShiftAmt: Shift);
6381	UseSRL = true;
6382	}
6383	// Calculate the multiplicative inverse modulo BW.
6384	APInt Factor = Divisor.multiplicativeInverse();
6385	Shifts.push_back(Elt: DAG.getConstant(Val: Shift, DL: dl, VT: ShSVT));
6386	Factors.push_back(Elt: DAG.getConstant(Val: Factor, DL: dl, VT: SVT));
6387	return true;
6388	};
6389
6390	SDValue Op1 = N->getOperand(Num: `1`);
6391
6392	// Collect all magic values from the build vector.
6393	if (!ISD::matchUnaryPredicate(Op: Op1, Match: BuildUDIVPattern))
6394	return SDValue ();
6395
6396	SDValue Shift, Factor;
6397	if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6398	Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
6399	Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
6400	} else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6401	assert(Shifts.size() == `1` && Factors.size() == `1` &&
6402	"Expected matchUnaryPredicate to return one element for scalable "
6403	"vectors");
6404	Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts [`0`]);
6405	Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors [`0`]);
6406	} else {
6407	assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6408	Shift = Shifts [`0`];
6409	Factor = Factors [`0`];
6410	}
6411
6412	SDValue Res = N->getOperand(Num: `0`);
6413	if (UseSRL) {
6414	Res = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Res, N2: Shift, Flags: SDNodeFlags::Exact);
6415	Created.push_back(Elt: Res.getNode());
6416	}
6417
6418	return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Res, N2: Factor);
6419	}
6420
6421	SDValue TargetLowering::BuildSDIVPow2(SDNode N, const* APInt &Divisor,
6422	SelectionDAG &DAG,
6423	SmallVectorImpl<SDNode > &Created) const* {
6424	AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6425	if (isIntDivCheap(VT: N->getValueType(ResNo: `0`), Attr))
6426	return SDValue (N, `0`); // Lower SDIV as SDIV
6427	return SDValue ();
6428	}
6429
6430	SDValue
6431	TargetLowering::BuildSREMPow2(SDNode N, const* APInt &Divisor,
6432	SelectionDAG &DAG,
6433	SmallVectorImpl<SDNode > &Created) const* {
6434	AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6435	if (isIntDivCheap(VT: N->getValueType(ResNo: `0`), Attr))
6436	return SDValue (N, `0`); // Lower SREM as SREM
6437	return SDValue ();
6438	}
6439
6440	/// Build sdiv by power-of-2 with conditional move instructions
6441	/// Ref: "Hacker's Delight" by Henry Warren 10-1
6442	/// If conditional move/branch is preferred, we lower sdiv x, +/-2k into:
6443	/// bgez x, label
6444	/// add x, x, 2k-1
6445	/// label:
6446	/// sra res, x, k
6447	/// neg res, res (when the divisor is negative)
6448	SDValue TargetLowering::buildSDIVPow2WithCMov(
6449	SDNode N, const* APInt &Divisor, SelectionDAG &DAG,
6450	SmallVectorImpl<SDNode > &Created) const* {
6451	unsigned Lg2 = Divisor.countr_zero();
6452	EVT VT = N->getValueType(ResNo: `0`);
6453
6454	SDLoc DL(N);
6455	SDValue N0 = N->getOperand(Num: `0`);
6456	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT);
6457	APInt Lg2Mask = APInt::getLowBitsSet(numBits: VT.getSizeInBits(), loBitsSet: Lg2);
6458	SDValue Pow2MinusOne = DAG.getConstant(Val: Lg2Mask, DL, VT);
6459
6460	// If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6461	EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
6462	SDValue Cmp = DAG.getSetCC(DL, VT: CCVT, LHS: N0, RHS: Zero, Cond: ISD::SETLT);
6463	SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0, N2: Pow2MinusOne);
6464	SDValue CMov = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: Cmp, N2: Add, N3: N0);
6465
6466	Created.push_back(Elt: Cmp.getNode());
6467	Created.push_back(Elt: Add.getNode());
6468	Created.push_back(Elt: CMov.getNode());
6469
6470	// Divide by pow2.
6471	SDValue SRA =
6472	DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: CMov, N2: DAG.getConstant(Val: Lg2, DL, VT));
6473
6474	// If we're dividing by a positive value, we're done. Otherwise, we must
6475	// negate the result.
6476	if (Divisor.isNonNegative())
6477	return SRA;
6478
6479	Created.push_back(Elt: SRA.getNode());
6480	return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Zero, N2: SRA);
6481	}
6482
6483	/// Given an ISD::SDIV node expressing a divide by constant,
6484	/// return a DAG expression to select that will generate the same value by
6485	/// multiplying by a magic number.
6486	/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6487	SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
6488	bool IsAfterLegalization,
6489	bool IsAfterLegalTypes,
6490	SmallVectorImpl<SDNode > &Created) const* {
6491	SDLoc dl(N);
6492	EVT VT = N->getValueType(ResNo: `0`);
6493	EVT SVT = VT.getScalarType();
6494	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6495	EVT ShSVT = ShVT.getScalarType();
6496	unsigned EltBits = VT.getScalarSizeInBits();
6497	EVT MulVT;
6498
6499	// Check to see if we can do this.
6500	// FIXME: We should be more aggressive here.
6501	if (!isTypeLegal(VT)) {
6502	// Limit this to simple scalars for now.
6503	if (VT.isVector() \|\| !VT.isSimple())
6504	return SDValue ();
6505
6506	// If this type will be promoted to a large enough type with a legal
6507	// multiply operation, we can go ahead and do this transform.
6508	if (getTypeAction(VT: VT.getSimpleVT()) != TypePromoteInteger)
6509	return SDValue ();
6510
6511	MulVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT);
6512	if (MulVT.getSizeInBits() < (`2` * EltBits) \|\|
6513	!isOperationLegal(Op: ISD::MUL, VT: MulVT))
6514	return SDValue ();
6515	}
6516
6517	// If the sdiv has an 'exact' bit we can use a simpler lowering.
6518	if (N->getFlags().hasExact())
6519	return BuildExactSDIV(TLI: *this, N, dl, DAG, Created);
6520
6521	SmallVector<SDValue, `16`> MagicFactors, Factors, Shifts, ShiftMasks;
6522
6523	auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6524	if (C->isZero())
6525	return false;
6526
6527	const APInt &Divisor = C->getAPIntValue();
6528	SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(D: Divisor);
6529	int NumeratorFactor = `0`;
6530	int ShiftMask = -`1`;
6531
6532	if (Divisor.isOne() \|\| Divisor.isAllOnes()) {
6533	// If d is +1/-1, we just multiply the numerator by +1/-1.
6534	NumeratorFactor = Divisor.getSExtValue();
6535	magics.Magic = `0`;
6536	magics.ShiftAmount = `0`;
6537	ShiftMask = `0`;
6538	} else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6539	// If d > 0 and m < 0, add the numerator.
6540	NumeratorFactor = `1`;
6541	} else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6542	// If d < 0 and m > 0, subtract the numerator.
6543	NumeratorFactor = -`1`;
6544	}
6545
6546	MagicFactors.push_back(Elt: DAG.getConstant(Val: magics.Magic, DL: dl, VT: SVT));
6547	Factors.push_back(Elt: DAG.getSignedConstant(Val: NumeratorFactor, DL: dl, VT: SVT));
6548	Shifts.push_back(Elt: DAG.getConstant(Val: magics.ShiftAmount, DL: dl, VT: ShSVT));
6549	ShiftMasks.push_back(Elt: DAG.getSignedConstant(Val: ShiftMask, DL: dl, VT: SVT));
6550	return true;
6551	};
6552
6553	SDValue N0 = N->getOperand(Num: `0`);
6554	SDValue N1 = N->getOperand(Num: `1`);
6555
6556	// Collect the shifts / magic values from each element.
6557	if (!ISD::matchUnaryPredicate(Op: N1, Match: BuildSDIVPattern))
6558	return SDValue ();
6559
6560	SDValue MagicFactor, Factor, Shift, ShiftMask;
6561	if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6562	MagicFactor = DAG.getBuildVector(VT, DL: dl, Ops: MagicFactors);
6563	Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
6564	Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
6565	ShiftMask = DAG.getBuildVector(VT, DL: dl, Ops: ShiftMasks);
6566	} else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6567	assert(MagicFactors.size() == `1` && Factors.size() == `1` &&
6568	Shifts.size() == `1` && ShiftMasks.size() == `1` &&
6569	"Expected matchUnaryPredicate to return one element for scalable "
6570	"vectors");
6571	MagicFactor = DAG.getSplatVector(VT, DL: dl, Op: MagicFactors [`0`]);
6572	Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors [`0`]);
6573	Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts [`0`]);
6574	ShiftMask = DAG.getSplatVector(VT, DL: dl, Op: ShiftMasks [`0`]);
6575	} else {
6576	assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6577	MagicFactor = MagicFactors [`0`];
6578	Factor = Factors [`0`];
6579	Shift = Shifts [`0`];
6580	ShiftMask = ShiftMasks [`0`];
6581	}
6582
6583	// Multiply the numerator (operand 0) by the magic value.
6584	// FIXME: We should support doing a MUL in a wider type.
6585	auto GetMULHS = [&](SDValue X, SDValue Y) {
6586	// If the type isn't legal, use a wider mul of the type calculated
6587	// earlier.
6588	if (!isTypeLegal(VT)) {
6589	X = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MulVT, Operand: X);
6590	Y = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MulVT, Operand: Y);
6591	Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MulVT, N1: X, N2: Y);
6592	Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MulVT, N1: Y,
6593	N2: DAG.getShiftAmountConstant(Val: EltBits, VT: MulVT, DL: dl));
6594	return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6595	}
6596
6597	if (isOperationLegalOrCustom(Op: ISD::MULHS, VT, LegalOnly: IsAfterLegalization))
6598	return DAG.getNode(Opcode: ISD::MULHS, DL: dl, VT, N1: X, N2: Y);
6599	if (isOperationLegalOrCustom(Op: ISD::SMUL_LOHI, VT, LegalOnly: IsAfterLegalization)) {
6600	SDValue LoHi =
6601	DAG.getNode(Opcode: ISD::SMUL_LOHI, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: X, N2: Y);
6602	return SDValue (LoHi.getNode(), `1`);
6603	}
6604	// If type twice as wide legal, widen and use a mul plus a shift.
6605	unsigned Size = VT.getScalarSizeInBits();
6606	EVT WideVT = EVT::getIntegerVT(Context&: DAG.getContext(), BitWidth: Size `2`);
6607	if (VT.isVector())
6608	WideVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT,
6609	EC: VT.getVectorElementCount());
6610	// Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
6611	// custom lowered. This is very expensive so avoid it at all costs for
6612	// constant divisors.
6613	if ((!IsAfterLegalTypes && isOperationExpand(Op: ISD::SDIV, VT) &&
6614	isOperationCustom(Op: ISD::SDIVREM, VT: VT.getScalarType())) \|\|
6615	isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT)) {
6616	X = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: WideVT, Operand: X);
6617	Y = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: WideVT, Operand: Y);
6618	Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: X, N2: Y);
6619	Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideVT, N1: Y,
6620	N2: DAG.getShiftAmountConstant(Val: EltBits, VT: WideVT, DL: dl));
6621	return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6622	}
6623	return SDValue ();
6624	};
6625
6626	SDValue Q = GetMULHS (N0, MagicFactor);
6627	if (!Q)
6628	return SDValue ();
6629
6630	Created.push_back(Elt: Q.getNode());
6631
6632	// (Optionally) Add/subtract the numerator using Factor.
6633	Factor = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: N0, N2: Factor);
6634	Created.push_back(Elt: Factor.getNode());
6635	Q = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Q, N2: Factor);
6636	Created.push_back(Elt: Q.getNode());
6637
6638	// Shift right algebraic by shift value.
6639	Q = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Q, N2: Shift);
6640	Created.push_back(Elt: Q.getNode());
6641
6642	// Extract the sign bit, mask it and add it to the quotient.
6643	SDValue SignShift = DAG.getConstant(Val: EltBits - `1`, DL: dl, VT: ShVT);
6644	SDValue T = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: SignShift);
6645	Created.push_back(Elt: T.getNode());
6646	T = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: T, N2: ShiftMask);
6647	Created.push_back(Elt: T.getNode());
6648	return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Q, N2: T);
6649	}
6650
6651	/// Given an ISD::UDIV node expressing a divide by constant,
6652	/// return a DAG expression to select that will generate the same value by
6653	/// multiplying by a magic number.
6654	/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6655	SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
6656	bool IsAfterLegalization,
6657	bool IsAfterLegalTypes,
6658	SmallVectorImpl<SDNode > &Created) const* {
6659	SDLoc dl(N);
6660	EVT VT = N->getValueType(ResNo: `0`);
6661	EVT SVT = VT.getScalarType();
6662	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6663	EVT ShSVT = ShVT.getScalarType();
6664	unsigned EltBits = VT.getScalarSizeInBits();
6665	EVT MulVT;
6666
6667	// Check to see if we can do this.
6668	// FIXME: We should be more aggressive here.
6669	if (!isTypeLegal(VT)) {
6670	// Limit this to simple scalars for now.
6671	if (VT.isVector() \|\| !VT.isSimple())
6672	return SDValue ();
6673
6674	// If this type will be promoted to a large enough type with a legal
6675	// multiply operation, we can go ahead and do this transform.
6676	if (getTypeAction(VT: VT.getSimpleVT()) != TypePromoteInteger)
6677	return SDValue ();
6678
6679	MulVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT);
6680	if (MulVT.getSizeInBits() < (`2` * EltBits) \|\|
6681	!isOperationLegal(Op: ISD::MUL, VT: MulVT))
6682	return SDValue ();
6683	}
6684
6685	// If the udiv has an 'exact' bit we can use a simpler lowering.
6686	if (N->getFlags().hasExact())
6687	return BuildExactUDIV(TLI: *this, N, dl, DAG, Created);
6688
6689	SDValue N0 = N->getOperand(Num: `0`);
6690	SDValue N1 = N->getOperand(Num: `1`);
6691
6692	// Try to use leading zeros of the dividend to reduce the multiplier and
6693	// avoid expensive fixups.
6694	unsigned KnownLeadingZeros = DAG.computeKnownBits(Op: N0).countMinLeadingZeros();
6695
6696	bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6697	SmallVector<SDValue, `16`> PreShifts, PostShifts, MagicFactors, NPQFactors;
6698
6699	auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6700	if (C->isZero())
6701	return false;
6702	const APInt& Divisor = C->getAPIntValue();
6703
6704	SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6705
6706	// Magic algorithm doesn't work for division by 1. We need to emit a select
6707	// at the end.
6708	if (Divisor.isOne()) {
6709	PreShift = PostShift = DAG.getUNDEF(VT: ShSVT);
6710	MagicFactor = NPQFactor = DAG.getUNDEF(VT: SVT);
6711	} else {
6712	UnsignedDivisionByConstantInfo magics =
6713	UnsignedDivisionByConstantInfo::get(
6714	D: Divisor, LeadingZeros: std::min(a: KnownLeadingZeros, b: Divisor.countl_zero()));
6715
6716	MagicFactor = DAG.getConstant(Val: magics.Magic, DL: dl, VT: SVT);
6717
6718	assert(magics.PreShift < Divisor.getBitWidth() &&
6719	"We shouldn't generate an undefined shift!");
6720	assert(magics.PostShift < Divisor.getBitWidth() &&
6721	"We shouldn't generate an undefined shift!");
6722	assert((!magics.IsAdd \|\| magics.PreShift == `0`) &&
6723	"Unexpected pre-shift");
6724	PreShift = DAG.getConstant(Val: magics.PreShift, DL: dl, VT: ShSVT);
6725	PostShift = DAG.getConstant(Val: magics.PostShift, DL: dl, VT: ShSVT);
6726	NPQFactor = DAG.getConstant(
6727	Val: magics.IsAdd ? APInt::getOneBitSet(numBits: EltBits, BitNo: EltBits - `1`)
6728	: APInt::getZero(numBits: EltBits),
6729	DL: dl, VT: SVT);
6730	UseNPQ \|= magics.IsAdd;
6731	UsePreShift \|= magics.PreShift != `0`;
6732	UsePostShift \|= magics.PostShift != `0`;
6733	}
6734
6735	PreShifts.push_back(Elt: PreShift);
6736	MagicFactors.push_back(Elt: MagicFactor);
6737	NPQFactors.push_back(Elt: NPQFactor);
6738	PostShifts.push_back(Elt: PostShift);
6739	return true;
6740	};
6741
6742	// Collect the shifts/magic values from each element.
6743	if (!ISD::matchUnaryPredicate(Op: N1, Match: BuildUDIVPattern))
6744	return SDValue ();
6745
6746	SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6747	if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6748	PreShift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: PreShifts);
6749	MagicFactor = DAG.getBuildVector(VT, DL: dl, Ops: MagicFactors);
6750	NPQFactor = DAG.getBuildVector(VT, DL: dl, Ops: NPQFactors);
6751	PostShift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: PostShifts);
6752	} else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6753	assert(PreShifts.size() == `1` && MagicFactors.size() == `1` &&
6754	NPQFactors.size() == `1` && PostShifts.size() == `1` &&
6755	"Expected matchUnaryPredicate to return one for scalable vectors");
6756	PreShift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: PreShifts [`0`]);
6757	MagicFactor = DAG.getSplatVector(VT, DL: dl, Op: MagicFactors [`0`]);
6758	NPQFactor = DAG.getSplatVector(VT, DL: dl, Op: NPQFactors [`0`]);
6759	PostShift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: PostShifts [`0`]);
6760	} else {
6761	assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6762	PreShift = PreShifts [`0`];
6763	MagicFactor = MagicFactors [`0`];
6764	PostShift = PostShifts [`0`];
6765	}
6766
6767	SDValue Q = N0;
6768	if (UsePreShift) {
6769	Q = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: PreShift);
6770	Created.push_back(Elt: Q.getNode());
6771	}
6772
6773	// FIXME: We should support doing a MUL in a wider type.
6774	auto GetMULHU = [&](SDValue X, SDValue Y) {
6775	// If the type isn't legal, use a wider mul of the type calculated
6776	// earlier.
6777	if (!isTypeLegal(VT)) {
6778	X = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MulVT, Operand: X);
6779	Y = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MulVT, Operand: Y);
6780	Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MulVT, N1: X, N2: Y);
6781	Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MulVT, N1: Y,
6782	N2: DAG.getShiftAmountConstant(Val: EltBits, VT: MulVT, DL: dl));
6783	return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6784	}
6785
6786	if (isOperationLegalOrCustom(Op: ISD::MULHU, VT, LegalOnly: IsAfterLegalization))
6787	return DAG.getNode(Opcode: ISD::MULHU, DL: dl, VT, N1: X, N2: Y);
6788	if (isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT, LegalOnly: IsAfterLegalization)) {
6789	SDValue LoHi =
6790	DAG.getNode(Opcode: ISD::UMUL_LOHI, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: X, N2: Y);
6791	return SDValue (LoHi.getNode(), `1`);
6792	}
6793	// If type twice as wide legal, widen and use a mul plus a shift.
6794	unsigned Size = VT.getScalarSizeInBits();
6795	EVT WideVT = EVT::getIntegerVT(Context&: DAG.getContext(), BitWidth: Size `2`);
6796	if (VT.isVector())
6797	WideVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT,
6798	EC: VT.getVectorElementCount());
6799	// Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
6800	// custom lowered. This is very expensive so avoid it at all costs for
6801	// constant divisors.
6802	if ((!IsAfterLegalTypes && isOperationExpand(Op: ISD::UDIV, VT) &&
6803	isOperationCustom(Op: ISD::UDIVREM, VT: VT.getScalarType())) \|\|
6804	isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT)) {
6805	X = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: WideVT, Operand: X);
6806	Y = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: WideVT, Operand: Y);
6807	Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: X, N2: Y);
6808	Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideVT, N1: Y,
6809	N2: DAG.getShiftAmountConstant(Val: EltBits, VT: WideVT, DL: dl));
6810	return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6811	}
6812	return SDValue (); // No mulhu or equivalent
6813	};
6814
6815	// Multiply the numerator (operand 0) by the magic value.
6816	Q = GetMULHU (Q, MagicFactor);
6817	if (!Q)
6818	return SDValue ();
6819
6820	Created.push_back(Elt: Q.getNode());
6821
6822	if (UseNPQ) {
6823	SDValue NPQ = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: N0, N2: Q);
6824	Created.push_back(Elt: NPQ.getNode());
6825
6826	// For vectors we might have a mix of non-NPQ/NPQ paths, so use
6827	// MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
6828	if (VT.isVector())
6829	NPQ = GetMULHU (NPQ, NPQFactor);
6830	else
6831	NPQ = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: NPQ, N2: DAG.getConstant(Val: `1`, DL: dl, VT: ShVT));
6832
6833	Created.push_back(Elt: NPQ.getNode());
6834
6835	Q = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: NPQ, N2: Q);
6836	Created.push_back(Elt: Q.getNode());
6837	}
6838
6839	if (UsePostShift) {
6840	Q = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: PostShift);
6841	Created.push_back(Elt: Q.getNode());
6842	}
6843
6844	EVT SetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
6845
6846	SDValue One = DAG.getConstant(Val: `1`, DL: dl, VT);
6847	SDValue IsOne = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: N1, RHS: One, Cond: ISD::SETEQ);
6848	return DAG.getSelect(DL: dl, VT, Cond: IsOne, LHS: N0, RHS: Q);
6849	}
6850
6851	/// If all values in Values that don't* match the predicate are same 'splat'*
6852	/// value, then replace all values with that splat value.
6853	/// Else, if AlternativeReplacement was provided, then replace all values that
6854	/// do match predicate with AlternativeReplacement value.
6855	static void
6856	turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
6857	std::function<bool(SDValue)> Predicate,
6858	SDValue AlternativeReplacement = SDValue ()) {
6859	SDValue Replacement;
6860	// Is there a value for which the Predicate does NOT* match? What is it?*
6861	auto SplatValue = llvm::find_if_not(Range&: Values, P: Predicate);
6862	if (SplatValue != Values.end()) {
6863	// Does Values consist only of SplatValue's and values matching Predicate?
6864	if (llvm::all_of(Range&: Values, P: [Predicate, SplatValue](SDValue Value) {
6865	return Value == *SplatValue \|\| Predicate (Value);
6866	})) // Then we shall replace values matching predicate with SplatValue.
6867	Replacement = *SplatValue;
6868	}
6869	if (!Replacement) {
6870	// Oops, we did not find the "baseline" splat value.
6871	if (!AlternativeReplacement)
6872	return; // Nothing to do.
6873	// Let's replace with provided value then.
6874	Replacement = AlternativeReplacement;
6875	}
6876	std::replace_if(first: Values.begin(), last: Values.end(), pred: Predicate, new_value: Replacement);
6877	}
6878
6879	/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6880	/// where the divisor is constant and the comparison target is zero,
6881	/// return a DAG expression that will generate the same comparison result
6882	/// using only multiplications, additions and shifts/rotations.
6883	/// Ref: "Hacker's Delight" 10-17.
6884	SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6885	SDValue CompTargetNode,
6886	ISD::CondCode Cond,
6887	DAGCombinerInfo &DCI,
6888	const SDLoc &DL) const {
6889	SmallVector<SDNode *, `5`> Built;
6890	if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6891	DCI, DL, Created&: Built)) {
6892	for (SDNode *N : Built)
6893	DCI.AddToWorklist(N);
6894	return Folded;
6895	}
6896
6897	return SDValue ();
6898	}
6899
6900	SDValue
6901	TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
6902	SDValue CompTargetNode, ISD::CondCode Cond,
6903	DAGCombinerInfo &DCI, const SDLoc &DL,
6904	SmallVectorImpl<SDNode > &Created) const* {
6905	// fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
6906	// - D must be constant, with D = D0 2^K where D0 is odd*
6907	// - P is the multiplicative inverse of D0 modulo 2^W
6908	// - Q = floor(((2^W) - 1) / D)
6909	// where W is the width of the common type of N and D.
6910	assert((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
6911	"Only applicable for (in)equality comparisons.");
6912
6913	SelectionDAG &DAG = DCI.DAG;
6914
6915	EVT VT = REMNode.getValueType();
6916	EVT SVT = VT.getScalarType();
6917	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6918	EVT ShSVT = ShVT.getScalarType();
6919
6920	// If MUL is unavailable, we cannot proceed in any case.
6921	if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::MUL, VT))
6922	return SDValue ();
6923
6924	bool ComparingWithAllZeros = true;
6925	bool AllComparisonsWithNonZerosAreTautological = true;
6926	bool HadTautologicalLanes = false;
6927	bool AllLanesAreTautological = true;
6928	bool HadEvenDivisor = false;
6929	bool AllDivisorsArePowerOfTwo = true;
6930	bool HadTautologicalInvertedLanes = false;
6931	SmallVector<SDValue, `16`> PAmts, KAmts, QAmts;
6932
6933	auto BuildUREMPattern = [&](ConstantSDNode CDiv, ConstantSDNode CCmp) {
6934	// Division by 0 is UB. Leave it to be constant-folded elsewhere.
6935	if (CDiv->isZero())
6936	return false;
6937
6938	const APInt &D = CDiv->getAPIntValue();
6939	const APInt &Cmp = CCmp->getAPIntValue();
6940
6941	ComparingWithAllZeros &= Cmp.isZero();
6942
6943	// x u% C1` is always* less than C1. So given `x u% C1 == C2`,*
6944	// if C2 is not less than C1, the comparison is always false.
6945	// But we will only be able to produce the comparison that will give the
6946	// opposive tautological answer. So this lane would need to be fixed up.
6947	bool TautologicalInvertedLane = D.ule(RHS: Cmp);
6948	HadTautologicalInvertedLanes \|= TautologicalInvertedLane;
6949
6950	// If all lanes are tautological (either all divisors are ones, or divisor
6951	// is not greater than the constant we are comparing with),
6952	// we will prefer to avoid the fold.
6953	bool TautologicalLane = D.isOne() \|\| TautologicalInvertedLane;
6954	HadTautologicalLanes \|= TautologicalLane;
6955	AllLanesAreTautological &= TautologicalLane;
6956
6957	// If we are comparing with non-zero, we need'll need to subtract said
6958	// comparison value from the LHS. But there is no point in doing that if
6959	// every lane where we are comparing with non-zero is tautological..
6960	if (!Cmp.isZero())
6961	AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
6962
6963	// Decompose D into D0 2^K*
6964	unsigned K = D.countr_zero();
6965	assert((!D.isOne() \|\| (K == `0`)) && "For divisor '1' we won't rotate.");
6966	APInt D0 = D.lshr(shiftAmt: K);
6967
6968	// D is even if it has trailing zeros.
6969	HadEvenDivisor \|= (K != `0`);
6970	// D is a power-of-two if D0 is one.
6971	// If all divisors are power-of-two, we will prefer to avoid the fold.
6972	AllDivisorsArePowerOfTwo &= D0.isOne();
6973
6974	// P = inv(D0, 2^W)
6975	// 2^W requires W + 1 bits, so we have to extend and then truncate.
6976	unsigned W = D.getBitWidth();
6977	APInt P = D0.multiplicativeInverse();
6978	assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6979
6980	// Q = floor((2^W - 1) u/ D)
6981	// R = ((2^W - 1) u% D)
6982	APInt Q, R;
6983	APInt::udivrem(LHS: APInt::getAllOnes(numBits: W), RHS: D, Quotient&: Q, Remainder&: R);
6984
6985	// If we are comparing with zero, then that comparison constant is okay,
6986	// else it may need to be one less than that.
6987	if (Cmp.ugt(RHS: R))
6988	Q -= `1`;
6989
6990	assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
6991	"We are expecting that K is always less than all-ones for ShSVT");
6992
6993	// If the lane is tautological the result can be constant-folded.
6994	if (TautologicalLane) {
6995	// Set P and K amount to a bogus values so we can try to splat them.
6996	P = `0`;
6997	K = -`1`;
6998	// And ensure that comparison constant is tautological,
6999	// it will always compare true/false.
7000	Q = -`1`;
7001	}
7002
7003	PAmts.push_back(Elt: DAG.getConstant(Val: P, DL, VT: SVT));
7004	KAmts.push_back(
7005	Elt: DAG.getConstant(Val: APInt (ShSVT.getSizeInBits(), K, /isSigned=/false,
7006	/implicitTrunc=/true),
7007	DL, VT: ShSVT));
7008	QAmts.push_back(Elt: DAG.getConstant(Val: Q, DL, VT: SVT));
7009	return true;
7010	};
7011
7012	SDValue N = REMNode.getOperand(i: `0`);
7013	SDValue D = REMNode.getOperand(i: `1`);
7014
7015	// Collect the values from each element.
7016	if (!ISD::matchBinaryPredicate(LHS: D, RHS: CompTargetNode, Match: BuildUREMPattern))
7017	return SDValue ();
7018
7019	// If all lanes are tautological, the result can be constant-folded.
7020	if (AllLanesAreTautological)
7021	return SDValue ();
7022
7023	// If this is a urem by a powers-of-two, avoid the fold since it can be
7024	// best implemented as a bit test.
7025	if (AllDivisorsArePowerOfTwo)
7026	return SDValue ();
7027
7028	SDValue PVal, KVal, QVal;
7029	if (D.getOpcode() == ISD::BUILD_VECTOR) {
7030	if (HadTautologicalLanes) {
7031	// Try to turn PAmts into a splat, since we don't care about the values
7032	// that are currently '0'. If we can't, just keep '0'`s.
7033	turnVectorIntoSplatVector(Values: PAmts, Predicate: isNullConstant);
7034	// Try to turn KAmts into a splat, since we don't care about the values
7035	// that are currently '-1'. If we can't, change them to '0'`s.
7036	turnVectorIntoSplatVector(Values: KAmts, Predicate: isAllOnesConstant,
7037	AlternativeReplacement: DAG.getConstant(Val: `0`, DL, VT: ShSVT));
7038	}
7039
7040	PVal = DAG.getBuildVector(VT, DL, Ops: PAmts);
7041	KVal = DAG.getBuildVector(VT: ShVT, DL, Ops: KAmts);
7042	QVal = DAG.getBuildVector(VT, DL, Ops: QAmts);
7043	} else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7044	assert(PAmts.size() == `1` && KAmts.size() == `1` && QAmts.size() == `1` &&
7045	"Expected matchBinaryPredicate to return one element for "
7046	"SPLAT_VECTORs");
7047	PVal = DAG.getSplatVector(VT, DL, Op: PAmts [`0`]);
7048	KVal = DAG.getSplatVector(VT: ShVT, DL, Op: KAmts [`0`]);
7049	QVal = DAG.getSplatVector(VT, DL, Op: QAmts [`0`]);
7050	} else {
7051	PVal = PAmts [`0`];
7052	KVal = KAmts [`0`];
7053	QVal = QAmts [`0`];
7054	}
7055
7056	if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
7057	if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::SUB, VT))
7058	return SDValue (); // FIXME: Could/should use `ISD::ADD`?
7059	assert(CompTargetNode.getValueType() == N.getValueType() &&
7060	"Expecting that the types on LHS and RHS of comparisons match.");
7061	N = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: N, N2: CompTargetNode);
7062	}
7063
7064	// (mul N, P)
7065	SDValue Op0 = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N, N2: PVal);
7066	Created.push_back(Elt: Op0.getNode());
7067
7068	// Rotate right only if any divisor was even. We avoid rotates for all-odd
7069	// divisors as a performance improvement, since rotating by 0 is a no-op.
7070	if (HadEvenDivisor) {
7071	// We need ROTR to do this.
7072	if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ROTR, VT))
7073	return SDValue ();
7074	// UREM: (rotr (mul N, P), K)
7075	Op0 = DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: Op0, N2: KVal);
7076	Created.push_back(Elt: Op0.getNode());
7077	}
7078
7079	// UREM: (setule/setugt (rotr (mul N, P), K), Q)
7080	SDValue NewCC =
7081	DAG.getSetCC(DL, VT: SETCCVT, LHS: Op0, RHS: QVal,
7082	Cond: ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7083	if (!HadTautologicalInvertedLanes)
7084	return NewCC;
7085
7086	// If any lanes previously compared always-false, the NewCC will give
7087	// always-true result for them, so we need to fixup those lanes.
7088	// Or the other way around for inequality predicate.
7089	assert(VT.isVector() && "Can/should only get here for vectors.");
7090	Created.push_back(Elt: NewCC.getNode());
7091
7092	// x u% C1` is always* less than C1. So given `x u% C1 == C2`,*
7093	// if C2 is not less than C1, the comparison is always false.
7094	// But we have produced the comparison that will give the
7095	// opposive tautological answer. So these lanes would need to be fixed up.
7096	SDValue TautologicalInvertedChannels =
7097	DAG.getSetCC(DL, VT: SETCCVT, LHS: D, RHS: CompTargetNode, Cond: ISD::SETULE);
7098	Created.push_back(Elt: TautologicalInvertedChannels.getNode());
7099
7100	// NOTE: we avoid letting illegal types through even if we're before legalize
7101	// ops – legalization has a hard time producing good code for this.
7102	if (isOperationLegalOrCustom(Op: ISD::VSELECT, VT: SETCCVT)) {
7103	// If we have a vector select, let's replace the comparison results in the
7104	// affected lanes with the correct tautological result.
7105	SDValue Replacement = DAG.getBoolConstant(V: Cond == ISD::SETEQ ? false : true,
7106	DL, VT: SETCCVT, OpVT: SETCCVT);
7107	return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: SETCCVT, N1: TautologicalInvertedChannels,
7108	N2: Replacement, N3: NewCC);
7109	}
7110
7111	// Else, we can just invert the comparison result in the appropriate lanes.
7112	//
7113	// NOTE: see the note above VSELECT above.
7114	if (isOperationLegalOrCustom(Op: ISD::XOR, VT: SETCCVT))
7115	return DAG.getNode(Opcode: ISD::XOR, DL, VT: SETCCVT, N1: NewCC,
7116	N2: TautologicalInvertedChannels);
7117
7118	return SDValue (); // Don't know how to lower.
7119	}
7120
7121	/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
7122	/// where the divisor is constant and the comparison target is zero,
7123	/// return a DAG expression that will generate the same comparison result
7124	/// using only multiplications, additions and shifts/rotations.
7125	/// Ref: "Hacker's Delight" 10-17.
7126	SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
7127	SDValue CompTargetNode,
7128	ISD::CondCode Cond,
7129	DAGCombinerInfo &DCI,
7130	const SDLoc &DL) const {
7131	SmallVector<SDNode *, `7`> Built;
7132	if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7133	DCI, DL, Created&: Built)) {
7134	assert(Built.size() <= `7` && "Max size prediction failed.");
7135	for (SDNode *N : Built)
7136	DCI.AddToWorklist(N);
7137	return Folded;
7138	}
7139
7140	return SDValue ();
7141	}
7142
7143	SDValue
7144	TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
7145	SDValue CompTargetNode, ISD::CondCode Cond,
7146	DAGCombinerInfo &DCI, const SDLoc &DL,
7147	SmallVectorImpl<SDNode > &Created) const* {
7148	// Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
7149	// Fold:
7150	// (seteq/ne (srem N, D), 0)
7151	// To:
7152	// (setule/ugt (rotr (add (mul N, P), A), K), Q)
7153	//
7154	// - D must be constant, with D = D0 2^K where D0 is odd*
7155	// - P is the multiplicative inverse of D0 modulo 2^W
7156	// - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
7157	// - Q = floor((2 A) / (2^K))*
7158	// where W is the width of the common type of N and D.
7159	//
7160	// When D is a power of two (and thus D0 is 1), the normal
7161	// formula for A and Q don't apply, because the derivation
7162	// depends on D not dividing 2^(W-1), and thus theorem ZRS
7163	// does not apply. This specifically fails when N = INT_MIN.
7164	//
7165	// Instead, for power-of-two D, we use:
7166	// - A = 2^(W-1)
7167	// \|-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
7168	// - Q = 2^(W-K) - 1
7169	// \|-> Test that the top K bits are zero after rotation
7170	assert((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
7171	"Only applicable for (in)equality comparisons.");
7172
7173	SelectionDAG &DAG = DCI.DAG;
7174
7175	EVT VT = REMNode.getValueType();
7176	EVT SVT = VT.getScalarType();
7177	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
7178	EVT ShSVT = ShVT.getScalarType();
7179
7180	// If we are after ops legalization, and MUL is unavailable, we can not
7181	// proceed.
7182	if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::MUL, VT))
7183	return SDValue ();
7184
7185	// TODO: Could support comparing with non-zero too.
7186	ConstantSDNode *CompTarget = isConstOrConstSplat(N: CompTargetNode);
7187	if (!CompTarget \|\| !CompTarget->isZero())
7188	return SDValue ();
7189
7190	bool HadIntMinDivisor = false;
7191	bool HadOneDivisor = false;
7192	bool AllDivisorsAreOnes = true;
7193	bool HadEvenDivisor = false;
7194	bool NeedToApplyOffset = false;
7195	bool AllDivisorsArePowerOfTwo = true;
7196	SmallVector<SDValue, `16`> PAmts, AAmts, KAmts, QAmts;
7197
7198	auto BuildSREMPattern = [&](ConstantSDNode *C) {
7199	// Division by 0 is UB. Leave it to be constant-folded elsewhere.
7200	if (C->isZero())
7201	return false;
7202
7203	// FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
7204
7205	// WARNING: this fold is only valid for positive divisors!
7206	APInt D = C->getAPIntValue();
7207	if (D.isNegative())
7208	D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
7209
7210	HadIntMinDivisor \|= D.isMinSignedValue();
7211
7212	// If all divisors are ones, we will prefer to avoid the fold.
7213	HadOneDivisor \|= D.isOne();
7214	AllDivisorsAreOnes &= D.isOne();
7215
7216	// Decompose D into D0 2^K*
7217	unsigned K = D.countr_zero();
7218	assert((!D.isOne() \|\| (K == `0`)) && "For divisor '1' we won't rotate.");
7219	APInt D0 = D.lshr(shiftAmt: K);
7220
7221	if (!D.isMinSignedValue()) {
7222	// D is even if it has trailing zeros; unless it's INT_MIN, in which case
7223	// we don't care about this lane in this fold, we'll special-handle it.
7224	HadEvenDivisor \|= (K != `0`);
7225	}
7226
7227	// D is a power-of-two if D0 is one. This includes INT_MIN.
7228	// If all divisors are power-of-two, we will prefer to avoid the fold.
7229	AllDivisorsArePowerOfTwo &= D0.isOne();
7230
7231	// P = inv(D0, 2^W)
7232	// 2^W requires W + 1 bits, so we have to extend and then truncate.
7233	unsigned W = D.getBitWidth();
7234	APInt P = D0.multiplicativeInverse();
7235	assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7236
7237	// A = floor((2^(W - 1) - 1) / D0) & -2^K
7238	APInt A = APInt::getSignedMaxValue(numBits: W).udiv(RHS: D0);
7239	A.clearLowBits(loBits: K);
7240
7241	if (!D.isMinSignedValue()) {
7242	// If divisor INT_MIN, then we don't care about this lane in this fold,
7243	// we'll special-handle it.
7244	NeedToApplyOffset \|= A != `0`;
7245	}
7246
7247	// Q = floor((2 A) / (2^K))*
7248	APInt Q = (`2` * A).udiv(RHS: APInt::getOneBitSet(numBits: W, BitNo: K));
7249
7250	assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
7251	"We are expecting that A is always less than all-ones for SVT");
7252	assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
7253	"We are expecting that K is always less than all-ones for ShSVT");
7254
7255	// If D was a power of two, apply the alternate constant derivation.
7256	if (D0.isOne()) {
7257	// A = 2^(W-1)
7258	A = APInt::getSignedMinValue(numBits: W);
7259	// - Q = 2^(W-K) - 1
7260	Q = APInt::getAllOnes(numBits: W - K).zext(width: W);
7261	}
7262
7263	// If the divisor is 1 the result can be constant-folded. Likewise, we
7264	// don't care about INT_MIN lanes, those can be set to undef if appropriate.
7265	if (D.isOne()) {
7266	// Set P, A and K to a bogus values so we can try to splat them.
7267	P = `0`;
7268	A = -`1`;
7269	K = -`1`;
7270
7271	// x ?% 1 == 0 <--> true <--> x u<= -1
7272	Q = -`1`;
7273	}
7274
7275	PAmts.push_back(Elt: DAG.getConstant(Val: P, DL, VT: SVT));
7276	AAmts.push_back(Elt: DAG.getConstant(Val: A, DL, VT: SVT));
7277	KAmts.push_back(
7278	Elt: DAG.getConstant(Val: APInt (ShSVT.getSizeInBits(), K, /isSigned=/false,
7279	/implicitTrunc=/true),
7280	DL, VT: ShSVT));
7281	QAmts.push_back(Elt: DAG.getConstant(Val: Q, DL, VT: SVT));
7282	return true;
7283	};
7284
7285	SDValue N = REMNode.getOperand(i: `0`);
7286	SDValue D = REMNode.getOperand(i: `1`);
7287
7288	// Collect the values from each element.
7289	if (!ISD::matchUnaryPredicate(Op: D, Match: BuildSREMPattern))
7290	return SDValue ();
7291
7292	// If this is a srem by a one, avoid the fold since it can be constant-folded.
7293	if (AllDivisorsAreOnes)
7294	return SDValue ();
7295
7296	// If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
7297	// since it can be best implemented as a bit test.
7298	if (AllDivisorsArePowerOfTwo)
7299	return SDValue ();
7300
7301	SDValue PVal, AVal, KVal, QVal;
7302	if (D.getOpcode() == ISD::BUILD_VECTOR) {
7303	if (HadOneDivisor) {
7304	// Try to turn PAmts into a splat, since we don't care about the values
7305	// that are currently '0'. If we can't, just keep '0'`s.
7306	turnVectorIntoSplatVector(Values: PAmts, Predicate: isNullConstant);
7307	// Try to turn AAmts into a splat, since we don't care about the
7308	// values that are currently '-1'. If we can't, change them to '0'`s.
7309	turnVectorIntoSplatVector(Values: AAmts, Predicate: isAllOnesConstant,
7310	AlternativeReplacement: DAG.getConstant(Val: `0`, DL, VT: SVT));
7311	// Try to turn KAmts into a splat, since we don't care about the values
7312	// that are currently '-1'. If we can't, change them to '0'`s.
7313	turnVectorIntoSplatVector(Values: KAmts, Predicate: isAllOnesConstant,
7314	AlternativeReplacement: DAG.getConstant(Val: `0`, DL, VT: ShSVT));
7315	}
7316
7317	PVal = DAG.getBuildVector(VT, DL, Ops: PAmts);
7318	AVal = DAG.getBuildVector(VT, DL, Ops: AAmts);
7319	KVal = DAG.getBuildVector(VT: ShVT, DL, Ops: KAmts);
7320	QVal = DAG.getBuildVector(VT, DL, Ops: QAmts);
7321	} else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7322	assert(PAmts.size() == `1` && AAmts.size() == `1` && KAmts.size() == `1` &&
7323	QAmts.size() == `1` &&
7324	"Expected matchUnaryPredicate to return one element for scalable "
7325	"vectors");
7326	PVal = DAG.getSplatVector(VT, DL, Op: PAmts [`0`]);
7327	AVal = DAG.getSplatVector(VT, DL, Op: AAmts [`0`]);
7328	KVal = DAG.getSplatVector(VT: ShVT, DL, Op: KAmts [`0`]);
7329	QVal = DAG.getSplatVector(VT, DL, Op: QAmts [`0`]);
7330	} else {
7331	assert(isa<ConstantSDNode>(D) && "Expected a constant");
7332	PVal = PAmts [`0`];
7333	AVal = AAmts [`0`];
7334	KVal = KAmts [`0`];
7335	QVal = QAmts [`0`];
7336	}
7337
7338	// (mul N, P)
7339	SDValue Op0 = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N, N2: PVal);
7340	Created.push_back(Elt: Op0.getNode());
7341
7342	if (NeedToApplyOffset) {
7343	// We need ADD to do this.
7344	if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ADD, VT))
7345	return SDValue ();
7346
7347	// (add (mul N, P), A)
7348	Op0 = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0, N2: AVal);
7349	Created.push_back(Elt: Op0.getNode());
7350	}
7351
7352	// Rotate right only if any divisor was even. We avoid rotates for all-odd
7353	// divisors as a performance improvement, since rotating by 0 is a no-op.
7354	if (HadEvenDivisor) {
7355	// We need ROTR to do this.
7356	if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ROTR, VT))
7357	return SDValue ();
7358	// SREM: (rotr (add (mul N, P), A), K)
7359	Op0 = DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: Op0, N2: KVal);
7360	Created.push_back(Elt: Op0.getNode());
7361	}
7362
7363	// SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7364	SDValue Fold =
7365	DAG.getSetCC(DL, VT: SETCCVT, LHS: Op0, RHS: QVal,
7366	Cond: ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7367
7368	// If we didn't have lanes with INT_MIN divisor, then we're done.
7369	if (!HadIntMinDivisor)
7370	return Fold;
7371
7372	// That fold is only valid for positive divisors. Which effectively means,
7373	// it is invalid for INT_MIN divisors. So if we have such a lane,
7374	// we must fix-up results for said lanes.
7375	assert(VT.isVector() && "Can/should only get here for vectors.");
7376
7377	// NOTE: we avoid letting illegal types through even if we're before legalize
7378	// ops – legalization has a hard time producing good code for the code that
7379	// follows.
7380	if (!isOperationLegalOrCustom(Op: ISD::SETCC, VT: SETCCVT) \|\|
7381	!isOperationLegalOrCustom(Op: ISD::AND, VT) \|\|
7382	!isCondCodeLegalOrCustom(CC: Cond, VT: VT.getSimpleVT()) \|\|
7383	!isOperationLegalOrCustom(Op: ISD::VSELECT, VT: SETCCVT))
7384	return SDValue ();
7385
7386	Created.push_back(Elt: Fold.getNode());
7387
7388	SDValue IntMin = DAG.getConstant(
7389	Val: APInt::getSignedMinValue(numBits: SVT.getScalarSizeInBits()), DL, VT);
7390	SDValue IntMax = DAG.getConstant(
7391	Val: APInt::getSignedMaxValue(numBits: SVT.getScalarSizeInBits()), DL, VT);
7392	SDValue Zero =
7393	DAG.getConstant(Val: APInt::getZero(numBits: SVT.getScalarSizeInBits()), DL, VT);
7394
7395	// Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7396	SDValue DivisorIsIntMin = DAG.getSetCC(DL, VT: SETCCVT, LHS: D, RHS: IntMin, Cond: ISD::SETEQ);
7397	Created.push_back(Elt: DivisorIsIntMin.getNode());
7398
7399	// (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
7400	SDValue Masked = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: N, N2: IntMax);
7401	Created.push_back(Elt: Masked.getNode());
7402	SDValue MaskedIsZero = DAG.getSetCC(DL, VT: SETCCVT, LHS: Masked, RHS: Zero, Cond);
7403	Created.push_back(Elt: MaskedIsZero.getNode());
7404
7405	// To produce final result we need to blend 2 vectors: 'SetCC' and
7406	// 'MaskedIsZero'. If the divisor for channel was NOT* INT_MIN, we pick*
7407	// from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7408	// constant-folded, select can get lowered to a shuffle with constant mask.
7409	SDValue Blended = DAG.getNode(Opcode: ISD::VSELECT, DL, VT: SETCCVT, N1: DivisorIsIntMin,
7410	N2: MaskedIsZero, N3: Fold);
7411
7412	return Blended;
7413	}
7414
7415	bool TargetLowering::
7416	verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
7417	if (!isa<ConstantSDNode>(Val: Op.getOperand(i: `0`))) {
7418	DAG.getContext()->emitError(ErrorStr: "argument to '__builtin_return_address' must "
7419	"be a constant integer");
7420	return true;
7421	}
7422
7423	return false;
7424	}
7425
7426	SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
7427	const DenormalMode &Mode) const {
7428	SDLoc DL(Op);
7429	EVT VT = Op.getValueType();
7430	EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
7431	SDValue FPZero = DAG.getConstantFP(Val: `0.0`, DL, VT);
7432
7433	// This is specifically a check for the handling of denormal inputs, not the
7434	// result.
7435	if (Mode.Input == DenormalMode::PreserveSign \|\|
7436	Mode.Input == DenormalMode::PositiveZero) {
7437	// Test = X == 0.0
7438	return DAG.getSetCC(DL, VT: CCVT, LHS: Op, RHS: FPZero, Cond: ISD::SETEQ);
7439	}
7440
7441	// Testing it with denormal inputs to avoid wrong estimate.
7442	//
7443	// Test = fabs(X) < SmallestNormal
7444	const fltSemantics &FltSem = VT.getFltSemantics();
7445	APFloat SmallestNorm = APFloat::getSmallestNormalized(Sem: FltSem);
7446	SDValue NormC = DAG.getConstantFP(Val: SmallestNorm, DL, VT);
7447	SDValue Fabs = DAG.getNode(Opcode: ISD::FABS, DL, VT, Operand: Op);
7448	return DAG.getSetCC(DL, VT: CCVT, LHS: Fabs, RHS: NormC, Cond: ISD::SETLT);
7449	}
7450
7451	SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
7452	bool LegalOps, bool OptForSize,
7453	NegatibleCost &Cost,
7454	unsigned Depth) const {
7455	// fneg is removable even if it has multiple uses.
7456	if (Op.getOpcode() == ISD::FNEG \|\| Op.getOpcode() == ISD::VP_FNEG) {
7457	Cost = NegatibleCost::Cheaper;
7458	return Op.getOperand(i: `0`);
7459	}
7460
7461	// Don't recurse exponentially.
7462	if (Depth > SelectionDAG::MaxRecursionDepth)
7463	return SDValue ();
7464
7465	// Pre-increment recursion depth for use in recursive calls.
7466	++Depth;
7467	const SDNodeFlags Flags = Op ->getFlags();
7468	const TargetOptions &Options = DAG.getTarget().Options;
7469	EVT VT = Op.getValueType();
7470	unsigned Opcode = Op.getOpcode();
7471
7472	// Don't allow anything with multiple uses unless we know it is free.
7473	if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7474	bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7475	isFPExtFree(DestVT: VT, SrcVT: Op.getOperand(i: `0`).getValueType());
7476	if (!IsFreeExtend)
7477	return SDValue ();
7478	}
7479
7480	auto RemoveDeadNode = [&](SDValue N) {
7481	if (N && N.getNode()->use_empty())
7482	DAG.RemoveDeadNode(N: N.getNode());
7483	};
7484
7485	SDLoc DL(Op);
7486
7487	// Because getNegatedExpression can delete nodes we need a handle to keep
7488	// temporary nodes alive in case the recursion manages to create an identical
7489	// node.
7490	std::list<HandleSDNode> Handles;
7491
7492	switch (Opcode) {
7493	case ISD::ConstantFP: {
7494	// Don't invert constant FP values after legalization unless the target says
7495	// the negated constant is legal.
7496	bool IsOpLegal =
7497	isOperationLegal(Op: ISD::ConstantFP, VT) \|\|
7498	isFPImmLegal(neg(X: cast<ConstantFPSDNode>(Val&: Op)->getValueAPF()), VT,
7499	ForCodeSize: OptForSize);
7500
7501	if (LegalOps && !IsOpLegal)
7502	break;
7503
7504	APFloat V = cast<ConstantFPSDNode>(Val&: Op)->getValueAPF();
7505	V.changeSign();
7506	SDValue CFP = DAG.getConstantFP(Val: V, DL, VT);
7507
7508	// If we already have the use of the negated floating constant, it is free
7509	// to negate it even it has multiple uses.
7510	if (!Op.hasOneUse() && CFP.use_empty())
7511	break;
7512	Cost = NegatibleCost::Neutral;
7513	return CFP;
7514	}
7515	case ISD::BUILD_VECTOR: {
7516	// Only permit BUILD_VECTOR of constants.
7517	if (llvm::any_of(Range: Op ->op_values(), P: [&](SDValue N) {
7518	return !N.isUndef() && !isa<ConstantFPSDNode>(Val: N);
7519	}))
7520	break;
7521
7522	bool IsOpLegal =
7523	(isOperationLegal(Op: ISD::ConstantFP, VT) &&
7524	isOperationLegal(Op: ISD::BUILD_VECTOR, VT)) \|\|
7525	llvm::all_of(Range: Op ->op_values(), P: [&](SDValue N) {
7526	return N.isUndef() \|\|
7527	isFPImmLegal(neg(X: cast<ConstantFPSDNode>(Val&: N)->getValueAPF()), VT,
7528	ForCodeSize: OptForSize);
7529	});
7530
7531	if (LegalOps && !IsOpLegal)
7532	break;
7533
7534	SmallVector<SDValue, `4`> Ops;
7535	for (SDValue C : Op ->op_values()) {
7536	if (C.isUndef()) {
7537	Ops.push_back(Elt: C);
7538	continue;
7539	}
7540	APFloat V = cast<ConstantFPSDNode>(Val&: C)->getValueAPF();
7541	V.changeSign();
7542	Ops.push_back(Elt: DAG.getConstantFP(Val: V, DL, VT: C.getValueType()));
7543	}
7544	Cost = NegatibleCost::Neutral;
7545	return DAG.getBuildVector(VT, DL, Ops);
7546	}
7547	case ISD::FADD: {
7548	if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7549	break;
7550
7551	// After operation legalization, it might not be legal to create new FSUBs.
7552	if (LegalOps && !isOperationLegalOrCustom(Op: ISD::FSUB, VT))
7553	break;
7554	SDValue X = Op.getOperand(i: `0`), Y = Op.getOperand(i: `1`);
7555
7556	// fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7557	NegatibleCost CostX = NegatibleCost::Expensive;
7558	SDValue NegX =
7559	getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7560	// Prevent this node from being deleted by the next call.
7561	if (NegX)
7562	Handles.emplace_back(args&: NegX);
7563
7564	// fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7565	NegatibleCost CostY = NegatibleCost::Expensive;
7566	SDValue NegY =
7567	getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7568
7569	// We're done with the handles.
7570	Handles.clear();
7571
7572	// Negate the X if its cost is less or equal than Y.
7573	if (NegX && (CostX <= CostY)) {
7574	Cost = CostX;
7575	SDValue N = DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: NegX, N2: Y, Flags);
7576	if (NegY != N)
7577	RemoveDeadNode (NegY);
7578	return N;
7579	}
7580
7581	// Negate the Y if it is not expensive.
7582	if (NegY) {
7583	Cost = CostY;
7584	SDValue N = DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: NegY, N2: X, Flags);
7585	if (NegX != N)
7586	RemoveDeadNode (NegX);
7587	return N;
7588	}
7589	break;
7590	}
7591	case ISD::FSUB: {
7592	// We can't turn -(A-B) into B-A when we honor signed zeros.
7593	if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7594	break;
7595
7596	SDValue X = Op.getOperand(i: `0`), Y = Op.getOperand(i: `1`);
7597	// fold (fneg (fsub 0, Y)) -> Y
7598	if (ConstantFPSDNode C = isConstOrConstSplatFP(N: X, /AllowUndefs/* true))
7599	if (C->isZero()) {
7600	Cost = NegatibleCost::Cheaper;
7601	return Y;
7602	}
7603
7604	// fold (fneg (fsub X, Y)) -> (fsub Y, X)
7605	Cost = NegatibleCost::Neutral;
7606	return DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: Y, N2: X, Flags);
7607	}
7608	case ISD::FMUL:
7609	case ISD::FDIV: {
7610	SDValue X = Op.getOperand(i: `0`), Y = Op.getOperand(i: `1`);
7611
7612	// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7613	NegatibleCost CostX = NegatibleCost::Expensive;
7614	SDValue NegX =
7615	getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7616	// Prevent this node from being deleted by the next call.
7617	if (NegX)
7618	Handles.emplace_back(args&: NegX);
7619
7620	// fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7621	NegatibleCost CostY = NegatibleCost::Expensive;
7622	SDValue NegY =
7623	getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7624
7625	// We're done with the handles.
7626	Handles.clear();
7627
7628	// Negate the X if its cost is less or equal than Y.
7629	if (NegX && (CostX <= CostY)) {
7630	Cost = CostX;
7631	SDValue N = DAG.getNode(Opcode, DL, VT, N1: NegX, N2: Y, Flags);
7632	if (NegY != N)
7633	RemoveDeadNode (NegY);
7634	return N;
7635	}
7636
7637	// Ignore X 2.0 because that is expected to be canonicalized to X + X.*
7638	if (auto *C = isConstOrConstSplatFP(N: Op.getOperand(i: `1`)))
7639	if (C->isExactlyValue(V: `2.0`) && Op.getOpcode() == ISD::FMUL)
7640	break;
7641
7642	// Negate the Y if it is not expensive.
7643	if (NegY) {
7644	Cost = CostY;
7645	SDValue N = DAG.getNode(Opcode, DL, VT, N1: X, N2: NegY, Flags);
7646	if (NegX != N)
7647	RemoveDeadNode (NegX);
7648	return N;
7649	}
7650	break;
7651	}
7652	case ISD::FMA:
7653	case ISD::FMAD: {
7654	if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7655	break;
7656
7657	SDValue X = Op.getOperand(i: `0`), Y = Op.getOperand(i: `1`), Z = Op.getOperand(i: `2`);
7658	NegatibleCost CostZ = NegatibleCost::Expensive;
7659	SDValue NegZ =
7660	getNegatedExpression(Op: Z, DAG, LegalOps, OptForSize, Cost&: CostZ, Depth);
7661	// Give up if fail to negate the Z.
7662	if (!NegZ)
7663	break;
7664
7665	// Prevent this node from being deleted by the next two calls.
7666	Handles.emplace_back(args&: NegZ);
7667
7668	// fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7669	NegatibleCost CostX = NegatibleCost::Expensive;
7670	SDValue NegX =
7671	getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7672	// Prevent this node from being deleted by the next call.
7673	if (NegX)
7674	Handles.emplace_back(args&: NegX);
7675
7676	// fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7677	NegatibleCost CostY = NegatibleCost::Expensive;
7678	SDValue NegY =
7679	getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7680
7681	// We're done with the handles.
7682	Handles.clear();
7683
7684	// Negate the X if its cost is less or equal than Y.
7685	if (NegX && (CostX <= CostY)) {
7686	Cost = std::min(a: CostX, b: CostZ);
7687	SDValue N = DAG.getNode(Opcode, DL, VT, N1: NegX, N2: Y, N3: NegZ, Flags);
7688	if (NegY != N)
7689	RemoveDeadNode (NegY);
7690	return N;
7691	}
7692
7693	// Negate the Y if it is not expensive.
7694	if (NegY) {
7695	Cost = std::min(a: CostY, b: CostZ);
7696	SDValue N = DAG.getNode(Opcode, DL, VT, N1: X, N2: NegY, N3: NegZ, Flags);
7697	if (NegX != N)
7698	RemoveDeadNode (NegX);
7699	return N;
7700	}
7701	break;
7702	}
7703
7704	case ISD::FP_EXTEND:
7705	case ISD::FSIN:
7706	if (SDValue NegV = getNegatedExpression(Op: Op.getOperand(i: `0`), DAG, LegalOps,
7707	OptForSize, Cost, Depth))
7708	return DAG.getNode(Opcode, DL, VT, Operand: NegV);
7709	break;
7710	case ISD::FP_ROUND:
7711	if (SDValue NegV = getNegatedExpression(Op: Op.getOperand(i: `0`), DAG, LegalOps,
7712	OptForSize, Cost, Depth))
7713	return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT, N1: NegV, N2: Op.getOperand(i: `1`));
7714	break;
7715	case ISD::SELECT:
7716	case ISD::VSELECT: {
7717	// fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7718	// iff at least one cost is cheaper and the other is neutral/cheaper
7719	SDValue LHS = Op.getOperand(i: `1`);
7720	NegatibleCost CostLHS = NegatibleCost::Expensive;
7721	SDValue NegLHS =
7722	getNegatedExpression(Op: LHS, DAG, LegalOps, OptForSize, Cost&: CostLHS, Depth);
7723	if (!NegLHS \|\| CostLHS > NegatibleCost::Neutral) {
7724	RemoveDeadNode (NegLHS);
7725	break;
7726	}
7727
7728	// Prevent this node from being deleted by the next call.
7729	Handles.emplace_back(args&: NegLHS);
7730
7731	SDValue RHS = Op.getOperand(i: `2`);
7732	NegatibleCost CostRHS = NegatibleCost::Expensive;
7733	SDValue NegRHS =
7734	getNegatedExpression(Op: RHS, DAG, LegalOps, OptForSize, Cost&: CostRHS, Depth);
7735
7736	// We're done with the handles.
7737	Handles.clear();
7738
7739	if (!NegRHS \|\| CostRHS > NegatibleCost::Neutral \|\|
7740	(CostLHS != NegatibleCost::Cheaper &&
7741	CostRHS != NegatibleCost::Cheaper)) {
7742	RemoveDeadNode (NegLHS);
7743	RemoveDeadNode (NegRHS);
7744	break;
7745	}
7746
7747	Cost = std::min(a: CostLHS, b: CostRHS);
7748	return DAG.getSelect(DL, VT, Cond: Op.getOperand(i: `0`), LHS: NegLHS, RHS: NegRHS);
7749	}
7750	}
7751
7752	return SDValue ();
7753	}
7754
7755	//===----------------------------------------------------------------------===//
7756	// Legalization Utilities
7757	//===----------------------------------------------------------------------===//
7758
7759	bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7760	SDValue LHS, SDValue RHS,
7761	SmallVectorImpl<SDValue> &Result,
7762	EVT HiLoVT, SelectionDAG &DAG,
7763	MulExpansionKind Kind, SDValue LL,
7764	SDValue LH, SDValue RL, SDValue RH) const {
7765	assert(Opcode == ISD::MUL \|\| Opcode == ISD::UMUL_LOHI \|\|
7766	Opcode == ISD::SMUL_LOHI);
7767
7768	bool HasMULHS = (Kind == MulExpansionKind::Always) \|\|
7769	isOperationLegalOrCustom(Op: ISD::MULHS, VT: HiLoVT);
7770	bool HasMULHU = (Kind == MulExpansionKind::Always) \|\|
7771	isOperationLegalOrCustom(Op: ISD::MULHU, VT: HiLoVT);
7772	bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) \|\|
7773	isOperationLegalOrCustom(Op: ISD::SMUL_LOHI, VT: HiLoVT);
7774	bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) \|\|
7775	isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT: HiLoVT);
7776
7777	if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7778	return false;
7779
7780	unsigned OuterBitSize = VT.getScalarSizeInBits();
7781	unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7782
7783	// LL, LH, RL, and RH must be either all NULL or all set to a value.
7784	assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) \|\|
7785	(!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7786
7787	SDVTList VTs = DAG.getVTList(VT1: HiLoVT, VT2: HiLoVT);
7788	auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7789	bool Signed) -> bool {
7790	if ((Signed && HasSMUL_LOHI) \|\| (!Signed && HasUMUL_LOHI)) {
7791	Lo = DAG.getNode(Opcode: Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, DL: dl, VTList: VTs, N1: L, N2: R);
7792	Hi = SDValue (Lo.getNode(), `1`);
7793	return true;
7794	}
7795	if ((Signed && HasMULHS) \|\| (!Signed && HasMULHU)) {
7796	Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: L, N2: R);
7797	Hi = DAG.getNode(Opcode: Signed ? ISD::MULHS : ISD::MULHU, DL: dl, VT: HiLoVT, N1: L, N2: R);
7798	return true;
7799	}
7800	return false;
7801	};
7802
7803	SDValue Lo, Hi;
7804
7805	if (!LL.getNode() && !RL.getNode() &&
7806	isOperationLegalOrCustom(Op: ISD::TRUNCATE, VT: HiLoVT)) {
7807	LL = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: LHS);
7808	RL = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: RHS);
7809	}
7810
7811	if (!LL.getNode())
7812	return false;
7813
7814	APInt HighMask = APInt::getHighBitsSet(numBits: OuterBitSize, hiBitsSet: InnerBitSize);
7815	if (DAG.MaskedValueIsZero(Op: LHS, Mask: HighMask) &&
7816	DAG.MaskedValueIsZero(Op: RHS, Mask: HighMask)) {
7817	// The inputs are both zero-extended.
7818	if (MakeMUL_LOHI (LL, RL, Lo, Hi, false)) {
7819	Result.push_back(Elt: Lo);
7820	Result.push_back(Elt: Hi);
7821	if (Opcode != ISD::MUL) {
7822	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT: HiLoVT);
7823	Result.push_back(Elt: Zero);
7824	Result.push_back(Elt: Zero);
7825	}
7826	return true;
7827	}
7828	}
7829
7830	if (!VT.isVector() && Opcode == ISD::MUL &&
7831	DAG.ComputeMaxSignificantBits(Op: LHS) <= InnerBitSize &&
7832	DAG.ComputeMaxSignificantBits(Op: RHS) <= InnerBitSize) {
7833	// The input values are both sign-extended.
7834	// TODO non-MUL case?
7835	if (MakeMUL_LOHI (LL, RL, Lo, Hi, true)) {
7836	Result.push_back(Elt: Lo);
7837	Result.push_back(Elt: Hi);
7838	return true;
7839	}
7840	}
7841
7842	unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7843	SDValue Shift = DAG.getShiftAmountConstant(Val: ShiftAmount, VT, DL: dl);
7844
7845	if (!LH.getNode() && !RH.getNode() &&
7846	isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
7847	isOperationLegalOrCustom(Op: ISD::TRUNCATE, VT: HiLoVT)) {
7848	LH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: LHS, N2: Shift);
7849	LH = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: LH);
7850	RH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: RHS, N2: Shift);
7851	RH = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: RH);
7852	}
7853
7854	if (!LH.getNode())
7855	return false;
7856
7857	if (!MakeMUL_LOHI (LL, RL, Lo, Hi, false))
7858	return false;
7859
7860	Result.push_back(Elt: Lo);
7861
7862	if (Opcode == ISD::MUL) {
7863	RH = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: LL, N2: RH);
7864	LH = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: LH, N2: RL);
7865	Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Hi, N2: RH);
7866	Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Hi, N2: LH);
7867	Result.push_back(Elt: Hi);
7868	return true;
7869	}
7870
7871	// Compute the full width result.
7872	auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
7873	Lo = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Lo);
7874	Hi = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Hi);
7875	Hi = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Hi, N2: Shift);
7876	return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Lo, N2: Hi);
7877	};
7878
7879	SDValue Next = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Hi);
7880	if (!MakeMUL_LOHI (LL, RH, Lo, Hi, false))
7881	return false;
7882
7883	// This is effectively the add part of a multiply-add of half-sized operands,
7884	// so it cannot overflow.
7885	Next = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Next, N2: Merge (Lo, Hi));
7886
7887	if (!MakeMUL_LOHI (LH, RL, Lo, Hi, false))
7888	return false;
7889
7890	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT: HiLoVT);
7891	EVT BoolType = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
7892
7893	bool UseGlue = (isOperationLegalOrCustom(Op: ISD::ADDC, VT) &&
7894	isOperationLegalOrCustom(Op: ISD::ADDE, VT));
7895	if (UseGlue)
7896	Next = DAG.getNode(Opcode: ISD::ADDC, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::Glue), N1: Next,
7897	N2: Merge (Lo, Hi));
7898	else
7899	Next = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolType), N1: Next,
7900	N2: Merge (Lo, Hi), N3: DAG.getConstant(Val: `0`, DL: dl, VT: BoolType));
7901
7902	SDValue Carry = Next.getValue(R: `1`);
7903	Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
7904	Next = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Next, N2: Shift);
7905
7906	if (!MakeMUL_LOHI (LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
7907	return false;
7908
7909	if (UseGlue)
7910	Hi = DAG.getNode(Opcode: ISD::ADDE, DL: dl, VTList: DAG.getVTList(VT1: HiLoVT, VT2: MVT::Glue), N1: Hi, N2: Zero,
7911	N3: Carry);
7912	else
7913	Hi = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList: DAG.getVTList(VT1: HiLoVT, VT2: BoolType), N1: Hi,
7914	N2: Zero, N3: Carry);
7915
7916	Next = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Next, N2: Merge (Lo, Hi));
7917
7918	if (Opcode == ISD::SMUL_LOHI) {
7919	SDValue NextSub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Next,
7920	N2: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: RL));
7921	Next = DAG.getSelectCC(DL: dl, LHS: LH, RHS: Zero, True: NextSub, False: Next, Cond: ISD::SETLT);
7922
7923	NextSub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Next,
7924	N2: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: LL));
7925	Next = DAG.getSelectCC(DL: dl, LHS: RH, RHS: Zero, True: NextSub, False: Next, Cond: ISD::SETLT);
7926	}
7927
7928	Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
7929	Next = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Next, N2: Shift);
7930	Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
7931	return true;
7932	}
7933
7934	bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
7935	SelectionDAG &DAG, MulExpansionKind Kind,
7936	SDValue LL, SDValue LH, SDValue RL,
7937	SDValue RH) const {
7938	SmallVector<SDValue, `2`> Result;
7939	bool Ok = expandMUL_LOHI(Opcode: N->getOpcode(), VT: N->getValueType(ResNo: `0`), dl: SDLoc (N),
7940	LHS: N->getOperand(Num: `0`), RHS: N->getOperand(Num: `1`), Result, HiLoVT,
7941	DAG, Kind, LL, LH, RL, RH);
7942	if (Ok) {
7943	assert(Result.size() == `2`);
7944	Lo = Result [`0`];
7945	Hi = Result [`1`];
7946	}
7947	return Ok;
7948	}
7949
7950	// Optimize unsigned division or remainder by constants for types twice as large
7951	// as a legal VT.
7952	//
7953	// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
7954	// can be computed
7955	// as:
7956	// Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7957	// Remainder = Sum % Constant
7958	// This is based on "Remainder by Summing Digits" from Hacker's Delight.
7959	//
7960	// For division, we can compute the remainder using the algorithm described
7961	// above, subtract it from the dividend to get an exact multiple of Constant.
// Then multiply that exact multiple by the multiplicative inverse modulo
// (1 << BitWidth) to get the quotient.
7964
7965	// If Constant is even, we can shift right the dividend and the divisor by the
7966	// number of trailing zeros in Constant before applying the remainder algorithm.
7967	// If we're after the quotient, we can subtract this value from the shifted
7968	// dividend and multiply by the multiplicative inverse of the shifted divisor.
7969	// If we want the remainder, we shift the value left by the number of trailing
7970	// zeros and add the bits that were shifted out of the dividend.
7971	bool TargetLowering::expandDIVREMByConstant(SDNode *N,
7972	SmallVectorImpl<SDValue> &Result,
7973	EVT HiLoVT, SelectionDAG &DAG,
7974	SDValue LL, SDValue LH) const {
7975	unsigned Opcode = N->getOpcode();
7976	EVT VT = N->getValueType(ResNo: `0`);
7977
7978	// TODO: Support signed division/remainder.
7979	if (Opcode == ISD::SREM \|\| Opcode == ISD::SDIV \|\| Opcode == ISD::SDIVREM)
7980	return false;
7981	assert(
7982	(Opcode == ISD::UREM \|\| Opcode == ISD::UDIV \|\| Opcode == ISD::UDIVREM) &&
7983	"Unexpected opcode");
7984
7985	auto *CN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: `1`));
7986	if (!CN)
7987	return false;
7988
7989	APInt Divisor = CN->getAPIntValue();
7990	unsigned BitWidth = Divisor.getBitWidth();
7991	unsigned HBitWidth = BitWidth / `2`;
7992	assert(VT.getScalarSizeInBits() == BitWidth &&
7993	HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
7994
7995	// Divisor needs to less than (1 << HBitWidth).
7996	APInt HalfMaxPlus1 = APInt::getOneBitSet(numBits: BitWidth, BitNo: HBitWidth);
7997	if (Divisor.uge(RHS: HalfMaxPlus1))
7998	return false;
7999
8000	// We depend on the UREM by constant optimization in DAGCombiner that requires
8001	// high multiply.
8002	if (!isOperationLegalOrCustom(Op: ISD::MULHU, VT: HiLoVT) &&
8003	!isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT: HiLoVT))
8004	return false;
8005
8006	// Don't expand if optimizing for size.
8007	if (DAG.shouldOptForSize())
8008	return false;
8009
8010	// Early out for 0 or 1 divisors.
8011	if (Divisor.ule(RHS: `1`))
8012	return false;
8013
8014	// If the divisor is even, shift it until it becomes odd.
8015	unsigned TrailingZeros = `0`;
8016	if (!Divisor [`0`]) {
8017	TrailingZeros = Divisor.countr_zero();
8018	Divisor.lshrInPlace(ShiftAmt: TrailingZeros);
8019	}
8020
8021	SDLoc dl(N);
8022	SDValue Sum;
8023	SDValue PartialRem;
8024
8025	// If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
8026	// then add in the carry.
8027	// TODO: If we can't split it in half, we might be able to split into 3 or
8028	// more pieces using a smaller bit width.
8029	if (HalfMaxPlus1.urem(RHS: Divisor).isOne()) {
8030	assert(!LL == !LH && "Expected both input halves or no input halves!");
8031	if (!LL)
8032	std::tie(args&: LL, args&: LH) = DAG.SplitScalar(N: N->getOperand(Num: `0`), DL: dl, LoVT: HiLoVT, HiVT: HiLoVT);
8033
8034	// Shift the input by the number of TrailingZeros in the divisor. The
8035	// shifted out bits will be added to the remainder later.
8036	if (TrailingZeros) {
8037	// Save the shifted off bits if we need the remainder.
8038	if (Opcode != ISD::UDIV) {
8039	APInt Mask = APInt::getLowBitsSet(numBits: HBitWidth, loBitsSet: TrailingZeros);
8040	PartialRem = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: HiLoVT, N1: LL,
8041	N2: DAG.getConstant(Val: Mask, DL: dl, VT: HiLoVT));
8042	}
8043
8044	LL = DAG.getNode(
8045	Opcode: ISD::OR, DL: dl, VT: HiLoVT,
8046	N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HiLoVT, N1: LL,
8047	N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl)),
8048	N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HiLoVT, N1: LH,
8049	N2: DAG.getShiftAmountConstant(Val: HBitWidth - TrailingZeros,
8050	VT: HiLoVT, DL: dl)));
8051	LH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HiLoVT, N1: LH,
8052	N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl));
8053	}
8054
8055	// Use uaddo_carry if we can, otherwise use a compare to detect overflow.
8056	EVT SetCCType =
8057	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: HiLoVT);
8058	if (isOperationLegalOrCustom(Op: ISD::UADDO_CARRY, VT: HiLoVT)) {
8059	SDVTList VTList = DAG.getVTList(VT1: HiLoVT, VT2: SetCCType);
8060	Sum = DAG.getNode(Opcode: ISD::UADDO, DL: dl, VTList, N1: LL, N2: LH);
8061	Sum = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList, N1: Sum,
8062	N2: DAG.getConstant(Val: `0`, DL: dl, VT: HiLoVT), N3: Sum.getValue(R: `1`));
8063	} else {
8064	Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: LL, N2: LH);
8065	SDValue Carry = DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Sum, RHS: LL, Cond: ISD::SETULT);
8066	// If the boolean for the target is 0 or 1, we can add the setcc result
8067	// directly.
8068	if (getBooleanContents(Type: HiLoVT) ==
8069	TargetLoweringBase::ZeroOrOneBooleanContent)
8070	Carry = DAG.getZExtOrTrunc(Op: Carry, DL: dl, VT: HiLoVT);
8071	else
8072	Carry = DAG.getSelect(DL: dl, VT: HiLoVT, Cond: Carry, LHS: DAG.getConstant(Val: `1`, DL: dl, VT: HiLoVT),
8073	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: HiLoVT));
8074	Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Sum, N2: Carry);
8075	}
8076	}
8077
8078	// If we didn't find a sum, we can't do the expansion.
8079	if (!Sum)
8080	return false;
8081
8082	// Perform a HiLoVT urem on the Sum using truncated divisor.
8083	SDValue RemL =
8084	DAG.getNode(Opcode: ISD::UREM, DL: dl, VT: HiLoVT, N1: Sum,
8085	N2: DAG.getConstant(Val: Divisor.trunc(width: HBitWidth), DL: dl, VT: HiLoVT));
8086	SDValue RemH = DAG.getConstant(Val: `0`, DL: dl, VT: HiLoVT);
8087
8088	if (Opcode != ISD::UREM) {
8089	// Subtract the remainder from the shifted dividend.
8090	SDValue Dividend = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT, N1: LL, N2: LH);
8091	SDValue Rem = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT, N1: RemL, N2: RemH);
8092
8093	Dividend = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Dividend, N2: Rem);
8094
8095	// Multiply by the multiplicative inverse of the divisor modulo
8096	// (1 << BitWidth).
8097	APInt MulFactor = Divisor.multiplicativeInverse();
8098
8099	SDValue Quotient = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Dividend,
8100	N2: DAG.getConstant(Val: MulFactor, DL: dl, VT));
8101
8102	// Split the quotient into low and high parts.
8103	SDValue QuotL, QuotH;
8104	std::tie(args&: QuotL, args&: QuotH) = DAG.SplitScalar(N: Quotient, DL: dl, LoVT: HiLoVT, HiVT: HiLoVT);
8105	Result.push_back(Elt: QuotL);
8106	Result.push_back(Elt: QuotH);
8107	}
8108
8109	if (Opcode != ISD::UDIV) {
8110	// If we shifted the input, shift the remainder left and add the bits we
8111	// shifted off the input.
8112	if (TrailingZeros) {
8113	RemL = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HiLoVT, N1: RemL,
8114	N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl));
8115	RemL = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: RemL, N2: PartialRem);
8116	}
8117	Result.push_back(Elt: RemL);
8118	Result.push_back(Elt: DAG.getConstant(Val: `0`, DL: dl, VT: HiLoVT));
8119	}
8120
8121	return true;
8122	}
8123
8124	// Check that (every element of) Z is undef or not an exact multiple of BW.
8125	static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
8126	return ISD::matchUnaryPredicate(
8127	Op: Z,
8128	Match: [=](ConstantSDNode C) { return* !C \|\| C->getAPIntValue().urem(RHS: BW) != `0`; },
8129	/AllowUndef=/AllowUndefs: true, /AllowTruncation=/true);
8130	}
8131
8132	static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
8133	EVT VT = Node->getValueType(ResNo: `0`);
8134	SDValue ShX, ShY;
8135	SDValue ShAmt, InvShAmt;
8136	SDValue X = Node->getOperand(Num: `0`);
8137	SDValue Y = Node->getOperand(Num: `1`);
8138	SDValue Z = Node->getOperand(Num: `2`);
8139	SDValue Mask = Node->getOperand(Num: `3`);
8140	SDValue VL = Node->getOperand(Num: `4`);
8141
8142	unsigned BW = VT.getScalarSizeInBits();
8143	bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
8144	SDLoc DL(SDValue (Node, `0`));
8145
8146	EVT ShVT = Z.getValueType();
8147	if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8148	// fshl: X << C \| Y >> (BW - C)
8149	// fshr: X << (BW - C) \| Y >> C
8150	// where C = Z % BW is not zero
8151	SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
8152	ShAmt = DAG.getNode(Opcode: ISD::VP_UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC, N3: Mask, N4: VL);
8153	InvShAmt = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT: ShVT, N1: BitWidthC, N2: ShAmt, N3: Mask, N4: VL);
8154	ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: IsFSHL ? ShAmt : InvShAmt, N3: Mask,
8155	N4: VL);
8156	ShY = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT, N1: Y, N2: IsFSHL ? InvShAmt : ShAmt, N3: Mask,
8157	N4: VL);
8158	} else {
8159	// fshl: X << (Z % BW) \| Y >> 1 >> (BW - 1 - (Z % BW))
8160	// fshr: X << 1 << (BW - 1 - (Z % BW)) \| Y >> (Z % BW)
8161	SDValue BitMask = DAG.getConstant(Val: BW - `1`, DL, VT: ShVT);
8162	if (isPowerOf2_32(Value: BW)) {
8163	// Z % BW -> Z & (BW - 1)
8164	ShAmt = DAG.getNode(Opcode: ISD::VP_AND, DL, VT: ShVT, N1: Z, N2: BitMask, N3: Mask, N4: VL);
8165	// (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8166	SDValue NotZ = DAG.getNode(Opcode: ISD::VP_XOR, DL, VT: ShVT, N1: Z,
8167	N2: DAG.getAllOnesConstant(DL, VT: ShVT), N3: Mask, N4: VL);
8168	InvShAmt = DAG.getNode(Opcode: ISD::VP_AND, DL, VT: ShVT, N1: NotZ, N2: BitMask, N3: Mask, N4: VL);
8169	} else {
8170	SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
8171	ShAmt = DAG.getNode(Opcode: ISD::VP_UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC, N3: Mask, N4: VL);
8172	InvShAmt = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT: ShVT, N1: BitMask, N2: ShAmt, N3: Mask, N4: VL);
8173	}
8174
8175	SDValue One = DAG.getConstant(Val: `1`, DL, VT: ShVT);
8176	if (IsFSHL) {
8177	ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: ShAmt, N3: Mask, N4: VL);
8178	SDValue ShY1 = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT, N1: Y, N2: One, N3: Mask, N4: VL);
8179	ShY = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT, N1: ShY1, N2: InvShAmt, N3: Mask, N4: VL);
8180	} else {
8181	SDValue ShX1 = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: One, N3: Mask, N4: VL);
8182	ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: ShX1, N2: InvShAmt, N3: Mask, N4: VL);
8183	ShY = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT, N1: Y, N2: ShAmt, N3: Mask, N4: VL);
8184	}
8185	}
8186	return DAG.getNode(Opcode: ISD::VP_OR, DL, VT, N1: ShX, N2: ShY, N3: Mask, N4: VL);
8187	}
8188
8189	SDValue TargetLowering::expandFunnelShift(SDNode *Node,
8190	SelectionDAG &DAG) const {
8191	if (Node->isVPOpcode())
8192	return expandVPFunnelShift(Node, DAG);
8193
8194	EVT VT = Node->getValueType(ResNo: `0`);
8195
8196	if (VT.isVector() && (!isOperationLegalOrCustom(Op: ISD::SHL, VT) \|\|
8197	!isOperationLegalOrCustom(Op: ISD::SRL, VT) \|\|
8198	!isOperationLegalOrCustom(Op: ISD::SUB, VT) \|\|
8199	!isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)))
8200	return SDValue ();
8201
8202	SDValue X = Node->getOperand(Num: `0`);
8203	SDValue Y = Node->getOperand(Num: `1`);
8204	SDValue Z = Node->getOperand(Num: `2`);
8205
8206	unsigned BW = VT.getScalarSizeInBits();
8207	bool IsFSHL = Node->getOpcode() == ISD::FSHL;
8208	SDLoc DL(SDValue (Node, `0`));
8209
8210	EVT ShVT = Z.getValueType();
8211
8212	// If a funnel shift in the other direction is more supported, use it.
8213	unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
8214	if (!isOperationLegalOrCustom(Op: Node->getOpcode(), VT) &&
8215	isOperationLegalOrCustom(Op: RevOpcode, VT) && isPowerOf2_32(Value: BW)) {
8216	if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8217	// fshl X, Y, Z -> fshr X, Y, -Z
8218	// fshr X, Y, Z -> fshl X, Y, -Z
8219	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT: ShVT);
8220	Z = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Zero, N2: Z);
8221	} else {
8222	// fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8223	// fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
8224	SDValue One = DAG.getConstant(Val: `1`, DL, VT: ShVT);
8225	if (IsFSHL) {
8226	Y = DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: One);
8227	X = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: X, N2: One);
8228	} else {
8229	X = DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: One);
8230	Y = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Y, N2: One);
8231	}
8232	Z = DAG.getNOT(DL, Val: Z, VT: ShVT);
8233	}
8234	return DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: Z);
8235	}
8236
8237	SDValue ShX, ShY;
8238	SDValue ShAmt, InvShAmt;
8239	if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8240	// fshl: X << C \| Y >> (BW - C)
8241	// fshr: X << (BW - C) \| Y >> C
8242	// where C = Z % BW is not zero
8243	SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
8244	ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC);
8245	InvShAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: BitWidthC, N2: ShAmt);
8246	ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: IsFSHL ? ShAmt : InvShAmt);
8247	ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: IsFSHL ? InvShAmt : ShAmt);
8248	} else {
8249	// fshl: X << (Z % BW) \| Y >> 1 >> (BW - 1 - (Z % BW))
8250	// fshr: X << 1 << (BW - 1 - (Z % BW)) \| Y >> (Z % BW)
8251	SDValue Mask = DAG.getConstant(Val: BW - `1`, DL, VT: ShVT);
8252	if (isPowerOf2_32(Value: BW)) {
8253	// Z % BW -> Z & (BW - 1)
8254	ShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: Z, N2: Mask);
8255	// (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8256	InvShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: DAG.getNOT(DL, Val: Z, VT: ShVT), N2: Mask);
8257	} else {
8258	SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
8259	ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC);
8260	InvShAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Mask, N2: ShAmt);
8261	}
8262
8263	SDValue One = DAG.getConstant(Val: `1`, DL, VT: ShVT);
8264	if (IsFSHL) {
8265	ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: ShAmt);
8266	SDValue ShY1 = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: One);
8267	ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: ShY1, N2: InvShAmt);
8268	} else {
8269	SDValue ShX1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: One);
8270	ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ShX1, N2: InvShAmt);
8271	ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: ShAmt);
8272	}
8273	}
8274	return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShX, N2: ShY);
8275	}
8276
8277	// TODO: Merge with expandFunnelShift.
8278	SDValue TargetLowering::expandROT(SDNode Node, bool* AllowVectorOps,
8279	SelectionDAG &DAG) const {
8280	EVT VT = Node->getValueType(ResNo: `0`);
8281	unsigned EltSizeInBits = VT.getScalarSizeInBits();
8282	bool IsLeft = Node->getOpcode() == ISD::ROTL;
8283	SDValue Op0 = Node->getOperand(Num: `0`);
8284	SDValue Op1 = Node->getOperand(Num: `1`);
8285	SDLoc DL(SDValue (Node, `0`));
8286
8287	EVT ShVT = Op1.getValueType();
8288	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT: ShVT);
8289
8290	// If a rotate in the other direction is more supported, use it.
8291	unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8292	if (!isOperationLegalOrCustom(Op: Node->getOpcode(), VT) &&
8293	isOperationLegalOrCustom(Op: RevRot, VT) && isPowerOf2_32(Value: EltSizeInBits)) {
8294	SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Zero, N2: Op1);
8295	return DAG.getNode(Opcode: RevRot, DL, VT, N1: Op0, N2: Sub);
8296	}
8297
8298	if (!AllowVectorOps && VT.isVector() &&
8299	(!isOperationLegalOrCustom(Op: ISD::SHL, VT) \|\|
8300	!isOperationLegalOrCustom(Op: ISD::SRL, VT) \|\|
8301	!isOperationLegalOrCustom(Op: ISD::SUB, VT) \|\|
8302	!isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT) \|\|
8303	!isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT)))
8304	return SDValue ();
8305
8306	unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8307	unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8308	SDValue BitWidthMinusOneC = DAG.getConstant(Val: EltSizeInBits - `1`, DL, VT: ShVT);
8309	SDValue ShVal;
8310	SDValue HsVal;
8311	if (isPowerOf2_32(Value: EltSizeInBits)) {
8312	// (rotl x, c) -> x << (c & (w - 1)) \| x >> (-c & (w - 1))
8313	// (rotr x, c) -> x >> (c & (w - 1)) \| x << (-c & (w - 1))
8314	SDValue NegOp1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Zero, N2: Op1);
8315	SDValue ShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: Op1, N2: BitWidthMinusOneC);
8316	ShVal = DAG.getNode(Opcode: ShOpc, DL, VT, N1: Op0, N2: ShAmt);
8317	SDValue HsAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: NegOp1, N2: BitWidthMinusOneC);
8318	HsVal = DAG.getNode(Opcode: HsOpc, DL, VT, N1: Op0, N2: HsAmt);
8319	} else {
8320	// (rotl x, c) -> x << (c % w) \| x >> 1 >> (w - 1 - (c % w))
8321	// (rotr x, c) -> x >> (c % w) \| x << 1 << (w - 1 - (c % w))
8322	SDValue BitWidthC = DAG.getConstant(Val: EltSizeInBits, DL, VT: ShVT);
8323	SDValue ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Op1, N2: BitWidthC);
8324	ShVal = DAG.getNode(Opcode: ShOpc, DL, VT, N1: Op0, N2: ShAmt);
8325	SDValue HsAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: BitWidthMinusOneC, N2: ShAmt);
8326	SDValue One = DAG.getConstant(Val: `1`, DL, VT: ShVT);
8327	HsVal =
8328	DAG.getNode(Opcode: HsOpc, DL, VT, N1: DAG.getNode(Opcode: HsOpc, DL, VT, N1: Op0, N2: One), N2: HsAmt);
8329	}
8330	return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShVal, N2: HsVal);
8331	}
8332
8333	void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
8334	SelectionDAG &DAG) const {
8335	assert(Node->getNumOperands() == `3` && "Not a double-shift!");
8336	EVT VT = Node->getValueType(ResNo: `0`);
8337	unsigned VTBits = VT.getScalarSizeInBits();
8338	assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
8339
8340	bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
8341	bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
8342	SDValue ShOpLo = Node->getOperand(Num: `0`);
8343	SDValue ShOpHi = Node->getOperand(Num: `1`);
8344	SDValue ShAmt = Node->getOperand(Num: `2`);
8345	EVT ShAmtVT = ShAmt.getValueType();
8346	EVT ShAmtCCVT =
8347	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: ShAmtVT);
8348	SDLoc dl(Node);
8349
8350	// ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
8351	// ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
8352	// away during isel.
8353	SDValue SafeShAmt = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ShAmtVT, N1: ShAmt,
8354	N2: DAG.getConstant(Val: VTBits - `1`, DL: dl, VT: ShAmtVT));
8355	SDValue Tmp1 = IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: ShOpHi,
8356	N2: DAG.getConstant(Val: VTBits - `1`, DL: dl, VT: ShAmtVT))
8357	: DAG.getConstant(Val: `0`, DL: dl, VT);
8358
8359	SDValue Tmp2, Tmp3;
8360	if (IsSHL) {
8361	Tmp2 = DAG.getNode(Opcode: ISD::FSHL, DL: dl, VT, N1: ShOpHi, N2: ShOpLo, N3: ShAmt);
8362	Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: ShOpLo, N2: SafeShAmt);
8363	} else {
8364	Tmp2 = DAG.getNode(Opcode: ISD::FSHR, DL: dl, VT, N1: ShOpHi, N2: ShOpLo, N3: ShAmt);
8365	Tmp3 = DAG.getNode(Opcode: IsSRA ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: ShOpHi, N2: SafeShAmt);
8366	}
8367
8368	// If the shift amount is larger or equal than the width of a part we don't
8369	// use the result from the FSHL/FSHR. Insert a test and select the appropriate
8370	// values for large shift amounts.
8371	SDValue AndNode = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ShAmtVT, N1: ShAmt,
8372	N2: DAG.getConstant(Val: VTBits, DL: dl, VT: ShAmtVT));
8373	SDValue Cond = DAG.getSetCC(DL: dl, VT: ShAmtCCVT, LHS: AndNode,
8374	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: ShAmtVT), Cond: ISD::SETNE);
8375
8376	if (IsSHL) {
8377	Hi = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp3, N3: Tmp2);
8378	Lo = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp1, N3: Tmp3);
8379	} else {
8380	Lo = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp3, N3: Tmp2);
8381	Hi = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp1, N3: Tmp3);
8382	}
8383	}
8384
8385	bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
8386	SelectionDAG &DAG) const {
8387	unsigned OpNo = Node->isStrictFPOpcode() ? `1` : `0`;
8388	SDValue Src = Node->getOperand(Num: OpNo);
8389	EVT SrcVT = Src.getValueType();
8390	EVT DstVT = Node->getValueType(ResNo: `0`);
8391	SDLoc dl(SDValue (Node, `0`));
8392
8393	// FIXME: Only f32 to i64 conversions are supported.
8394	if (SrcVT != MVT::f32 \|\| DstVT != MVT::i64)
8395	return false;
8396
8397	if (Node->isStrictFPOpcode())
8398	// When a NaN is converted to an integer a trap is allowed. We can't
8399	// use this expansion here because it would eliminate that trap. Other
8400	// traps are also allowed and cannot be eliminated. See
8401	// IEEE 754-2008 sec 5.8.
8402	return false;
8403
8404	// Expand f32 -> i64 conversion
8405	// This algorithm comes from compiler-rt's implementation of fixsfdi:
8406	// https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8407	unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8408	EVT IntVT = SrcVT.changeTypeToInteger();
8409	EVT IntShVT = getShiftAmountTy(LHSTy: IntVT, DL: DAG.getDataLayout());
8410
8411	SDValue ExponentMask = DAG.getConstant(Val: `0x7F800000`, DL: dl, VT: IntVT);
8412	SDValue ExponentLoBit = DAG.getConstant(Val: `23`, DL: dl, VT: IntVT);
8413	SDValue Bias = DAG.getConstant(Val: `127`, DL: dl, VT: IntVT);
8414	SDValue SignMask = DAG.getConstant(Val: APInt::getSignMask(BitWidth: SrcEltBits), DL: dl, VT: IntVT);
8415	SDValue SignLowBit = DAG.getConstant(Val: SrcEltBits - `1`, DL: dl, VT: IntVT);
8416	SDValue MantissaMask = DAG.getConstant(Val: `0x007FFFFF`, DL: dl, VT: IntVT);
8417
8418	SDValue Bits = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: IntVT, Operand: Src);
8419
8420	SDValue ExponentBits = DAG.getNode(
8421	Opcode: ISD::SRL, DL: dl, VT: IntVT, N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: ExponentMask),
8422	N2: DAG.getZExtOrTrunc(Op: ExponentLoBit, DL: dl, VT: IntShVT));
8423	SDValue Exponent = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: ExponentBits, N2: Bias);
8424
8425	SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: IntVT,
8426	N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: SignMask),
8427	N2: DAG.getZExtOrTrunc(Op: SignLowBit, DL: dl, VT: IntShVT));
8428	Sign = DAG.getSExtOrTrunc(Op: Sign, DL: dl, VT: DstVT);
8429
8430	SDValue R = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: IntVT,
8431	N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: MantissaMask),
8432	N2: DAG.getConstant(Val: `0x00800000`, DL: dl, VT: IntVT));
8433
8434	R = DAG.getZExtOrTrunc(Op: R, DL: dl, VT: DstVT);
8435
8436	R = DAG.getSelectCC(
8437	DL: dl, LHS: Exponent, RHS: ExponentLoBit,
8438	True: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: DstVT, N1: R,
8439	N2: DAG.getZExtOrTrunc(
8440	Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: Exponent, N2: ExponentLoBit),
8441	DL: dl, VT: IntShVT)),
8442	False: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: DstVT, N1: R,
8443	N2: DAG.getZExtOrTrunc(
8444	Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: ExponentLoBit, N2: Exponent),
8445	DL: dl, VT: IntShVT)),
8446	Cond: ISD::SETGT);
8447
8448	SDValue Ret = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: DstVT,
8449	N1: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: R, N2: Sign), N2: Sign);
8450
8451	Result = DAG.getSelectCC(DL: dl, LHS: Exponent, RHS: DAG.getConstant(Val: `0`, DL: dl, VT: IntVT),
8452	True: DAG.getConstant(Val: `0`, DL: dl, VT: DstVT), False: Ret, Cond: ISD::SETLT);
8453	return true;
8454	}
8455
8456	bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
8457	SDValue &Chain,
8458	SelectionDAG &DAG) const {
8459	SDLoc dl(SDValue (Node, `0`));
8460	unsigned OpNo = Node->isStrictFPOpcode() ? `1` : `0`;
8461	SDValue Src = Node->getOperand(Num: OpNo);
8462
8463	EVT SrcVT = Src.getValueType();
8464	EVT DstVT = Node->getValueType(ResNo: `0`);
8465	EVT SetCCVT =
8466	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: SrcVT);
8467	EVT DstSetCCVT =
8468	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: DstVT);
8469
8470	// Only expand vector types if we have the appropriate vector bit operations.
8471	unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
8472	ISD::FP_TO_SINT;
8473	if (DstVT.isVector() && (!isOperationLegalOrCustom(Op: SIntOpcode, VT: DstVT) \|\|
8474	!isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT: SrcVT)))
8475	return false;
8476
8477	// If the maximum float value is smaller then the signed integer range,
8478	// the destination signmask can't be represented by the float, so we can
8479	// just use FP_TO_SINT directly.
8480	const fltSemantics &APFSem = SrcVT.getFltSemantics();
8481	APFloat APF(APFSem, APInt::getZero(numBits: SrcVT.getScalarSizeInBits()));
8482	APInt SignMask = APInt::getSignMask(BitWidth: DstVT.getScalarSizeInBits());
8483	if (APFloat::opOverflow &
8484	APF.convertFromAPInt(Input: SignMask, IsSigned: false, RM: APFloat::rmNearestTiesToEven)) {
8485	if (Node->isStrictFPOpcode()) {
8486	Result = DAG.getNode(Opcode: ISD::STRICT_FP_TO_SINT, DL: dl, ResultTys: { DstVT, MVT::Other },
8487	Ops: { Node->getOperand(Num: `0`), Src });
8488	Chain = Result.getValue(R: `1`);
8489	} else
8490	Result = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Src);
8491	return true;
8492	}
8493
8494	// Don't expand it if there isn't cheap fsub instruction.
8495	if (!isOperationLegalOrCustom(
8496	Op: Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, VT: SrcVT))
8497	return false;
8498
8499	SDValue Cst = DAG.getConstantFP(Val: APF, DL: dl, VT: SrcVT);
8500	SDValue Sel;
8501
8502	if (Node->isStrictFPOpcode()) {
8503	Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT,
8504	Chain: Node->getOperand(Num: `0`), /IsSignaling/ true);
8505	Chain = Sel.getValue(R: `1`);
8506	} else {
8507	Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT);
8508	}
8509
8510	bool Strict = Node->isStrictFPOpcode() \|\|
8511	shouldUseStrictFP_TO_INT(FpVT: SrcVT, IntVT: DstVT, /IsSigned/ false);
8512
8513	if (Strict) {
8514	// Expand based on maximum range of FP_TO_SINT, if the value exceeds the
8515	// signmask then offset (the result of which should be fully representable).
8516	// Sel = Src < 0x8000000000000000
8517	// FltOfs = select Sel, 0, 0x8000000000000000
8518	// IntOfs = select Sel, 0, 0x8000000000000000
8519	// Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8520
8521	// TODO: Should any fast-math-flags be set for the FSUB?
8522	SDValue FltOfs = DAG.getSelect(DL: dl, VT: SrcVT, Cond: Sel,
8523	LHS: DAG.getConstantFP(Val: `0.0`, DL: dl, VT: SrcVT), RHS: Cst);
8524	Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT);
8525	SDValue IntOfs = DAG.getSelect(DL: dl, VT: DstVT, Cond: Sel,
8526	LHS: DAG.getConstant(Val: `0`, DL: dl, VT: DstVT),
8527	RHS: DAG.getConstant(Val: SignMask, DL: dl, VT: DstVT));
8528	SDValue SInt;
8529	if (Node->isStrictFPOpcode()) {
8530	SDValue Val = DAG.getNode(Opcode: ISD::STRICT_FSUB, DL: dl, ResultTys: { SrcVT, MVT::Other },
8531	Ops: { Chain, Src, FltOfs });
8532	SInt = DAG.getNode(Opcode: ISD::STRICT_FP_TO_SINT, DL: dl, ResultTys: { DstVT, MVT::Other },
8533	Ops: { Val.getValue(R: `1`), Val });
8534	Chain = SInt.getValue(R: `1`);
8535	} else {
8536	SDValue Val = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: SrcVT, N1: Src, N2: FltOfs);
8537	SInt = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Val);
8538	}
8539	Result = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: SInt, N2: IntOfs);
8540	} else {
8541	// Expand based on maximum range of FP_TO_SINT:
8542	// True = fp_to_sint(Src)
8543	// False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
8544	// Result = select (Src < 0x8000000000000000), True, False
8545
8546	SDValue True = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Src);
8547	// TODO: Should any fast-math-flags be set for the FSUB?
8548	SDValue False = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT,
8549	Operand: DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: SrcVT, N1: Src, N2: Cst));
8550	False = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: False,
8551	N2: DAG.getConstant(Val: SignMask, DL: dl, VT: DstVT));
8552	Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT);
8553	Result = DAG.getSelect(DL: dl, VT: DstVT, Cond: Sel, LHS: True, RHS: False);
8554	}
8555	return true;
8556	}
8557
8558	bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
8559	SDValue &Chain, SelectionDAG &DAG) const {
8560	// This transform is not correct for converting 0 when rounding mode is set
8561	// to round toward negative infinity which will produce -0.0. So disable
8562	// under strictfp.
8563	if (Node->isStrictFPOpcode())
8564	return false;
8565
8566	SDValue Src = Node->getOperand(Num: `0`);
8567	EVT SrcVT = Src.getValueType();
8568	EVT DstVT = Node->getValueType(ResNo: `0`);
8569
8570	// If the input is known to be non-negative and SINT_TO_FP is legal then use
8571	// it.
8572	if (Node->getFlags().hasNonNeg() &&
8573	isOperationLegalOrCustom(Op: ISD::SINT_TO_FP, VT: SrcVT)) {
8574	Result =
8575	DAG.getNode(Opcode: ISD::SINT_TO_FP, DL: SDLoc (Node), VT: DstVT, Operand: Node->getOperand(Num: `0`));
8576	return true;
8577	}
8578
8579	if (SrcVT.getScalarType() != MVT::i64 \|\| DstVT.getScalarType() != MVT::f64)
8580	return false;
8581
8582	// Only expand vector types if we have the appropriate vector bit
8583	// operations.
8584	if (SrcVT.isVector() && (!isOperationLegalOrCustom(Op: ISD::SRL, VT: SrcVT) \|\|
8585	!isOperationLegalOrCustom(Op: ISD::FADD, VT: DstVT) \|\|
8586	!isOperationLegalOrCustom(Op: ISD::FSUB, VT: DstVT) \|\|
8587	!isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT: SrcVT) \|\|
8588	!isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT: SrcVT)))
8589	return false;
8590
8591	SDLoc dl(SDValue (Node, `0`));
8592
8593	// Implementation of unsigned i64 to f64 following the algorithm in
8594	// __floatundidf in compiler_rt. This implementation performs rounding
8595	// correctly in all rounding modes with the exception of converting 0
8596	// when rounding toward negative infinity. In that case the fsub will
8597	// produce -0.0. This will be added to +0.0 and produce -0.0 which is
8598	// incorrect.
8599	SDValue TwoP52 = DAG.getConstant(UINT64_C(`0x4330000000000000`), DL: dl, VT: SrcVT);
8600	SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
8601	Val: llvm::bit_cast<double>(UINT64_C(`0x4530000000100000`)), DL: dl, VT: DstVT);
8602	SDValue TwoP84 = DAG.getConstant(UINT64_C(`0x4530000000000000`), DL: dl, VT: SrcVT);
8603	SDValue LoMask = DAG.getConstant(UINT64_C(`0x00000000FFFFFFFF`), DL: dl, VT: SrcVT);
8604	SDValue HiShift = DAG.getShiftAmountConstant(Val: `32`, VT: SrcVT, DL: dl);
8605
8606	SDValue Lo = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: SrcVT, N1: Src, N2: LoMask);
8607	SDValue Hi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: SrcVT, N1: Src, N2: HiShift);
8608	SDValue LoOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: SrcVT, N1: Lo, N2: TwoP52);
8609	SDValue HiOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: SrcVT, N1: Hi, N2: TwoP84);
8610	SDValue LoFlt = DAG.getBitcast(VT: DstVT, V: LoOr);
8611	SDValue HiFlt = DAG.getBitcast(VT: DstVT, V: HiOr);
8612	SDValue HiSub = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: DstVT, N1: HiFlt, N2: TwoP84PlusTwoP52);
8613	Result = DAG.getNode(Opcode: ISD::FADD, DL: dl, VT: DstVT, N1: LoFlt, N2: HiSub);
8614	return true;
8615	}
8616
8617	SDValue
8618	TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
8619	SelectionDAG &DAG) const {
8620	unsigned Opcode = Node->getOpcode();
8621	assert((Opcode == ISD::FMINNUM \|\| Opcode == ISD::FMAXNUM \|\|
8622	Opcode == ISD::STRICT_FMINNUM \|\| Opcode == ISD::STRICT_FMAXNUM) &&
8623	"Wrong opcode");
8624
8625	if (Node->getFlags().hasNoNaNs()) {
8626	ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8627	EVT VT = Node->getValueType(ResNo: `0`);
8628	if ((!isCondCodeLegal(CC: Pred, VT: VT.getSimpleVT()) \|\|
8629	!isOperationLegalOrCustom(Op: ISD::VSELECT, VT)) &&
8630	VT.isVector())
8631	return SDValue ();
8632	SDValue Op1 = Node->getOperand(Num: `0`);
8633	SDValue Op2 = Node->getOperand(Num: `1`);
8634	SDValue SelCC = DAG.getSelectCC(DL: SDLoc (Node), LHS: Op1, RHS: Op2, True: Op1, False: Op2, Cond: Pred);
8635	SelCC ->setFlags(Node->getFlags());
8636	return SelCC;
8637	}
8638
8639	return SDValue ();
8640	}
8641
8642	SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
8643	SelectionDAG &DAG) const {
8644	if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
8645	return Expanded;
8646
8647	EVT VT = Node->getValueType(ResNo: `0`);
8648	if (VT.isScalableVector())
8649	report_fatal_error(
8650	reason: "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
8651
8652	SDLoc dl(Node);
8653	unsigned NewOp =
8654	Node->getOpcode() == ISD::FMINNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8655
8656	if (isOperationLegalOrCustom(Op: NewOp, VT)) {
8657	SDValue Quiet0 = Node->getOperand(Num: `0`);
8658	SDValue Quiet1 = Node->getOperand(Num: `1`);
8659
8660	if (!Node->getFlags().hasNoNaNs()) {
8661	// Insert canonicalizes if it's possible we need to quiet to get correct
8662	// sNaN behavior.
8663	if (!DAG.isKnownNeverSNaN(Op: Quiet0)) {
8664	Quiet0 = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL: dl, VT, Operand: Quiet0,
8665	Flags: Node->getFlags());
8666	}
8667	if (!DAG.isKnownNeverSNaN(Op: Quiet1)) {
8668	Quiet1 = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL: dl, VT, Operand: Quiet1,
8669	Flags: Node->getFlags());
8670	}
8671	}
8672
8673	return DAG.getNode(Opcode: NewOp, DL: dl, VT, N1: Quiet0, N2: Quiet1, Flags: Node->getFlags());
8674	}
8675
8676	// If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
8677	// instead if there are no NaNs and there can't be an incompatible zero
8678	// compare: at least one operand isn't +/-0, or there are no signed-zeros.
8679	if ((Node->getFlags().hasNoNaNs() \|\|
8680	(DAG.isKnownNeverNaN(Op: Node->getOperand(Num: `0`)) &&
8681	DAG.isKnownNeverNaN(Op: Node->getOperand(Num: `1`)))) &&
8682	(Node->getFlags().hasNoSignedZeros() \|\|
8683	DAG.isKnownNeverZeroFloat(Op: Node->getOperand(Num: `0`)) \|\|
8684	DAG.isKnownNeverZeroFloat(Op: Node->getOperand(Num: `1`)))) {
8685	unsigned IEEE2018Op =
8686	Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8687	if (isOperationLegalOrCustom(Op: IEEE2018Op, VT))
8688	return DAG.getNode(Opcode: IEEE2018Op, DL: dl, VT, N1: Node->getOperand(Num: `0`),
8689	N2: Node->getOperand(Num: `1`), Flags: Node->getFlags());
8690	}
8691
8692	if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
8693	return SelCC;
8694
8695	return SDValue ();
8696	}
8697
8698	SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
8699	SelectionDAG &DAG) const {
8700	if (SDValue Expanded = expandVectorNaryOpBySplitting(Node: N, DAG))
8701	return Expanded;
8702
8703	SDLoc DL(N);
8704	SDValue LHS = N->getOperand(Num: `0`);
8705	SDValue RHS = N->getOperand(Num: `1`);
8706	unsigned Opc = N->getOpcode();
8707	EVT VT = N->getValueType(ResNo: `0`);
8708	EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
8709	bool IsMax = Opc == ISD::FMAXIMUM;
8710	SDNodeFlags Flags = N->getFlags();
8711
8712	// First, implement comparison not propagating NaN. If no native fmin or fmax
8713	// available, use plain select with setcc instead.
8714	SDValue MinMax;
8715	unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
8716	unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
8717
8718	// FIXME: We should probably define fminnum/fmaxnum variants with correct
8719	// signed zero behavior.
8720	bool MinMaxMustRespectOrderedZero = false;
8721
8722	if (isOperationLegalOrCustom(Op: CompOpcIeee, VT)) {
8723	MinMax = DAG.getNode(Opcode: CompOpcIeee, DL, VT, N1: LHS, N2: RHS, Flags);
8724	MinMaxMustRespectOrderedZero = true;
8725	} else if (isOperationLegalOrCustom(Op: CompOpc, VT)) {
8726	MinMax = DAG.getNode(Opcode: CompOpc, DL, VT, N1: LHS, N2: RHS, Flags);
8727	} else {
8728	if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
8729	return DAG.UnrollVectorOp(N);
8730
8731	// NaN (if exists) will be propagated later, so orderness doesn't matter.
8732	SDValue Compare =
8733	DAG.getSetCC(DL, VT: CCVT, LHS, RHS, Cond: IsMax ? ISD::SETOGT : ISD::SETOLT);
8734	MinMax = DAG.getSelect(DL, VT, Cond: Compare, LHS, RHS, Flags);
8735	}
8736
8737	// Propagate any NaN of both operands
8738	if (!N->getFlags().hasNoNaNs() &&
8739	(!DAG.isKnownNeverNaN(Op: RHS) \|\| !DAG.isKnownNeverNaN(Op: LHS))) {
8740	ConstantFP FPNaN = ConstantFP::get(Context&: DAG.getContext(),
8741	V: APFloat::getNaN(Sem: VT.getFltSemantics()));
8742	MinMax = DAG.getSelect(DL, VT, Cond: DAG.getSetCC(DL, VT: CCVT, LHS, RHS, Cond: ISD::SETUO),
8743	LHS: DAG.getConstantFP(V: *FPNaN, DL, VT), RHS: MinMax, Flags);
8744	}
8745
8746	// fminimum/fmaximum requires -0.0 less than +0.0
8747	if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
8748	!DAG.isKnownNeverZeroFloat(Op: RHS) && !DAG.isKnownNeverZeroFloat(Op: LHS)) {
8749	SDValue IsZero = DAG.getSetCC(DL, VT: CCVT, LHS: MinMax,
8750	RHS: DAG.getConstantFP(Val: `0.0`, DL, VT), Cond: ISD::SETOEQ);
8751	SDValue TestZero =
8752	DAG.getTargetConstant(Val: IsMax ? fcPosZero : fcNegZero, DL, VT: MVT::i32);
8753	SDValue LCmp = DAG.getSelect(
8754	DL, VT, Cond: DAG.getNode(Opcode: ISD::IS_FPCLASS, DL, VT: CCVT, N1: LHS, N2: TestZero), LHS,
8755	RHS: MinMax, Flags);
8756	SDValue RCmp = DAG.getSelect(
8757	DL, VT, Cond: DAG.getNode(Opcode: ISD::IS_FPCLASS, DL, VT: CCVT, N1: RHS, N2: TestZero), LHS: RHS,
8758	RHS: LCmp, Flags);
8759	MinMax = DAG.getSelect(DL, VT, Cond: IsZero, LHS: RCmp, RHS: MinMax, Flags);
8760	}
8761
8762	return MinMax;
8763	}
8764
8765	SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
8766	SelectionDAG &DAG) const {
8767	SDLoc DL(Node);
8768	SDValue LHS = Node->getOperand(Num: `0`);
8769	SDValue RHS = Node->getOperand(Num: `1`);
8770	unsigned Opc = Node->getOpcode();
8771	EVT VT = Node->getValueType(ResNo: `0`);
8772	EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
8773	bool IsMax = Opc == ISD::FMAXIMUMNUM;
8774	const TargetOptions &Options = DAG.getTarget().Options;
8775	SDNodeFlags Flags = Node->getFlags();
8776
8777	unsigned NewOp =
8778	Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8779
8780	if (isOperationLegalOrCustom(Op: NewOp, VT)) {
8781	if (!Flags.hasNoNaNs()) {
8782	// Insert canonicalizes if it's possible we need to quiet to get correct
8783	// sNaN behavior.
8784	if (!DAG.isKnownNeverSNaN(Op: LHS)) {
8785	LHS = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL, VT, Operand: LHS, Flags);
8786	}
8787	if (!DAG.isKnownNeverSNaN(Op: RHS)) {
8788	RHS = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL, VT, Operand: RHS, Flags);
8789	}
8790	}
8791
8792	return DAG.getNode(Opcode: NewOp, DL, VT, N1: LHS, N2: RHS, Flags);
8793	}
8794
8795	// We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has
8796	// same behaviors for all of other cases: +0.0 vs -0.0 included.
8797	if (Flags.hasNoNaNs() \|\|
8798	(DAG.isKnownNeverNaN(Op: LHS) && DAG.isKnownNeverNaN(Op: RHS))) {
8799	unsigned IEEE2019Op =
8800	Opc == ISD::FMINIMUMNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8801	if (isOperationLegalOrCustom(Op: IEEE2019Op, VT))
8802	return DAG.getNode(Opcode: IEEE2019Op, DL, VT, N1: LHS, N2: RHS, Flags);
8803	}
8804
8805	// FMINNUM/FMAXMUM returns qNaN if either operand is sNaN, and it may return
8806	// either one for +0.0 vs -0.0.
8807	if ((Flags.hasNoNaNs() \|\|
8808	(DAG.isKnownNeverSNaN(Op: LHS) && DAG.isKnownNeverSNaN(Op: RHS))) &&
8809	(Flags.hasNoSignedZeros() \|\| DAG.isKnownNeverZeroFloat(Op: LHS) \|\|
8810	DAG.isKnownNeverZeroFloat(Op: RHS))) {
8811	unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
8812	if (isOperationLegalOrCustom(Op: IEEE2008Op, VT))
8813	return DAG.getNode(Opcode: IEEE2008Op, DL, VT, N1: LHS, N2: RHS, Flags);
8814	}
8815
8816	if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
8817	return DAG.UnrollVectorOp(N: Node);
8818
8819	// If only one operand is NaN, override it with another operand.
8820	if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(Op: LHS)) {
8821	LHS = DAG.getSelectCC(DL, LHS, RHS: LHS, True: RHS, False: LHS, Cond: ISD::SETUO);
8822	}
8823	if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(Op: RHS)) {
8824	RHS = DAG.getSelectCC(DL, LHS: RHS, RHS, True: LHS, False: RHS, Cond: ISD::SETUO);
8825	}
8826
8827	SDValue MinMax =
8828	DAG.getSelectCC(DL, LHS, RHS, True: LHS, False: RHS, Cond: IsMax ? ISD::SETGT : ISD::SETLT);
8829
8830	// TODO: We need quiet sNaN if strictfp.
8831
8832	// Fixup signed zero behavior.
8833	if (Options.NoSignedZerosFPMath \|\| Flags.hasNoSignedZeros() \|\|
8834	DAG.isKnownNeverZeroFloat(Op: LHS) \|\| DAG.isKnownNeverZeroFloat(Op: RHS)) {
8835	return MinMax;
8836	}
8837	SDValue TestZero =
8838	DAG.getTargetConstant(Val: IsMax ? fcPosZero : fcNegZero, DL, VT: MVT::i32);
8839	SDValue IsZero = DAG.getSetCC(DL, VT: CCVT, LHS: MinMax,
8840	RHS: DAG.getConstantFP(Val: `0.0`, DL, VT), Cond: ISD::SETEQ);
8841	SDValue LCmp = DAG.getSelect(
8842	DL, VT, Cond: DAG.getNode(Opcode: ISD::IS_FPCLASS, DL, VT: CCVT, N1: LHS, N2: TestZero), LHS,
8843	RHS: MinMax, Flags);
8844	SDValue RCmp = DAG.getSelect(
8845	DL, VT, Cond: DAG.getNode(Opcode: ISD::IS_FPCLASS, DL, VT: CCVT, N1: RHS, N2: TestZero), LHS: RHS, RHS: LCmp,
8846	Flags);
8847	return DAG.getSelect(DL, VT, Cond: IsZero, LHS: RCmp, RHS: MinMax, Flags);
8848	}
8849
8850	/// Returns a true value if if this FPClassTest can be performed with an ordered
8851	/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8852	/// std::nullopt if it cannot be performed as a compare with 0.
8853	static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8854	const fltSemantics &Semantics,
8855	const MachineFunction &MF) {
8856	FPClassTest OrderedMask = Test & ~fcNan;
8857	FPClassTest NanTest = Test & fcNan;
8858	bool IsOrdered = NanTest == fcNone;
8859	bool IsUnordered = NanTest == fcNan;
8860
8861	// Skip cases that are testing for only a qnan or snan.
8862	if (!IsOrdered && !IsUnordered)
8863	return std::nullopt;
8864
8865	if (OrderedMask == fcZero &&
8866	MF.getDenormalMode(FPType: Semantics).Input == DenormalMode::IEEE)
8867	return IsOrdered;
8868	if (OrderedMask == (fcZero \| fcSubnormal) &&
8869	MF.getDenormalMode(FPType: Semantics).inputsAreZero())
8870	return IsOrdered;
8871	return std::nullopt;
8872	}
8873
8874	SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
8875	const FPClassTest OrigTestMask,
8876	SDNodeFlags Flags, const SDLoc &DL,
8877	SelectionDAG &DAG) const {
8878	EVT OperandVT = Op.getValueType();
8879	assert(OperandVT.isFloatingPoint());
8880	FPClassTest Test = OrigTestMask;
8881
8882	// Degenerated cases.
8883	if (Test == fcNone)
8884	return DAG.getBoolConstant(V: false, DL, VT: ResultVT, OpVT: OperandVT);
8885	if (Test == fcAllFlags)
8886	return DAG.getBoolConstant(V: true, DL, VT: ResultVT, OpVT: OperandVT);
8887
8888	// PPC double double is a pair of doubles, of which the higher part determines
8889	// the value class.
8890	if (OperandVT == MVT::ppcf128) {
8891	Op = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL, VT: MVT::f64, N1: Op,
8892	N2: DAG.getConstant(Val: `1`, DL, VT: MVT::i32));
8893	OperandVT = MVT::f64;
8894	}
8895
8896	// Floating-point type properties.
8897	EVT ScalarFloatVT = OperandVT.getScalarType();
8898	const Type FloatTy = ScalarFloatVT.getTypeForEVT(Context&: DAG.getContext());
8899	const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
8900	bool IsF80 = (ScalarFloatVT == MVT::f80);
8901
8902	// Some checks can be implemented using float comparisons, if floating point
8903	// exceptions are ignored.
8904	if (Flags.hasNoFPExcept() &&
8905	isOperationLegalOrCustom(Op: ISD::SETCC, VT: OperandVT.getScalarType())) {
8906	FPClassTest FPTestMask = Test;
8907	bool IsInvertedFP = false;
8908
8909	if (FPClassTest InvertedFPCheck =
8910	invertFPClassTestIfSimpler(Test: FPTestMask, UseFCmp: true)) {
8911	FPTestMask = InvertedFPCheck;
8912	IsInvertedFP = true;
8913	}
8914
8915	ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
8916	ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
8917
8918	// See if we can fold an \| fcNan into an unordered compare.
8919	FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
8920
8921	// Can't fold the ordered check if we're only testing for snan or qnan
8922	// individually.
8923	if ((FPTestMask & fcNan) != fcNan)
8924	OrderedFPTestMask = FPTestMask;
8925
8926	const bool IsOrdered = FPTestMask == OrderedFPTestMask;
8927
8928	if (std::optional<bool> IsCmp0 =
8929	isFCmpEqualZero(Test: FPTestMask, Semantics, MF: DAG.getMachineFunction());
8930	IsCmp0 && (isCondCodeLegalOrCustom(
8931	CC: *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
8932	VT: OperandVT.getScalarType().getSimpleVT()))) {
8933
8934	// If denormals could be implicitly treated as 0, this is not equivalent
8935	// to a compare with 0 since it will also be true for denormals.
8936	return DAG.getSetCC(DL, VT: ResultVT, LHS: Op,
8937	RHS: DAG.getConstantFP(Val: `0.0`, DL, VT: OperandVT),
8938	Cond: *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
8939	}
8940
8941	if (FPTestMask == fcNan &&
8942	isCondCodeLegalOrCustom(CC: IsInvertedFP ? ISD::SETO : ISD::SETUO,
8943	VT: OperandVT.getScalarType().getSimpleVT()))
8944	return DAG.getSetCC(DL, VT: ResultVT, LHS: Op, RHS: Op,
8945	Cond: IsInvertedFP ? ISD::SETO : ISD::SETUO);
8946
8947	bool IsOrderedInf = FPTestMask == fcInf;
8948	if ((FPTestMask == fcInf \|\| FPTestMask == (fcInf \| fcNan)) &&
8949	isCondCodeLegalOrCustom(CC: IsOrderedInf ? OrderedCmpOpcode
8950	: UnorderedCmpOpcode,
8951	VT: OperandVT.getScalarType().getSimpleVT()) &&
8952	isOperationLegalOrCustom(Op: ISD::FABS, VT: OperandVT.getScalarType()) &&
8953	(isOperationLegal(Op: ISD::ConstantFP, VT: OperandVT.getScalarType()) \|\|
8954	(OperandVT.isVector() &&
8955	isOperationLegalOrCustom(Op: ISD::BUILD_VECTOR, VT: OperandVT)))) {
8956	// isinf(x) --> fabs(x) == inf
8957	SDValue Abs = DAG.getNode(Opcode: ISD::FABS, DL, VT: OperandVT, Operand: Op);
8958	SDValue Inf =
8959	DAG.getConstantFP(Val: APFloat::getInf(Sem: Semantics), DL, VT: OperandVT);
8960	return DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: Inf,
8961	Cond: IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
8962	}
8963
8964	if ((OrderedFPTestMask == fcPosInf \|\| OrderedFPTestMask == fcNegInf) &&
8965	isCondCodeLegalOrCustom(CC: IsOrdered ? OrderedCmpOpcode
8966	: UnorderedCmpOpcode,
8967	VT: OperandVT.getSimpleVT())) {
8968	// isposinf(x) --> x == inf
8969	// isneginf(x) --> x == -inf
8970	// isposinf(x) \|\| nan --> x u== inf
8971	// isneginf(x) \|\| nan --> x u== -inf
8972
8973	SDValue Inf = DAG.getConstantFP(
8974	Val: APFloat::getInf(Sem: Semantics, Negative: OrderedFPTestMask == fcNegInf), DL,
8975	VT: OperandVT);
8976	return DAG.getSetCC(DL, VT: ResultVT, LHS: Op, RHS: Inf,
8977	Cond: IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
8978	}
8979
8980	if (OrderedFPTestMask == (fcSubnormal \| fcZero) && !IsOrdered) {
8981	// TODO: Could handle ordered case, but it produces worse code for
8982	// x86. Maybe handle ordered if fabs is free?
8983
8984	ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
8985	ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
8986
8987	if (isCondCodeLegalOrCustom(CC: IsOrdered ? OrderedOp : UnorderedOp,
8988	VT: OperandVT.getScalarType().getSimpleVT())) {
8989	// (issubnormal(x) \|\| iszero(x)) --> fabs(x) < smallest_normal
8990
8991	// TODO: Maybe only makes sense if fabs is free. Integer test of
8992	// exponent bits seems better for x86.
8993	SDValue Abs = DAG.getNode(Opcode: ISD::FABS, DL, VT: OperandVT, Operand: Op);
8994	SDValue SmallestNormal = DAG.getConstantFP(
8995	Val: APFloat::getSmallestNormalized(Sem: Semantics), DL, VT: OperandVT);
8996	return DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: SmallestNormal,
8997	Cond: IsOrdered ? OrderedOp : UnorderedOp);
8998	}
8999	}
9000
9001	if (FPTestMask == fcNormal) {
9002	// TODO: Handle unordered
9003	ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9004	ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;
9005
9006	if (isCondCodeLegalOrCustom(CC: IsFiniteOp,
9007	VT: OperandVT.getScalarType().getSimpleVT()) &&
9008	isCondCodeLegalOrCustom(CC: IsNormalOp,
9009	VT: OperandVT.getScalarType().getSimpleVT()) &&
9010	isFAbsFree(VT: OperandVT)) {
9011	// isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
9012	SDValue Inf =
9013	DAG.getConstantFP(Val: APFloat::getInf(Sem: Semantics), DL, VT: OperandVT);
9014	SDValue SmallestNormal = DAG.getConstantFP(
9015	Val: APFloat::getSmallestNormalized(Sem: Semantics), DL, VT: OperandVT);
9016
9017	SDValue Abs = DAG.getNode(Opcode: ISD::FABS, DL, VT: OperandVT, Operand: Op);
9018	SDValue IsFinite = DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: Inf, Cond: IsFiniteOp);
9019	SDValue IsNormal =
9020	DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: SmallestNormal, Cond: IsNormalOp);
9021	unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
9022	return DAG.getNode(Opcode: LogicOp, DL, VT: ResultVT, N1: IsFinite, N2: IsNormal);
9023	}
9024	}
9025	}
9026
9027	// Some checks may be represented as inversion of simpler check, for example
9028	// "inf\|normal\|subnormal\|zero" => !"nan".
9029	bool IsInverted = false;
9030
9031	if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, UseFCmp: false)) {
9032	Test = InvertedCheck;
9033	IsInverted = true;
9034	}
9035
9036	// In the general case use integer operations.
9037	unsigned BitSize = OperandVT.getScalarSizeInBits();
9038	EVT IntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: BitSize);
9039	if (OperandVT.isVector())
9040	IntVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: IntVT,
9041	EC: OperandVT.getVectorElementCount());
9042	SDValue OpAsInt = DAG.getBitcast(VT: IntVT, V: Op);
9043
9044	// Various masks.
9045	APInt SignBit = APInt::getSignMask(BitWidth: BitSize);
9046	APInt ValueMask = APInt::getSignedMaxValue(numBits: BitSize); // All bits but sign.
9047	APInt Inf = APFloat::getInf(Sem: Semantics).bitcastToAPInt(); // Exp and int bit.
9048	const unsigned ExplicitIntBitInF80 = `63`;
9049	APInt ExpMask = Inf;
9050	if (IsF80)
9051	ExpMask.clearBit(BitPosition: ExplicitIntBitInF80);
9052	APInt AllOneMantissa = APFloat::getLargest(Sem: Semantics).bitcastToAPInt() & ~Inf;
9053	APInt QNaNBitMask =
9054	APInt::getOneBitSet(numBits: BitSize, BitNo: AllOneMantissa.getActiveBits() - `1`);
9055	APInt InvertionMask = APInt::getAllOnes(numBits: ResultVT.getScalarSizeInBits());
9056
9057	SDValue ValueMaskV = DAG.getConstant(Val: ValueMask, DL, VT: IntVT);
9058	SDValue SignBitV = DAG.getConstant(Val: SignBit, DL, VT: IntVT);
9059	SDValue ExpMaskV = DAG.getConstant(Val: ExpMask, DL, VT: IntVT);
9060	SDValue ZeroV = DAG.getConstant(Val: `0`, DL, VT: IntVT);
9061	SDValue InfV = DAG.getConstant(Val: Inf, DL, VT: IntVT);
9062	SDValue ResultInvertionMask = DAG.getConstant(Val: InvertionMask, DL, VT: ResultVT);
9063
9064	SDValue Res;
9065	const auto appendResult = [&](SDValue PartialRes) {
9066	if (PartialRes) {
9067	if (Res)
9068	Res = DAG.getNode(Opcode: ISD::OR, DL, VT: ResultVT, N1: Res, N2: PartialRes);
9069	else
9070	Res = PartialRes;
9071	}
9072	};
9073
9074	SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
9075	const auto getIntBitIsSet = [&]() -> SDValue {
9076	if (!IntBitIsSetV) {
9077	APInt IntBitMask(BitSize, `0`);
9078	IntBitMask.setBit(ExplicitIntBitInF80);
9079	SDValue IntBitMaskV = DAG.getConstant(Val: IntBitMask, DL, VT: IntVT);
9080	SDValue IntBitV = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: IntBitMaskV);
9081	IntBitIsSetV = DAG.getSetCC(DL, VT: ResultVT, LHS: IntBitV, RHS: ZeroV, Cond: ISD::SETNE);
9082	}
9083	return IntBitIsSetV;
9084	};
9085
9086	// Split the value into sign bit and absolute value.
9087	SDValue AbsV = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: ValueMaskV);
9088	SDValue SignV = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt,
9089	RHS: DAG.getConstant(Val: `0`, DL, VT: IntVT), Cond: ISD::SETLT);
9090
9091	// Tests that involve more than one class should be processed first.
9092	SDValue PartialRes;
9093
9094	if (IsF80)
9095	; // Detect finite numbers of f80 by checking individual classes because
9096	// they have different settings of the explicit integer bit.
9097	else if ((Test & fcFinite) == fcFinite) {
9098	// finite(V) ==> abs(V) < exp_mask
9099	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ExpMaskV, Cond: ISD::SETLT);
9100	Test &= ~fcFinite;
9101	} else if ((Test & fcFinite) == fcPosFinite) {
9102	// finite(V) && V > 0 ==> V < exp_mask
9103	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: ExpMaskV, Cond: ISD::SETULT);
9104	Test &= ~fcPosFinite;
9105	} else if ((Test & fcFinite) == fcNegFinite) {
9106	// finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
9107	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ExpMaskV, Cond: ISD::SETLT);
9108	PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
9109	Test &= ~fcNegFinite;
9110	}
9111	appendResult (PartialRes);
9112
9113	if (FPClassTest PartialCheck = Test & (fcZero \| fcSubnormal)) {
9114	// fcZero \| fcSubnormal => test all exponent bits are 0
9115	// TODO: Handle sign bit specific cases
9116	if (PartialCheck == (fcZero \| fcSubnormal)) {
9117	SDValue ExpBits = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: ExpMaskV);
9118	SDValue ExpIsZero =
9119	DAG.getSetCC(DL, VT: ResultVT, LHS: ExpBits, RHS: ZeroV, Cond: ISD::SETEQ);
9120	appendResult (ExpIsZero);
9121	Test &= ~PartialCheck & fcAllFlags;
9122	}
9123	}
9124
9125	// Check for individual classes.
9126
9127	if (unsigned PartialCheck = Test & fcZero) {
9128	if (PartialCheck == fcPosZero)
9129	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: ZeroV, Cond: ISD::SETEQ);
9130	else if (PartialCheck == fcZero)
9131	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ZeroV, Cond: ISD::SETEQ);
9132	else // ISD::fcNegZero
9133	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: SignBitV, Cond: ISD::SETEQ);
9134	appendResult (PartialRes);
9135	}
9136
9137	if (unsigned PartialCheck = Test & fcSubnormal) {
9138	// issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
9139	// issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
9140	SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
9141	SDValue MantissaV = DAG.getConstant(Val: AllOneMantissa, DL, VT: IntVT);
9142	SDValue VMinusOneV =
9143	DAG.getNode(Opcode: ISD::SUB, DL, VT: IntVT, N1: V, N2: DAG.getConstant(Val: `1`, DL, VT: IntVT));
9144	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: VMinusOneV, RHS: MantissaV, Cond: ISD::SETULT);
9145	if (PartialCheck == fcNegSubnormal)
9146	PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
9147	appendResult (PartialRes);
9148	}
9149
9150	if (unsigned PartialCheck = Test & fcInf) {
9151	if (PartialCheck == fcPosInf)
9152	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: InfV, Cond: ISD::SETEQ);
9153	else if (PartialCheck == fcInf)
9154	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETEQ);
9155	else { // ISD::fcNegInf
9156	APInt NegInf = APFloat::getInf(Sem: Semantics, Negative: true).bitcastToAPInt();
9157	SDValue NegInfV = DAG.getConstant(Val: NegInf, DL, VT: IntVT);
9158	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: NegInfV, Cond: ISD::SETEQ);
9159	}
9160	appendResult (PartialRes);
9161	}
9162
9163	if (unsigned PartialCheck = Test & fcNan) {
9164	APInt InfWithQnanBit = Inf \| QNaNBitMask;
9165	SDValue InfWithQnanBitV = DAG.getConstant(Val: InfWithQnanBit, DL, VT: IntVT);
9166	if (PartialCheck == fcNan) {
9167	// isnan(V) ==> abs(V) > int(inf)
9168	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETGT);
9169	if (IsF80) {
9170	// Recognize unsupported values as NaNs for compatibility with glibc.
9171	// In them (exp(V)==0) == int_bit.
9172	SDValue ExpBits = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: AbsV, N2: ExpMaskV);
9173	SDValue ExpIsZero =
9174	DAG.getSetCC(DL, VT: ResultVT, LHS: ExpBits, RHS: ZeroV, Cond: ISD::SETEQ);
9175	SDValue IsPseudo =
9176	DAG.getSetCC(DL, VT: ResultVT, LHS: getIntBitIsSet (), RHS: ExpIsZero, Cond: ISD::SETEQ);
9177	PartialRes = DAG.getNode(Opcode: ISD::OR, DL, VT: ResultVT, N1: PartialRes, N2: IsPseudo);
9178	}
9179	} else if (PartialCheck == fcQNan) {
9180	// isquiet(V) ==> abs(V) >= (unsigned(Inf) \| quiet_bit)
9181	PartialRes =
9182	DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfWithQnanBitV, Cond: ISD::SETGE);
9183	} else { // ISD::fcSNan
9184	// issignaling(V) ==> abs(V) > unsigned(Inf) &&
9185	// abs(V) < (unsigned(Inf) \| quiet_bit)
9186	SDValue IsNan = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETGT);
9187	SDValue IsNotQnan =
9188	DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfWithQnanBitV, Cond: ISD::SETLT);
9189	PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: IsNan, N2: IsNotQnan);
9190	}
9191	appendResult (PartialRes);
9192	}
9193
9194	if (unsigned PartialCheck = Test & fcNormal) {
9195	// isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
9196	APInt ExpLSB = ExpMask & ~(ExpMask.shl(shiftAmt: `1`));
9197	SDValue ExpLSBV = DAG.getConstant(Val: ExpLSB, DL, VT: IntVT);
9198	SDValue ExpMinus1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: IntVT, N1: AbsV, N2: ExpLSBV);
9199	APInt ExpLimit = ExpMask - ExpLSB;
9200	SDValue ExpLimitV = DAG.getConstant(Val: ExpLimit, DL, VT: IntVT);
9201	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: ExpMinus1, RHS: ExpLimitV, Cond: ISD::SETULT);
9202	if (PartialCheck == fcNegNormal)
9203	PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
9204	else if (PartialCheck == fcPosNormal) {
9205	SDValue PosSignV =
9206	DAG.getNode(Opcode: ISD::XOR, DL, VT: ResultVT, N1: SignV, N2: ResultInvertionMask);
9207	PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: PosSignV);
9208	}
9209	if (IsF80)
9210	PartialRes =
9211	DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: getIntBitIsSet ());
9212	appendResult (PartialRes);
9213	}
9214
9215	if (!Res)
9216	return DAG.getConstant(Val: IsInverted, DL, VT: ResultVT);
9217	if (IsInverted)
9218	Res = DAG.getNode(Opcode: ISD::XOR, DL, VT: ResultVT, N1: Res, N2: ResultInvertionMask);
9219	return Res;
9220	}
9221
9222	// Only expand vector types if we have the appropriate vector bit operations.
9223	static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
9224	assert(VT.isVector() && "Expected vector type");
9225	unsigned Len = VT.getScalarSizeInBits();
9226	return TLI.isOperationLegalOrCustom(Op: ISD::ADD, VT) &&
9227	TLI.isOperationLegalOrCustom(Op: ISD::SUB, VT) &&
9228	TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
9229	(Len == `8` \|\| TLI.isOperationLegalOrCustom(Op: ISD::MUL, VT)) &&
9230	TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT);
9231	}
9232
9233	SDValue TargetLowering::expandCTPOP(SDNode Node, SelectionDAG &DAG) const* {
9234	SDLoc dl(Node);
9235	EVT VT = Node->getValueType(ResNo: `0`);
9236	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9237	SDValue Op = Node->getOperand(Num: `0`);
9238	unsigned Len = VT.getScalarSizeInBits();
9239	assert(VT.isInteger() && "CTPOP not implemented for this type.");
9240
9241	// TODO: Add support for irregular type lengths.
9242	if (!(Len <= `128` && Len % `8` == `0`))
9243	return SDValue ();
9244
9245	// Only expand vector types if we have the appropriate vector bit operations.
9246	if (VT.isVector() && !canExpandVectorCTPOP(TLI: *this, VT))
9247	return SDValue ();
9248
9249	// This is the "best" algorithm from
9250	// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9251	SDValue Mask55 =
9252	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x55`)), DL: dl, VT);
9253	SDValue Mask33 =
9254	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x33`)), DL: dl, VT);
9255	SDValue Mask0F =
9256	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x0F`)), DL: dl, VT);
9257
9258	// v = v - ((v >> 1) & 0x55555555...)
9259	Op = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Op,
9260	N2: DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
9261	N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
9262	N2: DAG.getConstant(Val: `1`, DL: dl, VT: ShVT)),
9263	N2: Mask55));
9264	// v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9265	Op = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op, N2: Mask33),
9266	N2: DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
9267	N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
9268	N2: DAG.getConstant(Val: `2`, DL: dl, VT: ShVT)),
9269	N2: Mask33));
9270	// v = (v + (v >> 4)) & 0x0F0F0F0F...
9271	Op = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
9272	N1: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Op,
9273	N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
9274	N2: DAG.getConstant(Val: `4`, DL: dl, VT: ShVT))),
9275	N2: Mask0F);
9276
9277	if (Len <= `8`)
9278	return Op;
9279
9280	// Avoid the multiply if we only have 2 bytes to add.
9281	// TODO: Only doing this for scalars because vectors weren't as obviously
9282	// improved.
9283	if (Len == `16` && !VT.isVector()) {
9284	// v = (v + (v >> 8)) & 0x00FF;
9285	return DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
9286	N1: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Op,
9287	N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
9288	N2: DAG.getConstant(Val: `8`, DL: dl, VT: ShVT))),
9289	N2: DAG.getConstant(Val: `0xFF`, DL: dl, VT));
9290	}
9291
9292	// v = (v 0x01010101...) >> (Len - 8)*
9293	SDValue V;
9294	if (isOperationLegalOrCustomOrPromote(
9295	Op: ISD::MUL, VT: getTypeToTransformTo(Context&: *DAG.getContext(), VT))) {
9296	SDValue Mask01 =
9297	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x01`)), DL: dl, VT);
9298	V = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Op, N2: Mask01);
9299	} else {
9300	V = Op;
9301	for (unsigned Shift = `8`; Shift < Len; Shift *= `2`) {
9302	SDValue ShiftC = DAG.getShiftAmountConstant(Val: Shift, VT, DL: dl);
9303	V = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: V,
9304	N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: V, N2: ShiftC));
9305	}
9306	}
9307	return DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: V, N2: DAG.getConstant(Val: Len - `8`, DL: dl, VT: ShVT));
9308	}
9309
9310	SDValue TargetLowering::expandVPCTPOP(SDNode Node, SelectionDAG &DAG) const* {
9311	SDLoc dl(Node);
9312	EVT VT = Node->getValueType(ResNo: `0`);
9313	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9314	SDValue Op = Node->getOperand(Num: `0`);
9315	SDValue Mask = Node->getOperand(Num: `1`);
9316	SDValue VL = Node->getOperand(Num: `2`);
9317	unsigned Len = VT.getScalarSizeInBits();
9318	assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
9319
9320	// TODO: Add support for irregular type lengths.
9321	if (!(Len <= `128` && Len % `8` == `0`))
9322	return SDValue ();
9323
9324	// This is same algorithm of expandCTPOP from
9325	// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9326	SDValue Mask55 =
9327	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x55`)), DL: dl, VT);
9328	SDValue Mask33 =
9329	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x33`)), DL: dl, VT);
9330	SDValue Mask0F =
9331	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x0F`)), DL: dl, VT);
9332
9333	SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
9334
9335	// v = v - ((v >> 1) & 0x55555555...)
9336	Tmp1 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT,
9337	N1: DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op,
9338	N2: DAG.getConstant(Val: `1`, DL: dl, VT: ShVT), N3: Mask, N4: VL),
9339	N2: Mask55, N3: Mask, N4: VL);
9340	Op = DAG.getNode(Opcode: ISD::VP_SUB, DL: dl, VT, N1: Op, N2: Tmp1, N3: Mask, N4: VL);
9341
9342	// v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9343	Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op, N2: Mask33, N3: Mask, N4: VL);
9344	Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT,
9345	N1: DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op,
9346	N2: DAG.getConstant(Val: `2`, DL: dl, VT: ShVT), N3: Mask, N4: VL),
9347	N2: Mask33, N3: Mask, N4: VL);
9348	Op = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: VL);
9349
9350	// v = (v + (v >> 4)) & 0x0F0F0F0F...
9351	Tmp4 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `4`, DL: dl, VT: ShVT),
9352	N3: Mask, N4: VL),
9353	Tmp5 = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: Op, N2: Tmp4, N3: Mask, N4: VL);
9354	Op = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp5, N2: Mask0F, N3: Mask, N4: VL);
9355
9356	if (Len <= `8`)
9357	return Op;
9358
9359	// v = (v 0x01010101...) >> (Len - 8)*
9360	SDValue V;
9361	if (isOperationLegalOrCustomOrPromote(
9362	Op: ISD::VP_MUL, VT: getTypeToTransformTo(Context&: *DAG.getContext(), VT))) {
9363	SDValue Mask01 =
9364	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x01`)), DL: dl, VT);
9365	V = DAG.getNode(Opcode: ISD::VP_MUL, DL: dl, VT, N1: Op, N2: Mask01, N3: Mask, N4: VL);
9366	} else {
9367	V = Op;
9368	for (unsigned Shift = `8`; Shift < Len; Shift *= `2`) {
9369	SDValue ShiftC = DAG.getShiftAmountConstant(Val: Shift, VT, DL: dl);
9370	V = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: V,
9371	N2: DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: V, N2: ShiftC, N3: Mask, N4: VL),
9372	N3: Mask, N4: VL);
9373	}
9374	}
9375	return DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: V, N2: DAG.getConstant(Val: Len - `8`, DL: dl, VT: ShVT),
9376	N3: Mask, N4: VL);
9377	}
9378
9379	SDValue TargetLowering::expandCTLZ(SDNode Node, SelectionDAG &DAG) const* {
9380	SDLoc dl(Node);
9381	EVT VT = Node->getValueType(ResNo: `0`);
9382	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9383	SDValue Op = Node->getOperand(Num: `0`);
9384	unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9385
9386	// If the non-ZERO_UNDEF version is supported we can use that instead.
9387	if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
9388	isOperationLegalOrCustom(Op: ISD::CTLZ, VT))
9389	return DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Op);
9390
9391	// If the ZERO_UNDEF version is supported use that and handle the zero case.
9392	if (isOperationLegalOrCustom(Op: ISD::CTLZ_ZERO_UNDEF, VT)) {
9393	EVT SetCCVT =
9394	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
9395	SDValue CTLZ = DAG.getNode(Opcode: ISD::CTLZ_ZERO_UNDEF, DL: dl, VT, Operand: Op);
9396	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
9397	SDValue SrcIsZero = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
9398	return DAG.getSelect(DL: dl, VT, Cond: SrcIsZero,
9399	LHS: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT), RHS: CTLZ);
9400	}
9401
9402	// Only expand vector types if we have the appropriate vector bit operations.
9403	// This includes the operations needed to expand CTPOP if it isn't supported.
9404	if (VT.isVector() && (!isPowerOf2_32(Value: NumBitsPerElt) \|\|
9405	(!isOperationLegalOrCustom(Op: ISD::CTPOP, VT) &&
9406	!canExpandVectorCTPOP(TLI: *this, VT)) \|\|
9407	!isOperationLegalOrCustom(Op: ISD::SRL, VT) \|\|
9408	!isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)))
9409	return SDValue ();
9410
9411	// for now, we do this:
9412	// x = x \| (x >> 1);
9413	// x = x \| (x >> 2);
9414	// ...
9415	// x = x \| (x >>16);
9416	// x = x \| (x >>32); // for 64-bit input
9417	// return popcount(~x);
9418	//
9419	// Ref: "Hacker's Delight" by Henry Warren
9420	for (unsigned i = `0`; (`1U` << i) < NumBitsPerElt; ++i) {
9421	SDValue Tmp = DAG.getConstant(Val: `1ULL` << i, DL: dl, VT: ShVT);
9422	Op = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Op,
9423	N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: Tmp));
9424	}
9425	Op = DAG.getNOT(DL: dl, Val: Op, VT);
9426	return DAG.getNode(Opcode: ISD::CTPOP, DL: dl, VT, Operand: Op);
9427	}
9428
9429	SDValue TargetLowering::expandVPCTLZ(SDNode Node, SelectionDAG &DAG) const* {
9430	SDLoc dl(Node);
9431	EVT VT = Node->getValueType(ResNo: `0`);
9432	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9433	SDValue Op = Node->getOperand(Num: `0`);
9434	SDValue Mask = Node->getOperand(Num: `1`);
9435	SDValue VL = Node->getOperand(Num: `2`);
9436	unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9437
9438	// do this:
9439	// x = x \| (x >> 1);
9440	// x = x \| (x >> 2);
9441	// ...
9442	// x = x \| (x >>16);
9443	// x = x \| (x >>32); // for 64-bit input
9444	// return popcount(~x);
9445	for (unsigned i = `0`; (`1U` << i) < NumBitsPerElt; ++i) {
9446	SDValue Tmp = DAG.getConstant(Val: `1ULL` << i, DL: dl, VT: ShVT);
9447	Op = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Op,
9448	N2: DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: Tmp, N3: Mask, N4: VL), N3: Mask,
9449	N4: VL);
9450	}
9451	Op = DAG.getNode(Opcode: ISD::VP_XOR, DL: dl, VT, N1: Op, N2: DAG.getAllOnesConstant(DL: dl, VT),
9452	N3: Mask, N4: VL);
9453	return DAG.getNode(Opcode: ISD::VP_CTPOP, DL: dl, VT, N1: Op, N2: Mask, N3: VL);
9454	}
9455
9456	SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
9457	const SDLoc &DL, EVT VT, SDValue Op,
9458	unsigned BitWidth) const {
9459	if (BitWidth != `32` && BitWidth != `64`)
9460	return SDValue ();
9461	APInt DeBruijn = BitWidth == `32` ? APInt (`32`, `0x077CB531U`)
9462	: APInt (`64`, `0x0218A392CD3D5DBFULL`);
9463	const DataLayout &TD = DAG.getDataLayout();
9464	MachinePointerInfo PtrInfo =
9465	MachinePointerInfo::getConstantPool(MF&: DAG.getMachineFunction());
9466	unsigned ShiftAmt = BitWidth - Log2_32(Value: BitWidth);
9467	SDValue Neg = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: `0`, DL, VT), N2: Op);
9468	SDValue Lookup = DAG.getNode(
9469	Opcode: ISD::SRL, DL, VT,
9470	N1: DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op, N2: Neg),
9471	N2: DAG.getConstant(Val: DeBruijn, DL, VT)),
9472	N2: DAG.getConstant(Val: ShiftAmt, DL, VT));
9473	Lookup = DAG.getSExtOrTrunc(Op: Lookup, DL, VT: getPointerTy(DL: TD));
9474
9475	SmallVector<uint8_t> Table(BitWidth, `0`);
9476	for (unsigned i = `0`; i < BitWidth; i++) {
9477	APInt Shl = DeBruijn.shl(shiftAmt: i);
9478	APInt Lshr = Shl.lshr(shiftAmt: ShiftAmt);
9479	Table [Lshr.getZExtValue()] = i;
9480	}
9481
9482	// Create a ConstantArray in Constant Pool
9483	auto CA = ConstantDataArray::get(Context&: DAG.getContext(), Elts&: Table);
9484	SDValue CPIdx = DAG.getConstantPool(C: CA, VT: getPointerTy(DL: TD),
9485	Align: TD.getPrefTypeAlign(Ty: CA->getType()));
9486	SDValue ExtLoad = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl: DL, VT, Chain: DAG.getEntryNode(),
9487	Ptr: DAG.getMemBasePlusOffset(Base: CPIdx, Offset: Lookup, DL),
9488	PtrInfo, MemVT: MVT::i8);
9489	if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
9490	return ExtLoad;
9491
9492	EVT SetCCVT =
9493	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
9494	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT);
9495	SDValue SrcIsZero = DAG.getSetCC(DL, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
9496	return DAG.getSelect(DL, VT, Cond: SrcIsZero,
9497	LHS: DAG.getConstant(Val: BitWidth, DL, VT), RHS: ExtLoad);
9498	}
9499
9500	SDValue TargetLowering::expandCTTZ(SDNode Node, SelectionDAG &DAG) const* {
9501	SDLoc dl(Node);
9502	EVT VT = Node->getValueType(ResNo: `0`);
9503	SDValue Op = Node->getOperand(Num: `0`);
9504	unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9505
9506	// If the non-ZERO_UNDEF version is supported we can use that instead.
9507	if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
9508	isOperationLegalOrCustom(Op: ISD::CTTZ, VT))
9509	return DAG.getNode(Opcode: ISD::CTTZ, DL: dl, VT, Operand: Op);
9510
9511	// If the ZERO_UNDEF version is supported use that and handle the zero case.
9512	if (isOperationLegalOrCustom(Op: ISD::CTTZ_ZERO_UNDEF, VT)) {
9513	EVT SetCCVT =
9514	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
9515	SDValue CTTZ = DAG.getNode(Opcode: ISD::CTTZ_ZERO_UNDEF, DL: dl, VT, Operand: Op);
9516	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
9517	SDValue SrcIsZero = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
9518	return DAG.getSelect(DL: dl, VT, Cond: SrcIsZero,
9519	LHS: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT), RHS: CTTZ);
9520	}
9521
9522	// Only expand vector types if we have the appropriate vector bit operations.
9523	// This includes the operations needed to expand CTPOP if it isn't supported.
9524	if (VT.isVector() && (!isPowerOf2_32(Value: NumBitsPerElt) \|\|
9525	(!isOperationLegalOrCustom(Op: ISD::CTPOP, VT) &&
9526	!isOperationLegalOrCustom(Op: ISD::CTLZ, VT) &&
9527	!canExpandVectorCTPOP(TLI: *this, VT)) \|\|
9528	!isOperationLegalOrCustom(Op: ISD::SUB, VT) \|\|
9529	!isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) \|\|
9530	!isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT)))
9531	return SDValue ();
9532
9533	// Emit Table Lookup if ISD::CTPOP used in the fallback path below is going
9534	// to be expanded or converted to a libcall.
9535	if (!VT.isVector() && !isOperationLegalOrCustomOrPromote(Op: ISD::CTPOP, VT) &&
9536	!isOperationLegal(Op: ISD::CTLZ, VT))
9537	if (SDValue V = CTTZTableLookup(Node, DAG, DL: dl, VT, Op, BitWidth: NumBitsPerElt))
9538	return V;
9539
9540	// for now, we use: { return popcount(~x & (x - 1)); }
9541	// unless the target has ctlz but not ctpop, in which case we use:
9542	// { return 32 - nlz(~x & (x-1)); }
9543	// Ref: "Hacker's Delight" by Henry Warren
9544	SDValue Tmp = DAG.getNode(
9545	Opcode: ISD::AND, DL: dl, VT, N1: DAG.getNOT(DL: dl, Val: Op, VT),
9546	N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `1`, DL: dl, VT)));
9547
9548	// If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
9549	if (isOperationLegal(Op: ISD::CTLZ, VT) && !isOperationLegal(Op: ISD::CTPOP, VT)) {
9550	return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT),
9551	N2: DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Tmp));
9552	}
9553
9554	return DAG.getNode(Opcode: ISD::CTPOP, DL: dl, VT, Operand: Tmp);
9555	}
9556
9557	SDValue TargetLowering::expandVPCTTZ(SDNode Node, SelectionDAG &DAG) const* {
9558	SDValue Op = Node->getOperand(Num: `0`);
9559	SDValue Mask = Node->getOperand(Num: `1`);
9560	SDValue VL = Node->getOperand(Num: `2`);
9561	SDLoc dl(Node);
9562	EVT VT = Node->getValueType(ResNo: `0`);
9563
9564	// Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9565	SDValue Not = DAG.getNode(Opcode: ISD::VP_XOR, DL: dl, VT, N1: Op,
9566	N2: DAG.getAllOnesConstant(DL: dl, VT), N3: Mask, N4: VL);
9567	SDValue MinusOne = DAG.getNode(Opcode: ISD::VP_SUB, DL: dl, VT, N1: Op,
9568	N2: DAG.getConstant(Val: `1`, DL: dl, VT), N3: Mask, N4: VL);
9569	SDValue Tmp = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Not, N2: MinusOne, N3: Mask, N4: VL);
9570	return DAG.getNode(Opcode: ISD::VP_CTPOP, DL: dl, VT, N1: Tmp, N2: Mask, N3: VL);
9571	}
9572
9573	SDValue TargetLowering::expandVPCTTZElements(SDNode *N,
9574	SelectionDAG &DAG) const {
9575	// %cond = to_bool_vec %source
9576	// %splat = splat /val=/VL
9577	// %tz = step_vector
9578	// %v = vp.select %cond, /true=/tz, /false=/%splat
9579	// %r = vp.reduce.umin %v
9580	SDLoc DL(N);
9581	SDValue Source = N->getOperand(Num: `0`);
9582	SDValue Mask = N->getOperand(Num: `1`);
9583	SDValue EVL = N->getOperand(Num: `2`);
9584	EVT SrcVT = Source.getValueType();
9585	EVT ResVT = N->getValueType(ResNo: `0`);
9586	EVT ResVecVT =
9587	EVT::getVectorVT(Context&: *DAG.getContext(), VT: ResVT, EC: SrcVT.getVectorElementCount());
9588
9589	// Convert to boolean vector.
9590	if (SrcVT.getScalarType() != MVT::i1) {
9591	SDValue AllZero = DAG.getConstant(Val: `0`, DL, VT: SrcVT);
9592	SrcVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i1,
9593	EC: SrcVT.getVectorElementCount());
9594	Source = DAG.getNode(Opcode: ISD::VP_SETCC, DL, VT: SrcVT, N1: Source, N2: AllZero,
9595	N3: DAG.getCondCode(Cond: ISD::SETNE), N4: Mask, N5: EVL);
9596	}
9597
9598	SDValue ExtEVL = DAG.getZExtOrTrunc(Op: EVL, DL, VT: ResVT);
9599	SDValue Splat = DAG.getSplat(VT: ResVecVT, DL, Op: ExtEVL);
9600	SDValue StepVec = DAG.getStepVector(DL, ResVT: ResVecVT);
9601	SDValue Select =
9602	DAG.getNode(Opcode: ISD::VP_SELECT, DL, VT: ResVecVT, N1: Source, N2: StepVec, N3: Splat, N4: EVL);
9603	return DAG.getNode(Opcode: ISD::VP_REDUCE_UMIN, DL, VT: ResVT, N1: ExtEVL, N2: Select, N3: Mask, N4: EVL);
9604	}
9605
9606	SDValue TargetLowering::expandVectorFindLastActive(SDNode *N,
9607	SelectionDAG &DAG) const {
9608	SDLoc DL(N);
9609	SDValue Mask = N->getOperand(Num: `0`);
9610	EVT MaskVT = Mask.getValueType();
9611	EVT BoolVT = MaskVT.getScalarType();
9612
9613	// Find a suitable type for a stepvector.
9614	ConstantRange VScaleRange(`1`, /isFullSet=/true); // Fixed length default.
9615	if (MaskVT.isScalableVector())
9616	VScaleRange = getVScaleRange(F: &DAG.getMachineFunction().getFunction(), BitWidth: `64`);
9617	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9618	unsigned EltWidth = TLI.getBitWidthForCttzElements(
9619	RetTy: BoolVT.getTypeForEVT(Context&: *DAG.getContext()), EC: MaskVT.getVectorElementCount(),
9620	/ZeroIsPoison=/true, VScaleRange: &VScaleRange);
9621	EVT StepVT = MVT::getIntegerVT(BitWidth: EltWidth);
9622	EVT StepVecVT = MaskVT.changeVectorElementType(EltVT: StepVT);
9623
9624	// If promotion is required to make the type legal, do it here; promotion
9625	// of integers within LegalizeVectorOps is looking for types of the same
9626	// size but with a smaller number of larger elements, not the usual larger
9627	// size with the same number of larger elements.
9628	if (TLI.getTypeAction(VT: StepVecVT.getSimpleVT()) ==
9629	TargetLowering::TypePromoteInteger) {
9630	StepVecVT = TLI.getTypeToTransformTo(Context&: *DAG.getContext(), VT: StepVecVT);
9631	StepVT = StepVecVT.getVectorElementType();
9632	}
9633
9634	// Zero out lanes with inactive elements, then find the highest remaining
9635	// value from the stepvector.
9636	SDValue Zeroes = DAG.getConstant(Val: `0`, DL, VT: StepVecVT);
9637	SDValue StepVec = DAG.getStepVector(DL, ResVT: StepVecVT);
9638	SDValue ActiveElts = DAG.getSelect(DL, VT: StepVecVT, Cond: Mask, LHS: StepVec, RHS: Zeroes);
9639	SDValue HighestIdx = DAG.getNode(Opcode: ISD::VECREDUCE_UMAX, DL, VT: StepVT, Operand: ActiveElts);
9640	return DAG.getZExtOrTrunc(Op: HighestIdx, DL, VT: N->getValueType(ResNo: `0`));
9641	}
9642
9643	SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
9644	bool IsNegative) const {
9645	SDLoc dl(N);
9646	EVT VT = N->getValueType(ResNo: `0`);
9647	SDValue Op = N->getOperand(Num: `0`);
9648
9649	// abs(x) -> smax(x,sub(0,x))
9650	if (!IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
9651	isOperationLegal(Op: ISD::SMAX, VT)) {
9652	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
9653	Op = DAG.getFreeze(V: Op);
9654	return DAG.getNode(Opcode: ISD::SMAX, DL: dl, VT, N1: Op,
9655	N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
9656	}
9657
9658	// abs(x) -> umin(x,sub(0,x))
9659	if (!IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
9660	isOperationLegal(Op: ISD::UMIN, VT)) {
9661	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
9662	Op = DAG.getFreeze(V: Op);
9663	return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT, N1: Op,
9664	N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
9665	}
9666
9667	// 0 - abs(x) -> smin(x, sub(0,x))
9668	if (IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
9669	isOperationLegal(Op: ISD::SMIN, VT)) {
9670	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
9671	Op = DAG.getFreeze(V: Op);
9672	return DAG.getNode(Opcode: ISD::SMIN, DL: dl, VT, N1: Op,
9673	N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
9674	}
9675
9676	// Only expand vector types if we have the appropriate vector operations.
9677	if (VT.isVector() &&
9678	(!isOperationLegalOrCustom(Op: ISD::SRA, VT) \|\|
9679	(!IsNegative && !isOperationLegalOrCustom(Op: ISD::ADD, VT)) \|\|
9680	(IsNegative && !isOperationLegalOrCustom(Op: ISD::SUB, VT)) \|\|
9681	!isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT)))
9682	return SDValue ();
9683
9684	Op = DAG.getFreeze(V: Op);
9685	SDValue Shift = DAG.getNode(
9686	Opcode: ISD::SRA, DL: dl, VT, N1: Op,
9687	N2: DAG.getShiftAmountConstant(Val: VT.getScalarSizeInBits() - `1`, VT, DL: dl));
9688	SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Op, N2: Shift);
9689
9690	// abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
9691	if (!IsNegative)
9692	return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Xor, N2: Shift);
9693
9694	// 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
9695	return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Shift, N2: Xor);
9696	}
9697
9698	SDValue TargetLowering::expandABD(SDNode N, SelectionDAG &DAG) const* {
9699	SDLoc dl(N);
9700	EVT VT = N->getValueType(ResNo: `0`);
9701	SDValue LHS = DAG.getFreeze(V: N->getOperand(Num: `0`));
9702	SDValue RHS = DAG.getFreeze(V: N->getOperand(Num: `1`));
9703	bool IsSigned = N->getOpcode() == ISD::ABDS;
9704
9705	// abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
9706	// abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
9707	unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
9708	unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
9709	if (isOperationLegal(Op: MaxOpc, VT) && isOperationLegal(Op: MinOpc, VT)) {
9710	SDValue Max = DAG.getNode(Opcode: MaxOpc, DL: dl, VT, N1: LHS, N2: RHS);
9711	SDValue Min = DAG.getNode(Opcode: MinOpc, DL: dl, VT, N1: LHS, N2: RHS);
9712	return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Max, N2: Min);
9713	}
9714
9715	// abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
9716	if (!IsSigned && isOperationLegal(Op: ISD::USUBSAT, VT))
9717	return DAG.getNode(Opcode: ISD::OR, DL: dl, VT,
9718	N1: DAG.getNode(Opcode: ISD::USUBSAT, DL: dl, VT, N1: LHS, N2: RHS),
9719	N2: DAG.getNode(Opcode: ISD::USUBSAT, DL: dl, VT, N1: RHS, N2: LHS));
9720
9721	// If the subtract doesn't overflow then just use abs(sub())
9722	// NOTE: don't use frozen operands for value tracking.
9723	bool IsNonNegative = DAG.SignBitIsZero(Op: N->getOperand(Num: `1`)) &&
9724	DAG.SignBitIsZero(Op: N->getOperand(Num: `0`));
9725
9726	if (DAG.willNotOverflowSub(IsSigned: IsSigned \|\| IsNonNegative, N0: N->getOperand(Num: `0`),
9727	N1: N->getOperand(Num: `1`)))
9728	return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT,
9729	Operand: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS));
9730
9731	if (DAG.willNotOverflowSub(IsSigned: IsSigned \|\| IsNonNegative, N0: N->getOperand(Num: `1`),
9732	N1: N->getOperand(Num: `0`)))
9733	return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT,
9734	Operand: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: RHS, N2: LHS));
9735
9736	EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
9737	ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
9738	SDValue Cmp = DAG.getSetCC(DL: dl, VT: CCVT, LHS, RHS, Cond: CC);
9739
9740	// Branchless expansion iff cmp result is allbits:
9741	// abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
9742	// abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
9743	if (CCVT == VT && getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
9744	SDValue Diff = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS);
9745	SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Diff, N2: Cmp);
9746	return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Cmp, N2: Xor);
9747	}
9748
9749	// Similar to the branchless expansion, use the (sign-extended) usubo overflow
9750	// flag if the (scalar) type is illegal as this is more likely to legalize
9751	// cleanly:
9752	// abdu(lhs, rhs) -> sub(xor(sub(lhs, rhs), uof(lhs, rhs)), uof(lhs, rhs))
9753	if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT)) {
9754	SDValue USubO =
9755	DAG.getNode(Opcode: ISD::USUBO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::i1), Ops: {LHS, RHS});
9756	SDValue Cmp = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT, Operand: USubO.getValue(R: `1`));
9757	SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: USubO.getValue(R: `0`), N2: Cmp);
9758	return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Xor, N2: Cmp);
9759	}
9760
9761	// FIXME: Should really try to split the vector in case it's legal on a
9762	// subvector.
9763	if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
9764	return DAG.UnrollVectorOp(N);
9765
9766	// abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9767	// abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9768	return DAG.getSelect(DL: dl, VT, Cond: Cmp, LHS: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS),
9769	RHS: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: RHS, N2: LHS));
9770	}
9771
9772	SDValue TargetLowering::expandAVG(SDNode N, SelectionDAG &DAG) const* {
9773	SDLoc dl(N);
9774	EVT VT = N->getValueType(ResNo: `0`);
9775	SDValue LHS = N->getOperand(Num: `0`);
9776	SDValue RHS = N->getOperand(Num: `1`);
9777
9778	unsigned Opc = N->getOpcode();
9779	bool IsFloor = Opc == ISD::AVGFLOORS \|\| Opc == ISD::AVGFLOORU;
9780	bool IsSigned = Opc == ISD::AVGCEILS \|\| Opc == ISD::AVGFLOORS;
9781	unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
9782	unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
9783	unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
9784	unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9785	assert((Opc == ISD::AVGFLOORS \|\| Opc == ISD::AVGCEILS \|\|
9786	Opc == ISD::AVGFLOORU \|\| Opc == ISD::AVGCEILU) &&
9787	"Unknown AVG node");
9788
9789	// If the operands are already extended, we can add+shift.
9790	bool IsExt =
9791	(IsSigned && DAG.ComputeNumSignBits(Op: LHS) >= `2` &&
9792	DAG.ComputeNumSignBits(Op: RHS) >= `2`) \|\|
9793	(!IsSigned && DAG.computeKnownBits(Op: LHS).countMinLeadingZeros() >= `1` &&
9794	DAG.computeKnownBits(Op: RHS).countMinLeadingZeros() >= `1`);
9795	if (IsExt) {
9796	SDValue Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: LHS, N2: RHS);
9797	if (!IsFloor)
9798	Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Sum, N2: DAG.getConstant(Val: `1`, DL: dl, VT));
9799	return DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: Sum,
9800	N2: DAG.getShiftAmountConstant(Val: `1`, VT, DL: dl));
9801	}
9802
9803	// For scalars, see if we can efficiently extend/truncate to use add+shift.
9804	if (VT.isScalarInteger()) {
9805	unsigned BW = VT.getScalarSizeInBits();
9806	EVT ExtVT = VT.getIntegerVT(Context&: DAG.getContext(), BitWidth: `2` BW);
9807	if (isTypeLegal(VT: ExtVT) && isTruncateFree(FromVT: ExtVT, ToVT: VT)) {
9808	LHS = DAG.getNode(Opcode: ExtOpc, DL: dl, VT: ExtVT, Operand: LHS);
9809	RHS = DAG.getNode(Opcode: ExtOpc, DL: dl, VT: ExtVT, Operand: RHS);
9810	SDValue Avg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ExtVT, N1: LHS, N2: RHS);
9811	if (!IsFloor)
9812	Avg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ExtVT, N1: Avg,
9813	N2: DAG.getConstant(Val: `1`, DL: dl, VT: ExtVT));
9814	// Just use SRL as we will be truncating away the extended sign bits.
9815	Avg = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ExtVT, N1: Avg,
9816	N2: DAG.getShiftAmountConstant(Val: `1`, VT: ExtVT, DL: dl));
9817	return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Avg);
9818	}
9819	}
9820
9821	// avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
9822	if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT)) {
9823	SDValue UAddWithOverflow =
9824	DAG.getNode(Opcode: ISD::UADDO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::i1), Ops: {RHS, LHS});
9825
9826	SDValue Sum = UAddWithOverflow.getValue(R: `0`);
9827	SDValue Overflow = UAddWithOverflow.getValue(R: `1`);
9828
9829	// Right shift the sum by 1
9830	SDValue LShrVal = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Sum,
9831	N2: DAG.getShiftAmountConstant(Val: `1`, VT, DL: dl));
9832
9833	SDValue ZeroExtOverflow = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: Overflow);
9834	SDValue OverflowShl = DAG.getNode(
9835	Opcode: ISD::SHL, DL: dl, VT, N1: ZeroExtOverflow,
9836	N2: DAG.getShiftAmountConstant(Val: VT.getScalarSizeInBits() - `1`, VT, DL: dl));
9837
9838	return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: LShrVal, N2: OverflowShl);
9839	}
9840
9841	// avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
9842	// avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
9843	// avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
9844	// avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
9845	LHS = DAG.getFreeze(V: LHS);
9846	RHS = DAG.getFreeze(V: RHS);
9847	SDValue Sign = DAG.getNode(Opcode: SignOpc, DL: dl, VT, N1: LHS, N2: RHS);
9848	SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: LHS, N2: RHS);
9849	SDValue Shift =
9850	DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: Xor, N2: DAG.getShiftAmountConstant(Val: `1`, VT, DL: dl));
9851	return DAG.getNode(Opcode: SumOpc, DL: dl, VT, N1: Sign, N2: Shift);
9852	}
9853
9854	SDValue TargetLowering::expandBSWAP(SDNode N, SelectionDAG &DAG) const* {
9855	SDLoc dl(N);
9856	EVT VT = N->getValueType(ResNo: `0`);
9857	SDValue Op = N->getOperand(Num: `0`);
9858
9859	if (!VT.isSimple())
9860	return SDValue ();
9861
9862	EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9863	SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9864	switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9865	default:
9866	return SDValue ();
9867	case MVT::i16:
9868	// Use a rotate by 8. This can be further expanded if necessary.
9869	return DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT));
9870	case MVT::i32:
9871	Tmp4 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT));
9872	Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
9873	N2: DAG.getConstant(Val: `0xFF00`, DL: dl, VT));
9874	Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT));
9875	Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT));
9876	Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: `0xFF00`, DL: dl, VT));
9877	Tmp1 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT));
9878	Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp3);
9879	Tmp2 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp1);
9880	return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp2);
9881	case MVT::i64:
9882	Tmp8 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `56`, DL: dl, VT: SHVT));
9883	Tmp7 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
9884	N2: DAG.getConstant(Val: `255ULL`<<`8`, DL: dl, VT));
9885	Tmp7 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp7, N2: DAG.getConstant(Val: `40`, DL: dl, VT: SHVT));
9886	Tmp6 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
9887	N2: DAG.getConstant(Val: `255ULL`<<`16`, DL: dl, VT));
9888	Tmp6 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp6, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT));
9889	Tmp5 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
9890	N2: DAG.getConstant(Val: `255ULL`<<`24`, DL: dl, VT));
9891	Tmp5 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp5, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT));
9892	Tmp4 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT));
9893	Tmp4 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp4,
9894	N2: DAG.getConstant(Val: `255ULL`<<`24`, DL: dl, VT));
9895	Tmp3 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT));
9896	Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp3,
9897	N2: DAG.getConstant(Val: `255ULL`<<`16`, DL: dl, VT));
9898	Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `40`, DL: dl, VT: SHVT));
9899	Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2,
9900	N2: DAG.getConstant(Val: `255ULL`<<`8`, DL: dl, VT));
9901	Tmp1 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `56`, DL: dl, VT: SHVT));
9902	Tmp8 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp7);
9903	Tmp6 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp6, N2: Tmp5);
9904	Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp3);
9905	Tmp2 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp1);
9906	Tmp8 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp6);
9907	Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp2);
9908	return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp4);
9909	}
9910	}
9911
9912	SDValue TargetLowering::expandVPBSWAP(SDNode N, SelectionDAG &DAG) const* {
9913	SDLoc dl(N);
9914	EVT VT = N->getValueType(ResNo: `0`);
9915	SDValue Op = N->getOperand(Num: `0`);
9916	SDValue Mask = N->getOperand(Num: `1`);
9917	SDValue EVL = N->getOperand(Num: `2`);
9918
9919	if (!VT.isSimple())
9920	return SDValue ();
9921
9922	EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9923	SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9924	switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9925	default:
9926	return SDValue ();
9927	case MVT::i16:
9928	Tmp1 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT),
9929	N3: Mask, N4: EVL);
9930	Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT),
9931	N3: Mask, N4: EVL);
9932	return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp1, N2: Tmp2, N3: Mask, N4: EVL);
9933	case MVT::i32:
9934	Tmp4 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT),
9935	N3: Mask, N4: EVL);
9936	Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `0xFF00`, DL: dl, VT),
9937	N3: Mask, N4: EVL);
9938	Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT),
9939	N3: Mask, N4: EVL);
9940	Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT),
9941	N3: Mask, N4: EVL);
9942	Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
9943	N2: DAG.getConstant(Val: `0xFF00`, DL: dl, VT), N3: Mask, N4: EVL);
9944	Tmp1 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT),
9945	N3: Mask, N4: EVL);
9946	Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp3, N3: Mask, N4: EVL);
9947	Tmp2 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp1, N3: Mask, N4: EVL);
9948	return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp2, N3: Mask, N4: EVL);
9949	case MVT::i64:
9950	Tmp8 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `56`, DL: dl, VT: SHVT),
9951	N3: Mask, N4: EVL);
9952	Tmp7 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
9953	N2: DAG.getConstant(Val: `255ULL` << `8`, DL: dl, VT), N3: Mask, N4: EVL);
9954	Tmp7 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp7, N2: DAG.getConstant(Val: `40`, DL: dl, VT: SHVT),
9955	N3: Mask, N4: EVL);
9956	Tmp6 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
9957	N2: DAG.getConstant(Val: `255ULL` << `16`, DL: dl, VT), N3: Mask, N4: EVL);
9958	Tmp6 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp6, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT),
9959	N3: Mask, N4: EVL);
9960	Tmp5 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
9961	N2: DAG.getConstant(Val: `255ULL` << `24`, DL: dl, VT), N3: Mask, N4: EVL);
9962	Tmp5 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp5, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT),
9963	N3: Mask, N4: EVL);
9964	Tmp4 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT),
9965	N3: Mask, N4: EVL);
9966	Tmp4 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp4,
9967	N2: DAG.getConstant(Val: `255ULL` << `24`, DL: dl, VT), N3: Mask, N4: EVL);
9968	Tmp3 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT),
9969	N3: Mask, N4: EVL);
9970	Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp3,
9971	N2: DAG.getConstant(Val: `255ULL` << `16`, DL: dl, VT), N3: Mask, N4: EVL);
9972	Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `40`, DL: dl, VT: SHVT),
9973	N3: Mask, N4: EVL);
9974	Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
9975	N2: DAG.getConstant(Val: `255ULL` << `8`, DL: dl, VT), N3: Mask, N4: EVL);
9976	Tmp1 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `56`, DL: dl, VT: SHVT),
9977	N3: Mask, N4: EVL);
9978	Tmp8 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp7, N3: Mask, N4: EVL);
9979	Tmp6 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp6, N2: Tmp5, N3: Mask, N4: EVL);
9980	Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp3, N3: Mask, N4: EVL);
9981	Tmp2 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp1, N3: Mask, N4: EVL);
9982	Tmp8 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp6, N3: Mask, N4: EVL);
9983	Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp2, N3: Mask, N4: EVL);
9984	return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp4, N3: Mask, N4: EVL);
9985	}
9986	}
9987
9988	SDValue TargetLowering::expandBITREVERSE(SDNode N, SelectionDAG &DAG) const* {
9989	SDLoc dl(N);
9990	EVT VT = N->getValueType(ResNo: `0`);
9991	SDValue Op = N->getOperand(Num: `0`);
9992	EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9993	unsigned Sz = VT.getScalarSizeInBits();
9994
9995	SDValue Tmp, Tmp2, Tmp3;
9996
9997	// If we can, perform BSWAP first and then the mask+swap the i4, then i2
9998	// and finally the i1 pairs.
9999	// TODO: We can easily support i4/i2 legal types if any target ever does.
10000	if (Sz >= `8` && isPowerOf2_32(Value: Sz)) {
10001	// Create the masks - repeating the pattern every byte.
10002	APInt Mask4 = APInt::getSplat(NewLen: Sz, V: APInt (`8`, `0x0F`));
10003	APInt Mask2 = APInt::getSplat(NewLen: Sz, V: APInt (`8`, `0x33`));
10004	APInt Mask1 = APInt::getSplat(NewLen: Sz, V: APInt (`8`, `0x55`));
10005
10006	// BSWAP if the type is wider than a single byte.
10007	Tmp = (Sz > `8` ? DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT, Operand: Op) : Op);
10008
10009	// swap i4: ((V >> 4) & 0x0F) \| ((V & 0x0F) << 4)
10010	Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: `4`, DL: dl, VT: SHVT));
10011	Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask4, DL: dl, VT));
10012	Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask4, DL: dl, VT));
10013	Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `4`, DL: dl, VT: SHVT));
10014	Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
10015
10016	// swap i2: ((V >> 2) & 0x33) \| ((V & 0x33) << 2)
10017	Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: `2`, DL: dl, VT: SHVT));
10018	Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask2, DL: dl, VT));
10019	Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask2, DL: dl, VT));
10020	Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `2`, DL: dl, VT: SHVT));
10021	Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
10022
10023	// swap i1: ((V >> 1) & 0x55) \| ((V & 0x55) << 1)
10024	Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: `1`, DL: dl, VT: SHVT));
10025	Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask1, DL: dl, VT));
10026	Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask1, DL: dl, VT));
10027	Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `1`, DL: dl, VT: SHVT));
10028	Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
10029	return Tmp;
10030	}
10031
10032	Tmp = DAG.getConstant(Val: `0`, DL: dl, VT);
10033	for (unsigned I = `0`, J = Sz-`1`; I < Sz; ++I, --J) {
10034	if (I < J)
10035	Tmp2 =
10036	DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: J - I, DL: dl, VT: SHVT));
10037	else
10038	Tmp2 =
10039	DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: I - J, DL: dl, VT: SHVT));
10040
10041	APInt Shift = APInt::getOneBitSet(numBits: Sz, BitNo: J);
10042	Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Shift, DL: dl, VT));
10043	Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp, N2: Tmp2);
10044	}
10045
10046	return Tmp;
10047	}
10048
10049	SDValue TargetLowering::expandVPBITREVERSE(SDNode N, SelectionDAG &DAG) const* {
10050	assert(N->getOpcode() == ISD::VP_BITREVERSE);
10051
10052	SDLoc dl(N);
10053	EVT VT = N->getValueType(ResNo: `0`);
10054	SDValue Op = N->getOperand(Num: `0`);
10055	SDValue Mask = N->getOperand(Num: `1`);
10056	SDValue EVL = N->getOperand(Num: `2`);
10057	EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
10058	unsigned Sz = VT.getScalarSizeInBits();
10059
10060	SDValue Tmp, Tmp2, Tmp3;
10061
10062	// If we can, perform BSWAP first and then the mask+swap the i4, then i2
10063	// and finally the i1 pairs.
10064	// TODO: We can easily support i4/i2 legal types if any target ever does.
10065	if (Sz >= `8` && isPowerOf2_32(Value: Sz)) {
10066	// Create the masks - repeating the pattern every byte.
10067	APInt Mask4 = APInt::getSplat(NewLen: Sz, V: APInt (`8`, `0x0F`));
10068	APInt Mask2 = APInt::getSplat(NewLen: Sz, V: APInt (`8`, `0x33`));
10069	APInt Mask1 = APInt::getSplat(NewLen: Sz, V: APInt (`8`, `0x55`));
10070
10071	// BSWAP if the type is wider than a single byte.
10072	Tmp = (Sz > `8` ? DAG.getNode(Opcode: ISD::VP_BSWAP, DL: dl, VT, N1: Op, N2: Mask, N3: EVL) : Op);
10073
10074	// swap i4: ((V >> 4) & 0x0F) \| ((V & 0x0F) << 4)
10075	Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: `4`, DL: dl, VT: SHVT),
10076	N3: Mask, N4: EVL);
10077	Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
10078	N2: DAG.getConstant(Val: Mask4, DL: dl, VT), N3: Mask, N4: EVL);
10079	Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask4, DL: dl, VT),
10080	N3: Mask, N4: EVL);
10081	Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `4`, DL: dl, VT: SHVT),
10082	N3: Mask, N4: EVL);
10083	Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);
10084
10085	// swap i2: ((V >> 2) & 0x33) \| ((V & 0x33) << 2)
10086	Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: `2`, DL: dl, VT: SHVT),
10087	N3: Mask, N4: EVL);
10088	Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
10089	N2: DAG.getConstant(Val: Mask2, DL: dl, VT), N3: Mask, N4: EVL);
10090	Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask2, DL: dl, VT),
10091	N3: Mask, N4: EVL);
10092	Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `2`, DL: dl, VT: SHVT),
10093	N3: Mask, N4: EVL);
10094	Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);
10095
10096	// swap i1: ((V >> 1) & 0x55) \| ((V & 0x55) << 1)
10097	Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: `1`, DL: dl, VT: SHVT),
10098	N3: Mask, N4: EVL);
10099	Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
10100	N2: DAG.getConstant(Val: Mask1, DL: dl, VT), N3: Mask, N4: EVL);
10101	Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask1, DL: dl, VT),
10102	N3: Mask, N4: EVL);
10103	Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `1`, DL: dl, VT: SHVT),
10104	N3: Mask, N4: EVL);
10105	Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);
10106	return Tmp;
10107	}
10108	return SDValue ();
10109	}
10110
10111	std::pair<SDValue, SDValue>
10112	TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
10113	SelectionDAG &DAG) const {
10114	SDLoc SL(LD);
10115	SDValue Chain = LD->getChain();
10116	SDValue BasePTR = LD->getBasePtr();
10117	EVT SrcVT = LD->getMemoryVT();
10118	EVT DstVT = LD->getValueType(ResNo: `0`);
10119	ISD::LoadExtType ExtType = LD->getExtensionType();
10120
10121	if (SrcVT.isScalableVector())
10122	report_fatal_error(reason: "Cannot scalarize scalable vector loads");
10123
10124	unsigned NumElem = SrcVT.getVectorNumElements();
10125
10126	EVT SrcEltVT = SrcVT.getScalarType();
10127	EVT DstEltVT = DstVT.getScalarType();
10128
10129	// A vector must always be stored in memory as-is, i.e. without any padding
10130	// between the elements, since various code depend on it, e.g. in the
10131	// handling of a bitcast of a vector type to int, which may be done with a
10132	// vector store followed by an integer load. A vector that does not have
10133	// elements that are byte-sized must therefore be stored as an integer
10134	// built out of the extracted vector elements.
10135	if (!SrcEltVT.isByteSized()) {
10136	unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
10137	EVT LoadVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumLoadBits);
10138
10139	unsigned NumSrcBits = SrcVT.getSizeInBits();
10140	EVT SrcIntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumSrcBits);
10141
10142	unsigned SrcEltBits = SrcEltVT.getSizeInBits();
10143	SDValue SrcEltBitMask = DAG.getConstant(
10144	Val: APInt::getLowBitsSet(numBits: NumLoadBits, loBitsSet: SrcEltBits), DL: SL, VT: LoadVT);
10145
10146	// Load the whole vector and avoid masking off the top bits as it makes
10147	// the codegen worse.
10148	SDValue Load =
10149	DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl: SL, VT: LoadVT, Chain, Ptr: BasePTR,
10150	PtrInfo: LD->getPointerInfo(), MemVT: SrcIntVT, Alignment: LD->getBaseAlign(),
10151	MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
10152
10153	SmallVector<SDValue, `8`> Vals;
10154	for (unsigned Idx = `0`; Idx < NumElem; ++Idx) {
10155	unsigned ShiftIntoIdx =
10156	(DAG.getDataLayout().isBigEndian() ? (NumElem - `1`) - Idx : Idx);
10157	SDValue ShiftAmount = DAG.getShiftAmountConstant(
10158	Val: ShiftIntoIdx * SrcEltVT.getSizeInBits(), VT: LoadVT, DL: SL);
10159	SDValue ShiftedElt = DAG.getNode(Opcode: ISD::SRL, DL: SL, VT: LoadVT, N1: Load, N2: ShiftAmount);
10160	SDValue Elt =
10161	DAG.getNode(Opcode: ISD::AND, DL: SL, VT: LoadVT, N1: ShiftedElt, N2: SrcEltBitMask);
10162	SDValue Scalar = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SL, VT: SrcEltVT, Operand: Elt);
10163
10164	if (ExtType != ISD::NON_EXTLOAD) {
10165	unsigned ExtendOp = ISD::getExtForLoadExtType(IsFP: false, ExtType);
10166	Scalar = DAG.getNode(Opcode: ExtendOp, DL: SL, VT: DstEltVT, Operand: Scalar);
10167	}
10168
10169	Vals.push_back(Elt: Scalar);
10170	}
10171
10172	SDValue Value = DAG.getBuildVector(VT: DstVT, DL: SL, Ops: Vals);
10173	return std::make_pair(x&: Value, y: Load.getValue(R: `1`));
10174	}
10175
10176	unsigned Stride = SrcEltVT.getSizeInBits() / `8`;
10177	assert(SrcEltVT.isByteSized());
10178
10179	SmallVector<SDValue, `8`> Vals;
10180	SmallVector<SDValue, `8`> LoadChains;
10181
10182	for (unsigned Idx = `0`; Idx < NumElem; ++Idx) {
10183	SDValue ScalarLoad = DAG.getExtLoad(
10184	ExtType, dl: SL, VT: DstEltVT, Chain, Ptr: BasePTR,
10185	PtrInfo: LD->getPointerInfo().getWithOffset(O: Idx * Stride), MemVT: SrcEltVT,
10186	Alignment: LD->getBaseAlign(), MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
10187
10188	BasePTR = DAG.getObjectPtrOffset(SL, Ptr: BasePTR, Offset: TypeSize::getFixed(ExactSize: Stride));
10189
10190	Vals.push_back(Elt: ScalarLoad.getValue(R: `0`));
10191	LoadChains.push_back(Elt: ScalarLoad.getValue(R: `1`));
10192	}
10193
10194	SDValue NewChain = DAG.getNode(Opcode: ISD::TokenFactor, DL: SL, VT: MVT::Other, Ops: LoadChains);
10195	SDValue Value = DAG.getBuildVector(VT: DstVT, DL: SL, Ops: Vals);
10196
10197	return std::make_pair(x&: Value, y&: NewChain);
10198	}
10199
10200	SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
10201	SelectionDAG &DAG) const {
10202	SDLoc SL(ST);
10203
10204	SDValue Chain = ST->getChain();
10205	SDValue BasePtr = ST->getBasePtr();
10206	SDValue Value = ST->getValue();
10207	EVT StVT = ST->getMemoryVT();
10208
10209	if (StVT.isScalableVector())
10210	report_fatal_error(reason: "Cannot scalarize scalable vector stores");
10211
10212	// The type of the data we want to save
10213	EVT RegVT = Value.getValueType();
10214	EVT RegSclVT = RegVT.getScalarType();
10215
10216	// The type of data as saved in memory.
10217	EVT MemSclVT = StVT.getScalarType();
10218
10219	unsigned NumElem = StVT.getVectorNumElements();
10220
10221	// A vector must always be stored in memory as-is, i.e. without any padding
10222	// between the elements, since various code depend on it, e.g. in the
10223	// handling of a bitcast of a vector type to int, which may be done with a
10224	// vector store followed by an integer load. A vector that does not have
10225	// elements that are byte-sized must therefore be stored as an integer
10226	// built out of the extracted vector elements.
10227	if (!MemSclVT.isByteSized()) {
10228	unsigned NumBits = StVT.getSizeInBits();
10229	EVT IntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumBits);
10230
10231	SDValue CurrVal = DAG.getConstant(Val: `0`, DL: SL, VT: IntVT);
10232
10233	for (unsigned Idx = `0`; Idx < NumElem; ++Idx) {
10234	SDValue Elt = DAG.getExtractVectorElt(DL: SL, VT: RegSclVT, Vec: Value, Idx);
10235	SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SL, VT: MemSclVT, Operand: Elt);
10236	SDValue ExtElt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SL, VT: IntVT, Operand: Trunc);
10237	unsigned ShiftIntoIdx =
10238	(DAG.getDataLayout().isBigEndian() ? (NumElem - `1`) - Idx : Idx);
10239	SDValue ShiftAmount =
10240	DAG.getConstant(Val: ShiftIntoIdx * MemSclVT.getSizeInBits(), DL: SL, VT: IntVT);
10241	SDValue ShiftedElt =
10242	DAG.getNode(Opcode: ISD::SHL, DL: SL, VT: IntVT, N1: ExtElt, N2: ShiftAmount);
10243	CurrVal = DAG.getNode(Opcode: ISD::OR, DL: SL, VT: IntVT, N1: CurrVal, N2: ShiftedElt);
10244	}
10245
10246	return DAG.getStore(Chain, dl: SL, Val: CurrVal, Ptr: BasePtr, PtrInfo: ST->getPointerInfo(),
10247	Alignment: ST->getBaseAlign(), MMOFlags: ST->getMemOperand()->getFlags(),
10248	AAInfo: ST->getAAInfo());
10249	}
10250
10251	// Store Stride in bytes
10252	unsigned Stride = MemSclVT.getSizeInBits() / `8`;
10253	assert(Stride && "Zero stride!");
10254	// Extract each of the elements from the original vector and save them into
10255	// memory individually.
10256	SmallVector<SDValue, `8`> Stores;
10257	for (unsigned Idx = `0`; Idx < NumElem; ++Idx) {
10258	SDValue Elt = DAG.getExtractVectorElt(DL: SL, VT: RegSclVT, Vec: Value, Idx);
10259
10260	SDValue Ptr =
10261	DAG.getObjectPtrOffset(SL, Ptr: BasePtr, Offset: TypeSize::getFixed(ExactSize: Idx * Stride));
10262
10263	// This scalar TruncStore may be illegal, but we legalize it later.
10264	SDValue Store = DAG.getTruncStore(
10265	Chain, dl: SL, Val: Elt, Ptr, PtrInfo: ST->getPointerInfo().getWithOffset(O: Idx * Stride),
10266	SVT: MemSclVT, Alignment: ST->getBaseAlign(), MMOFlags: ST->getMemOperand()->getFlags(),
10267	AAInfo: ST->getAAInfo());
10268
10269	Stores.push_back(Elt: Store);
10270	}
10271
10272	return DAG.getNode(Opcode: ISD::TokenFactor, DL: SL, VT: MVT::Other, Ops: Stores);
10273	}
10274
10275	std::pair<SDValue, SDValue>
10276	TargetLowering::expandUnalignedLoad(LoadSDNode LD, SelectionDAG &DAG) const* {
10277	assert(LD->getAddressingMode() == ISD::UNINDEXED &&
10278	"unaligned indexed loads not implemented!");
10279	SDValue Chain = LD->getChain();
10280	SDValue Ptr = LD->getBasePtr();
10281	EVT VT = LD->getValueType(ResNo: `0`);
10282	EVT LoadedVT = LD->getMemoryVT();
10283	SDLoc dl(LD);
10284	auto &MF = DAG.getMachineFunction();
10285
10286	if (VT.isFloatingPoint() \|\| VT.isVector()) {
10287	EVT intVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: LoadedVT.getSizeInBits());
10288	if (isTypeLegal(VT: intVT) && isTypeLegal(VT: LoadedVT)) {
10289	if (!isOperationLegalOrCustom(Op: ISD::LOAD, VT: intVT) &&
10290	LoadedVT.isVector()) {
10291	// Scalarize the load and let the individual components be handled.
10292	return scalarizeVectorLoad(LD, DAG);
10293	}
10294
10295	// Expand to a (misaligned) integer load of the same size,
10296	// then bitconvert to floating point or vector.
10297	SDValue newLoad = DAG.getLoad(VT: intVT, dl, Chain, Ptr,
10298	MMO: LD->getMemOperand());
10299	SDValue Result = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: LoadedVT, Operand: newLoad);
10300	if (LoadedVT != VT)
10301	Result = DAG.getNode(Opcode: VT.isFloatingPoint() ? ISD::FP_EXTEND :
10302	ISD::ANY_EXTEND, DL: dl, VT, Operand: Result);
10303
10304	return std::make_pair(x&: Result, y: newLoad.getValue(R: `1`));
10305	}
10306
10307	// Copy the value to a (aligned) stack slot using (unaligned) integer
10308	// loads and stores, then do a (aligned) load from the stack slot.
10309	MVT RegVT = getRegisterType(Context&: *DAG.getContext(), VT: intVT);
10310	unsigned LoadedBytes = LoadedVT.getStoreSize();
10311	unsigned RegBytes = RegVT.getSizeInBits() / `8`;
10312	unsigned NumRegs = (LoadedBytes + RegBytes - `1`) / RegBytes;
10313
10314	// Make sure the stack slot is also aligned for the register type.
10315	SDValue StackBase = DAG.CreateStackTemporary(VT1: LoadedVT, VT2: RegVT);
10316	auto FrameIndex = cast<FrameIndexSDNode>(Val: StackBase.getNode())->getIndex();
10317	SmallVector<SDValue, `8`> Stores;
10318	SDValue StackPtr = StackBase;
10319	unsigned Offset = `0`;
10320
10321	EVT PtrVT = Ptr.getValueType();
10322	EVT StackPtrVT = StackPtr.getValueType();
10323
10324	SDValue PtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: PtrVT);
10325	SDValue StackPtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: StackPtrVT);
10326
10327	// Do all but one copies using the full register width.
10328	for (unsigned i = `1`; i < NumRegs; i++) {
10329	// Load one integer register's worth from the original location.
10330	SDValue Load = DAG.getLoad(
10331	VT: RegVT, dl, Chain, Ptr, PtrInfo: LD->getPointerInfo().getWithOffset(O: Offset),
10332	Alignment: LD->getBaseAlign(), MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
10333	// Follow the load with a store to the stack slot. Remember the store.
10334	Stores.push_back(Elt: DAG.getStore(
10335	Chain: Load.getValue(R: `1`), dl, Val: Load, Ptr: StackPtr,
10336	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset)));
10337	// Increment the pointers.
10338	Offset += RegBytes;
10339
10340	Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: PtrIncrement);
10341	StackPtr = DAG.getObjectPtrOffset(SL: dl, Ptr: StackPtr, Offset: StackPtrIncrement);
10342	}
10343
10344	// The last copy may be partial. Do an extending load.
10345	EVT MemVT = EVT::getIntegerVT(Context&: *DAG.getContext(),
10346	BitWidth: `8` * (LoadedBytes - Offset));
10347	SDValue Load = DAG.getExtLoad(
10348	ExtType: ISD::EXTLOAD, dl, VT: RegVT, Chain, Ptr,
10349	PtrInfo: LD->getPointerInfo().getWithOffset(O: Offset), MemVT, Alignment: LD->getBaseAlign(),
10350	MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
10351	// Follow the load with a store to the stack slot. Remember the store.
10352	// On big-endian machines this requires a truncating store to ensure
10353	// that the bits end up in the right place.
10354	Stores.push_back(Elt: DAG.getTruncStore(
10355	Chain: Load.getValue(R: `1`), dl, Val: Load, Ptr: StackPtr,
10356	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset), SVT: MemVT));
10357
10358	// The order of the stores doesn't matter - say it with a TokenFactor.
10359	SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: Stores);
10360
10361	// Finally, perform the original load only redirected to the stack slot.
10362	Load = DAG.getExtLoad(ExtType: LD->getExtensionType(), dl, VT, Chain: TF, Ptr: StackBase,
10363	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset: `0`),
10364	MemVT: LoadedVT);
10365
10366	// Callers expect a MERGE_VALUES node.
10367	return std::make_pair(x&: Load, y&: TF);
10368	}
10369
10370	assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
10371	"Unaligned load of unsupported type.");
10372
10373	// Compute the new VT that is half the size of the old one. This is an
10374	// integer MVT.
10375	unsigned NumBits = LoadedVT.getSizeInBits();
10376	EVT NewLoadedVT;
10377	NewLoadedVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumBits/`2`);
10378	NumBits >>= `1`;
10379
10380	Align Alignment = LD->getBaseAlign();
10381	unsigned IncrementSize = NumBits / `8`;
10382	ISD::LoadExtType HiExtType = LD->getExtensionType();
10383
10384	// If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
10385	if (HiExtType == ISD::NON_EXTLOAD)
10386	HiExtType = ISD::ZEXTLOAD;
10387
10388	// Load the value in two parts
10389	SDValue Lo, Hi;
10390	if (DAG.getDataLayout().isLittleEndian()) {
10391	Lo = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT, Chain, Ptr, PtrInfo: LD->getPointerInfo(),
10392	MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
10393	AAInfo: LD->getAAInfo());
10394
10395	Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
10396	Hi = DAG.getExtLoad(ExtType: HiExtType, dl, VT, Chain, Ptr,
10397	PtrInfo: LD->getPointerInfo().getWithOffset(O: IncrementSize),
10398	MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
10399	AAInfo: LD->getAAInfo());
10400	} else {
10401	Hi = DAG.getExtLoad(ExtType: HiExtType, dl, VT, Chain, Ptr, PtrInfo: LD->getPointerInfo(),
10402	MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
10403	AAInfo: LD->getAAInfo());
10404
10405	Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
10406	Lo = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
10407	PtrInfo: LD->getPointerInfo().getWithOffset(O: IncrementSize),
10408	MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
10409	AAInfo: LD->getAAInfo());
10410	}
10411
10412	// aggregate the two parts
10413	SDValue ShiftAmount = DAG.getShiftAmountConstant(Val: NumBits, VT, DL: dl);
10414	SDValue Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Hi, N2: ShiftAmount);
10415	Result = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Result, N2: Lo);
10416
10417	SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Lo.getValue(R: `1`),
10418	N2: Hi.getValue(R: `1`));
10419
10420	return std::make_pair(x&: Result, y&: TF);
10421	}
10422
10423	SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
10424	SelectionDAG &DAG) const {
10425	assert(ST->getAddressingMode() == ISD::UNINDEXED &&
10426	"unaligned indexed stores not implemented!");
10427	SDValue Chain = ST->getChain();
10428	SDValue Ptr = ST->getBasePtr();
10429	SDValue Val = ST->getValue();
10430	EVT VT = Val.getValueType();
10431	Align Alignment = ST->getBaseAlign();
10432	auto &MF = DAG.getMachineFunction();
10433	EVT StoreMemVT = ST->getMemoryVT();
10434
10435	SDLoc dl(ST);
10436	if (StoreMemVT.isFloatingPoint() \|\| StoreMemVT.isVector()) {
10437	EVT intVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: VT.getSizeInBits());
10438	if (isTypeLegal(VT: intVT)) {
10439	if (!isOperationLegalOrCustom(Op: ISD::STORE, VT: intVT) &&
10440	StoreMemVT.isVector()) {
10441	// Scalarize the store and let the individual components be handled.
10442	SDValue Result = scalarizeVectorStore(ST, DAG);
10443	return Result;
10444	}
10445	// Expand to a bitconvert of the value to the integer type of the
10446	// same size, then a (misaligned) int store.
10447	// FIXME: Does not handle truncating floating point stores!
10448	SDValue Result = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: intVT, Operand: Val);
10449	Result = DAG.getStore(Chain, dl, Val: Result, Ptr, PtrInfo: ST->getPointerInfo(),
10450	Alignment, MMOFlags: ST->getMemOperand()->getFlags());
10451	return Result;
10452	}
10453	// Do a (aligned) store to a stack slot, then copy from the stack slot
10454	// to the final destination using (unaligned) integer loads and stores.
10455	MVT RegVT = getRegisterType(
10456	Context&: *DAG.getContext(),
10457	VT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: StoreMemVT.getSizeInBits()));
10458	EVT PtrVT = Ptr.getValueType();
10459	unsigned StoredBytes = StoreMemVT.getStoreSize();
10460	unsigned RegBytes = RegVT.getSizeInBits() / `8`;
10461	unsigned NumRegs = (StoredBytes + RegBytes - `1`) / RegBytes;
10462
10463	// Make sure the stack slot is also aligned for the register type.
10464	SDValue StackPtr = DAG.CreateStackTemporary(VT1: StoreMemVT, VT2: RegVT);
10465	auto FrameIndex = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
10466
10467	// Perform the original store, only redirected to the stack slot.
10468	SDValue Store = DAG.getTruncStore(
10469	Chain, dl, Val, Ptr: StackPtr,
10470	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset: `0`), SVT: StoreMemVT);
10471
10472	EVT StackPtrVT = StackPtr.getValueType();
10473
10474	SDValue PtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: PtrVT);
10475	SDValue StackPtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: StackPtrVT);
10476	SmallVector<SDValue, `8`> Stores;
10477	unsigned Offset = `0`;
10478
10479	// Do all but one copies using the full register width.
10480	for (unsigned i = `1`; i < NumRegs; i++) {
10481	// Load one integer register's worth from the stack slot.
10482	SDValue Load = DAG.getLoad(
10483	VT: RegVT, dl, Chain: Store, Ptr: StackPtr,
10484	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset));
10485	// Store it to the final location. Remember the store.
10486	Stores.push_back(Elt: DAG.getStore(Chain: Load.getValue(R: `1`), dl, Val: Load, Ptr,
10487	PtrInfo: ST->getPointerInfo().getWithOffset(O: Offset),
10488	Alignment: ST->getBaseAlign(),
10489	MMOFlags: ST->getMemOperand()->getFlags()));
10490	// Increment the pointers.
10491	Offset += RegBytes;
10492	StackPtr = DAG.getObjectPtrOffset(SL: dl, Ptr: StackPtr, Offset: StackPtrIncrement);
10493	Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: PtrIncrement);
10494	}
10495
10496	// The last store may be partial. Do a truncating store. On big-endian
10497	// machines this requires an extending load from the stack slot to ensure
10498	// that the bits are in the right place.
10499	EVT LoadMemVT =
10500	EVT::getIntegerVT(Context&: DAG.getContext(), BitWidth: `8` (StoredBytes - Offset));
10501
10502	// Load from the stack slot.
10503	SDValue Load = DAG.getExtLoad(
10504	ExtType: ISD::EXTLOAD, dl, VT: RegVT, Chain: Store, Ptr: StackPtr,
10505	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset), MemVT: LoadMemVT);
10506
10507	Stores.push_back(Elt: DAG.getTruncStore(
10508	Chain: Load.getValue(R: `1`), dl, Val: Load, Ptr,
10509	PtrInfo: ST->getPointerInfo().getWithOffset(O: Offset), SVT: LoadMemVT,
10510	Alignment: ST->getBaseAlign(), MMOFlags: ST->getMemOperand()->getFlags(), AAInfo: ST->getAAInfo()));
10511	// The order of the stores doesn't matter - say it with a TokenFactor.
10512	SDValue Result = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: Stores);
10513	return Result;
10514	}
10515
10516	assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
10517	"Unaligned store of unknown type.");
10518	// Get the half-size VT
10519	EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(Context&: *DAG.getContext());
10520	unsigned NumBits = NewStoredVT.getFixedSizeInBits();
10521	unsigned IncrementSize = NumBits / `8`;
10522
10523	// Divide the stored value in two parts.
10524	SDValue ShiftAmount =
10525	DAG.getShiftAmountConstant(Val: NumBits, VT: Val.getValueType(), DL: dl);
10526	SDValue Lo = Val;
10527	// If Val is a constant, replace the upper bits with 0. The SRL will constant
10528	// fold and not use the upper bits. A smaller constant may be easier to
10529	// materialize.
10530	if (auto *C = dyn_cast<ConstantSDNode>(Val&: Lo); C && !C->isOpaque())
10531	Lo = DAG.getNode(
10532	Opcode: ISD::AND, DL: dl, VT, N1: Lo,
10533	N2: DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VT.getSizeInBits(), loBitsSet: NumBits), DL: dl,
10534	VT));
10535	SDValue Hi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Val, N2: ShiftAmount);
10536
10537	// Store the two parts
10538	SDValue Store1, Store2;
10539	Store1 = DAG.getTruncStore(Chain, dl,
10540	Val: DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
10541	Ptr, PtrInfo: ST->getPointerInfo(), SVT: NewStoredVT, Alignment,
10542	MMOFlags: ST->getMemOperand()->getFlags());
10543
10544	Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
10545	Store2 = DAG.getTruncStore(
10546	Chain, dl, Val: DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
10547	PtrInfo: ST->getPointerInfo().getWithOffset(O: IncrementSize), SVT: NewStoredVT, Alignment,
10548	MMOFlags: ST->getMemOperand()->getFlags(), AAInfo: ST->getAAInfo());
10549
10550	SDValue Result =
10551	DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Store1, N2: Store2);
10552	return Result;
10553	}
10554
10555	SDValue
10556	TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
10557	const SDLoc &DL, EVT DataVT,
10558	SelectionDAG &DAG,
10559	bool IsCompressedMemory) const {
10560	SDValue Increment;
10561	EVT AddrVT = Addr.getValueType();
10562	EVT MaskVT = Mask.getValueType();
10563	assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
10564	"Incompatible types of Data and Mask");
10565	if (IsCompressedMemory) {
10566	if (DataVT.isScalableVector())
10567	report_fatal_error(
10568	reason: "Cannot currently handle compressed memory with scalable vectors");
10569	// Incrementing the pointer according to number of '1's in the mask.
10570	EVT MaskIntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: MaskVT.getSizeInBits());
10571	SDValue MaskInIntReg = DAG.getBitcast(VT: MaskIntVT, V: Mask);
10572	if (MaskIntVT.getSizeInBits() < `32`) {
10573	MaskInIntReg = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i32, Operand: MaskInIntReg);
10574	MaskIntVT = MVT::i32;
10575	}
10576
10577	// Count '1's with POPCNT.
10578	Increment = DAG.getNode(Opcode: ISD::CTPOP, DL, VT: MaskIntVT, Operand: MaskInIntReg);
10579	Increment = DAG.getZExtOrTrunc(Op: Increment, DL, VT: AddrVT);
10580	// Scale is an element size in bytes.
10581	SDValue Scale = DAG.getConstant(Val: DataVT.getScalarSizeInBits() / `8`, DL,
10582	VT: AddrVT);
10583	Increment = DAG.getNode(Opcode: ISD::MUL, DL, VT: AddrVT, N1: Increment, N2: Scale);
10584	} else if (DataVT.isScalableVector()) {
10585	Increment = DAG.getVScale(DL, VT: AddrVT,
10586	MulImm: APInt (AddrVT.getFixedSizeInBits(),
10587	DataVT.getStoreSize().getKnownMinValue()));
10588	} else
10589	Increment = DAG.getConstant(Val: DataVT.getStoreSize(), DL, VT: AddrVT);
10590
10591	return DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: Addr, N2: Increment);
10592	}
10593
10594	static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
10595	EVT VecVT, const SDLoc &dl,
10596	ElementCount SubEC) {
10597	assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
10598	"Cannot index a scalable vector within a fixed-width vector");
10599
10600	unsigned NElts = VecVT.getVectorMinNumElements();
10601	unsigned NumSubElts = SubEC.getKnownMinValue();
10602	EVT IdxVT = Idx.getValueType();
10603
10604	if (VecVT.isScalableVector() && !SubEC.isScalable()) {
10605	// If this is a constant index and we know the value plus the number of the
10606	// elements in the subvector minus one is less than the minimum number of
10607	// elements then it's safe to return Idx.
10608	if (auto *IdxCst = dyn_cast<ConstantSDNode>(Val&: Idx))
10609	if (IdxCst->getZExtValue() + (NumSubElts - `1`) < NElts)
10610	return Idx;
10611	SDValue VS =
10612	DAG.getVScale(DL: dl, VT: IdxVT, MulImm: APInt (IdxVT.getFixedSizeInBits(), NElts));
10613	unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
10614	SDValue Sub = DAG.getNode(Opcode: SubOpcode, DL: dl, VT: IdxVT, N1: VS,
10615	N2: DAG.getConstant(Val: NumSubElts, DL: dl, VT: IdxVT));
10616	return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT: IdxVT, N1: Idx, N2: Sub);
10617	}
10618	if (isPowerOf2_32(Value: NElts) && NumSubElts == `1`) {
10619	APInt Imm = APInt::getLowBitsSet(numBits: IdxVT.getSizeInBits(), loBitsSet: Log2_32(Value: NElts));
10620	return DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IdxVT, N1: Idx,
10621	N2: DAG.getConstant(Val: Imm, DL: dl, VT: IdxVT));
10622	}
10623	unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : `0`;
10624	return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT: IdxVT, N1: Idx,
10625	N2: DAG.getConstant(Val: MaxIndex, DL: dl, VT: IdxVT));
10626	}
10627
10628	SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
10629	SDValue VecPtr, EVT VecVT,
10630	SDValue Index) const {
10631	return getVectorSubVecPointer(
10632	DAG, VecPtr, VecVT,
10633	SubVecVT: EVT::getVectorVT(Context&: *DAG.getContext(), VT: VecVT.getVectorElementType(), NumElements: `1`),
10634	Index);
10635	}
10636
10637	SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
10638	SDValue VecPtr, EVT VecVT,
10639	EVT SubVecVT,
10640	SDValue Index) const {
10641	SDLoc dl(Index);
10642	// Make sure the index type is big enough to compute in.
10643	Index = DAG.getZExtOrTrunc(Op: Index, DL: dl, VT: VecPtr.getValueType());
10644
10645	EVT EltVT = VecVT.getVectorElementType();
10646
10647	// Calculate the element offset and add it to the pointer.
10648	unsigned EltSize = EltVT.getFixedSizeInBits() / `8`; // FIXME: should be ABI size.
10649	assert(EltSize * `8` == EltVT.getFixedSizeInBits() &&
10650	"Converting bits to bytes lost precision");
10651	assert(SubVecVT.getVectorElementType() == EltVT &&
10652	"Sub-vector must be a vector with matching element type");
10653	Index = clampDynamicVectorIndex(DAG, Idx: Index, VecVT, dl,
10654	SubEC: SubVecVT.getVectorElementCount());
10655
10656	EVT IdxVT = Index.getValueType();
10657	if (SubVecVT.isScalableVector())
10658	Index =
10659	DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IdxVT, N1: Index,
10660	N2: DAG.getVScale(DL: dl, VT: IdxVT, MulImm: APInt (IdxVT.getSizeInBits(), `1`)));
10661
10662	Index = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IdxVT, N1: Index,
10663	N2: DAG.getConstant(Val: EltSize, DL: dl, VT: IdxVT));
10664	return DAG.getMemBasePlusOffset(Base: VecPtr, Offset: Index, DL: dl);
10665	}
10666
10667	//===----------------------------------------------------------------------===//
10668	// Implementation of Emulated TLS Model
10669	//===----------------------------------------------------------------------===//
10670
10671	SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
10672	SelectionDAG &DAG) const {
10673	// Access to address of TLS varialbe xyz is lowered to a function call:
10674	// __emutls_get_address( address of global variable named "__emutls_v.xyz" )
10675	EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
10676	PointerType VoidPtrType = PointerType::get(C&: DAG.getContext(), AddressSpace: `0`);
10677	SDLoc dl(GA);
10678
10679	ArgListTy Args;
10680	ArgListEntry Entry;
10681	const GlobalValue *GV =
10682	cast<GlobalValue>(Val: GA->getGlobal()->stripPointerCastsAndAliases());
10683	SmallString<`32`> NameString("__emutls_v.");
10684	NameString += GV->getName();
10685	StringRef EmuTlsVarName(NameString);
10686	const GlobalVariable *EmuTlsVar =
10687	GV->getParent()->getNamedGlobal(Name: EmuTlsVarName);
10688	assert(EmuTlsVar && "Cannot find EmuTlsVar ");
10689	Entry.Node = DAG.getGlobalAddress(GV: EmuTlsVar, DL: dl, VT: PtrVT);
10690	Entry.Ty = VoidPtrType;
10691	Args.push_back(x: Entry);
10692
10693	SDValue EmuTlsGetAddr = DAG.getExternalSymbol(Sym: "__emutls_get_address", VT: PtrVT);
10694
10695	TargetLowering::CallLoweringInfo CLI(DAG);
10696	CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
10697	CLI.setLibCallee(CC: CallingConv::C, ResultType: VoidPtrType, Target: EmuTlsGetAddr, ArgsList: std::move(Args));
10698	std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
10699
10700	// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
10701	// At last for X86 targets, maybe good for other targets too?
10702	MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10703	MFI.setAdjustsStack(true); // Is this only for X86 target?
10704	MFI.setHasCalls(true);
10705
10706	assert((GA->getOffset() == `0`) &&
10707	"Emulated TLS must have zero offset in GlobalAddressSDNode");
10708	return CallResult.first;
10709	}
10710
10711	SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
10712	SelectionDAG &DAG) const {
10713	assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
10714	if (!isCtlzFast())
10715	return SDValue ();
10716	ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: `2`))->get();
10717	SDLoc dl(Op);
10718	if (isNullConstant(V: Op.getOperand(i: `1`)) && CC == ISD::SETEQ) {
10719	EVT VT = Op.getOperand(i: `0`).getValueType();
10720	SDValue Zext = Op.getOperand(i: `0`);
10721	if (VT.bitsLT(VT: MVT::i32)) {
10722	VT = MVT::i32;
10723	Zext = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Op.getOperand(i: `0`));
10724	}
10725	unsigned Log2b = Log2_32(Value: VT.getSizeInBits());
10726	SDValue Clz = DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Zext);
10727	SDValue Scc = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Clz,
10728	N2: DAG.getConstant(Val: Log2b, DL: dl, VT: MVT::i32));
10729	return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32, Operand: Scc);
10730	}
10731	return SDValue ();
10732	}
10733
10734	SDValue TargetLowering::expandIntMINMAX(SDNode Node, SelectionDAG &DAG) const* {
10735	SDValue Op0 = Node->getOperand(Num: `0`);
10736	SDValue Op1 = Node->getOperand(Num: `1`);
10737	EVT VT = Op0.getValueType();
10738	EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10739	unsigned Opcode = Node->getOpcode();
10740	SDLoc DL(Node);
10741
10742	// umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
10743	if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(V: Op1, AllowUndefs: true) && BoolVT == VT &&
10744	getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
10745	Op0 = DAG.getFreeze(V: Op0);
10746	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT);
10747	return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0,
10748	N2: DAG.getSetCC(DL, VT, LHS: Op0, RHS: Zero, Cond: ISD::SETEQ));
10749	}
10750
10751	// umin(x,y) -> sub(x,usubsat(x,y))
10752	// TODO: Missing freeze(Op0)?
10753	if (Opcode == ISD::UMIN && isOperationLegal(Op: ISD::SUB, VT) &&
10754	isOperationLegal(Op: ISD::USUBSAT, VT)) {
10755	return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0,
10756	N2: DAG.getNode(Opcode: ISD::USUBSAT, DL, VT, N1: Op0, N2: Op1));
10757	}
10758
10759	// umax(x,y) -> add(x,usubsat(y,x))
10760	// TODO: Missing freeze(Op0)?
10761	if (Opcode == ISD::UMAX && isOperationLegal(Op: ISD::ADD, VT) &&
10762	isOperationLegal(Op: ISD::USUBSAT, VT)) {
10763	return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0,
10764	N2: DAG.getNode(Opcode: ISD::USUBSAT, DL, VT, N1: Op1, N2: Op0));
10765	}
10766
10767	// FIXME: Should really try to split the vector in case it's legal on a
10768	// subvector.
10769	if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
10770	return DAG.UnrollVectorOp(N: Node);
10771
10772	// Attempt to find an existing SETCC node that we can reuse.
10773	// TODO: Do we need a generic doesSETCCNodeExist?
10774	// TODO: Missing freeze(Op0)/freeze(Op1)?
10775	auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
10776	ISD::CondCode PrefCommuteCC,
10777	ISD::CondCode AltCommuteCC) {
10778	SDVTList BoolVTList = DAG.getVTList(VT: BoolVT);
10779	for (ISD::CondCode CC : {PrefCC, AltCC}) {
10780	if (DAG.doesNodeExist(Opcode: ISD::SETCC, VTList: BoolVTList,
10781	Ops: {Op0, Op1, DAG.getCondCode(Cond: CC)})) {
10782	SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: CC);
10783	return DAG.getSelect(DL, VT, Cond, LHS: Op0, RHS: Op1);
10784	}
10785	}
10786	for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
10787	if (DAG.doesNodeExist(Opcode: ISD::SETCC, VTList: BoolVTList,
10788	Ops: {Op0, Op1, DAG.getCondCode(Cond: CC)})) {
10789	SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: CC);
10790	return DAG.getSelect(DL, VT, Cond, LHS: Op1, RHS: Op0);
10791	}
10792	}
10793	SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: PrefCC);
10794	return DAG.getSelect(DL, VT, Cond, LHS: Op0, RHS: Op1);
10795	};
10796
10797	// Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
10798	// -> Y = (A < B) ? B : A
10799	// -> Y = (A >= B) ? A : B
10800	// -> Y = (A <= B) ? B : A
10801	switch (Opcode) {
10802	case ISD::SMAX:
10803	return buildMinMax (ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
10804	case ISD::SMIN:
10805	return buildMinMax (ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
10806	case ISD::UMAX:
10807	return buildMinMax (ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
10808	case ISD::UMIN:
10809	return buildMinMax (ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
10810	}
10811
10812	llvm_unreachable("How did we get here?");
10813	}
10814
10815	SDValue TargetLowering::expandAddSubSat(SDNode Node, SelectionDAG &DAG) const* {
10816	unsigned Opcode = Node->getOpcode();
10817	SDValue LHS = Node->getOperand(Num: `0`);
10818	SDValue RHS = Node->getOperand(Num: `1`);
10819	EVT VT = LHS.getValueType();
10820	SDLoc dl(Node);
10821
10822	assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10823	assert(VT.isInteger() && "Expected operands to be integers");
10824
10825	// usub.sat(a, b) -> umax(a, b) - b
10826	if (Opcode == ISD::USUBSAT && isOperationLegal(Op: ISD::UMAX, VT)) {
10827	SDValue Max = DAG.getNode(Opcode: ISD::UMAX, DL: dl, VT, N1: LHS, N2: RHS);
10828	return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Max, N2: RHS);
10829	}
10830
10831	// uadd.sat(a, b) -> umin(a, ~b) + b
10832	if (Opcode == ISD::UADDSAT && isOperationLegal(Op: ISD::UMIN, VT)) {
10833	SDValue InvRHS = DAG.getNOT(DL: dl, Val: RHS, VT);
10834	SDValue Min = DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT, N1: LHS, N2: InvRHS);
10835	return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Min, N2: RHS);
10836	}
10837
10838	unsigned OverflowOp;
10839	switch (Opcode) {
10840	case ISD::SADDSAT:
10841	OverflowOp = ISD::SADDO;
10842	break;
10843	case ISD::UADDSAT:
10844	OverflowOp = ISD::UADDO;
10845	break;
10846	case ISD::SSUBSAT:
10847	OverflowOp = ISD::SSUBO;
10848	break;
10849	case ISD::USUBSAT:
10850	OverflowOp = ISD::USUBO;
10851	break;
10852	default:
10853	llvm_unreachable("Expected method to receive signed or unsigned saturation "
10854	"addition or subtraction node.");
10855	}
10856
10857	// FIXME: Should really try to split the vector in case it's legal on a
10858	// subvector.
10859	if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
10860	return DAG.UnrollVectorOp(N: Node);
10861
10862	unsigned BitWidth = LHS.getScalarValueSizeInBits();
10863	EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10864	SDValue Result = DAG.getNode(Opcode: OverflowOp, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
10865	SDValue SumDiff = Result.getValue(R: `0`);
10866	SDValue Overflow = Result.getValue(R: `1`);
10867	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
10868	SDValue AllOnes = DAG.getAllOnesConstant(DL: dl, VT);
10869
10870	if (Opcode == ISD::UADDSAT) {
10871	if (getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
10872	// (LHS + RHS) \| OverflowMask
10873	SDValue OverflowMask = DAG.getSExtOrTrunc(Op: Overflow, DL: dl, VT);
10874	return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: SumDiff, N2: OverflowMask);
10875	}
10876	// Overflow ? 0xffff.... : (LHS + RHS)
10877	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: AllOnes, RHS: SumDiff);
10878	}
10879
10880	if (Opcode == ISD::USUBSAT) {
10881	if (getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
10882	// (LHS - RHS) & ~OverflowMask
10883	SDValue OverflowMask = DAG.getSExtOrTrunc(Op: Overflow, DL: dl, VT);
10884	SDValue Not = DAG.getNOT(DL: dl, Val: OverflowMask, VT);
10885	return DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: SumDiff, N2: Not);
10886	}
10887	// Overflow ? 0 : (LHS - RHS)
10888	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Zero, RHS: SumDiff);
10889	}
10890
10891	if (Opcode == ISD::SADDSAT \|\| Opcode == ISD::SSUBSAT) {
10892	APInt MinVal = APInt::getSignedMinValue(numBits: BitWidth);
10893	APInt MaxVal = APInt::getSignedMaxValue(numBits: BitWidth);
10894
10895	KnownBits KnownLHS = DAG.computeKnownBits(Op: LHS);
10896	KnownBits KnownRHS = DAG.computeKnownBits(Op: RHS);
10897
10898	// If either of the operand signs are known, then they are guaranteed to
10899	// only saturate in one direction. If non-negative they will saturate
10900	// towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
10901	//
10902	// In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
10903	// sign of 'y' has to be flipped.
10904
10905	bool LHSIsNonNegative = KnownLHS.isNonNegative();
10906	bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
10907	: KnownRHS.isNegative();
10908	if (LHSIsNonNegative \|\| RHSIsNonNegative) {
10909	SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
10910	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMax, RHS: SumDiff);
10911	}
10912
10913	bool LHSIsNegative = KnownLHS.isNegative();
10914	bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
10915	: KnownRHS.isNonNegative();
10916	if (LHSIsNegative \|\| RHSIsNegative) {
10917	SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
10918	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMin, RHS: SumDiff);
10919	}
10920	}
10921
10922	// Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
10923	APInt MinVal = APInt::getSignedMinValue(numBits: BitWidth);
10924	SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
10925	SDValue Shift = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: SumDiff,
10926	N2: DAG.getConstant(Val: BitWidth - `1`, DL: dl, VT));
10927	Result = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Shift, N2: SatMin);
10928	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Result, RHS: SumDiff);
10929	}
10930
10931	SDValue TargetLowering::expandCMP(SDNode Node, SelectionDAG &DAG) const* {
10932	unsigned Opcode = Node->getOpcode();
10933	SDValue LHS = Node->getOperand(Num: `0`);
10934	SDValue RHS = Node->getOperand(Num: `1`);
10935	EVT VT = LHS.getValueType();
10936	EVT ResVT = Node->getValueType(ResNo: `0`);
10937	EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10938	SDLoc dl(Node);
10939
10940	auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
10941	auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
10942	SDValue IsLT = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS, Cond: LTPredicate);
10943	SDValue IsGT = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS, Cond: GTPredicate);
10944
10945	// We can't perform arithmetic on i1 values. Extending them would
10946	// probably result in worse codegen, so let's just use two selects instead.
10947	// Some targets are also just better off using selects rather than subtraction
10948	// because one of the conditions can be merged with one of the selects.
10949	// And finally, if we don't know the contents of high bits of a boolean value
10950	// we can't perform any arithmetic either.
10951	if (shouldExpandCmpUsingSelects(VT) \|\| BoolVT.getScalarSizeInBits() == `1` \|\|
10952	getBooleanContents(Type: BoolVT) == UndefinedBooleanContent) {
10953	SDValue SelectZeroOrOne =
10954	DAG.getSelect(DL: dl, VT: ResVT, Cond: IsGT, LHS: DAG.getConstant(Val: `1`, DL: dl, VT: ResVT),
10955	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: ResVT));
10956	return DAG.getSelect(DL: dl, VT: ResVT, Cond: IsLT, LHS: DAG.getAllOnesConstant(DL: dl, VT: ResVT),
10957	RHS: SelectZeroOrOne);
10958	}
10959
10960	if (getBooleanContents(Type: BoolVT) == ZeroOrNegativeOneBooleanContent)
10961	std::swap(a&: IsGT, b&: IsLT);
10962	return DAG.getSExtOrTrunc(Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: BoolVT, N1: IsGT, N2: IsLT), DL: dl,
10963	VT: ResVT);
10964	}
10965
10966	SDValue TargetLowering::expandShlSat(SDNode Node, SelectionDAG &DAG) const* {
10967	unsigned Opcode = Node->getOpcode();
10968	bool IsSigned = Opcode == ISD::SSHLSAT;
10969	SDValue LHS = Node->getOperand(Num: `0`);
10970	SDValue RHS = Node->getOperand(Num: `1`);
10971	EVT VT = LHS.getValueType();
10972	SDLoc dl(Node);
10973
10974	assert((Node->getOpcode() == ISD::SSHLSAT \|\|
10975	Node->getOpcode() == ISD::USHLSAT) &&
10976	"Expected a SHLSAT opcode");
10977	assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10978	assert(VT.isInteger() && "Expected operands to be integers");
10979
10980	if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
10981	return DAG.UnrollVectorOp(N: Node);
10982
10983	// If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
10984
10985	unsigned BW = VT.getScalarSizeInBits();
10986	EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10987	SDValue Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS, N2: RHS);
10988	SDValue Orig =
10989	DAG.getNode(Opcode: IsSigned ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: Result, N2: RHS);
10990
10991	SDValue SatVal;
10992	if (IsSigned) {
10993	SDValue SatMin = DAG.getConstant(Val: APInt::getSignedMinValue(numBits: BW), DL: dl, VT);
10994	SDValue SatMax = DAG.getConstant(Val: APInt::getSignedMaxValue(numBits: BW), DL: dl, VT);
10995	SDValue Cond =
10996	DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: DAG.getConstant(Val: `0`, DL: dl, VT), Cond: ISD::SETLT);
10997	SatVal = DAG.getSelect(DL: dl, VT, Cond, LHS: SatMin, RHS: SatMax);
10998	} else {
10999	SatVal = DAG.getConstant(Val: APInt::getMaxValue(numBits: BW), DL: dl, VT);
11000	}
11001	SDValue Cond = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: Orig, Cond: ISD::SETNE);
11002	return DAG.getSelect(DL: dl, VT, Cond, LHS: SatVal, RHS: Result);
11003	}
11004
11005	void TargetLowering::forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl,
11006	bool Signed, SDValue &Lo, SDValue &Hi,
11007	SDValue LHS, SDValue RHS,
11008	SDValue HiLHS, SDValue HiRHS) const {
11009	EVT VT = LHS.getValueType();
11010	assert(RHS.getValueType() == VT && "Mismatching operand types");
11011
11012	assert((HiLHS && HiRHS) \|\| (!HiLHS && !HiRHS));
11013	assert((!Signed \|\| !HiLHS) &&
11014	"Signed flag should only be set when HiLHS and RiRHS are null");
11015
11016	// We'll expand the multiplication by brute force because we have no other
11017	// options. This is a trivially-generalized version of the code from
11018	// Hacker's Delight (itself derived from Knuth's Algorithm M from section
11019	// 4.3.1). If Signed is set, we can use arithmetic right shifts to propagate
11020	// sign bits while calculating the Hi half.
11021	unsigned Bits = VT.getSizeInBits();
11022	unsigned HalfBits = Bits / `2`;
11023	SDValue Mask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: Bits, loBitsSet: HalfBits), DL: dl, VT);
11024	SDValue LL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: LHS, N2: Mask);
11025	SDValue RL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: RHS, N2: Mask);
11026
11027	SDValue T = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LL, N2: RL);
11028	SDValue TL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: T, N2: Mask);
11029
11030	SDValue Shift = DAG.getShiftAmountConstant(Val: HalfBits, VT, DL: dl);
11031	// This is always an unsigned shift.
11032	SDValue TH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: T, N2: Shift);
11033
11034	unsigned ShiftOpc = Signed ? ISD::SRA : ISD::SRL;
11035	SDValue LH = DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: LHS, N2: Shift);
11036	SDValue RH = DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: RHS, N2: Shift);
11037
11038	SDValue U =
11039	DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LH, N2: RL), N2: TH);
11040	SDValue UL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: U, N2: Mask);
11041	SDValue UH = DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: U, N2: Shift);
11042
11043	SDValue V =
11044	DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LL, N2: RH), N2: UL);
11045	SDValue VH = DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: V, N2: Shift);
11046
11047	Lo = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: TL,
11048	N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: V, N2: Shift));
11049
11050	Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LH, N2: RH),
11051	N2: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: UH, N2: VH));
11052
11053	// If HiLHS and HiRHS are set, multiply them by the opposite low part and add
11054	// the products to Hi.
11055	if (HiLHS) {
11056	Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Hi,
11057	N2: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT,
11058	N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: HiRHS, N2: LHS),
11059	N2: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: RHS, N2: HiLHS)));
11060	}
11061	}
11062
11063	void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
11064	bool Signed, const SDValue LHS,
11065	const SDValue RHS, SDValue &Lo,
11066	SDValue &Hi) const {
11067	EVT VT = LHS.getValueType();
11068	assert(RHS.getValueType() == VT && "Mismatching operand types");
11069	EVT WideVT = EVT::getIntegerVT(Context&: DAG.getContext(), BitWidth: VT.getSizeInBits() `2`);
11070	// We can fall back to a libcall with an illegal type for the MUL if we
11071	// have a libcall big enough.
11072	RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
11073	if (WideVT == MVT::i16)
11074	LC = RTLIB::MUL_I16;
11075	else if (WideVT == MVT::i32)
11076	LC = RTLIB::MUL_I32;
11077	else if (WideVT == MVT::i64)
11078	LC = RTLIB::MUL_I64;
11079	else if (WideVT == MVT::i128)
11080	LC = RTLIB::MUL_I128;
11081
11082	if (LC == RTLIB::UNKNOWN_LIBCALL \|\| !getLibcallName(Call: LC)) {
11083	forceExpandMultiply(DAG, dl, Signed, Lo, Hi, LHS, RHS);
11084	return;
11085	}
11086
11087	SDValue HiLHS, HiRHS;
11088	if (Signed) {
11089	// The high part is obtained by SRA'ing all but one of the bits of low
11090	// part.
11091	unsigned LoSize = VT.getFixedSizeInBits();
11092	SDValue Shift = DAG.getShiftAmountConstant(Val: LoSize - `1`, VT, DL: dl);
11093	HiLHS = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: LHS, N2: Shift);
11094	HiRHS = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: RHS, N2: Shift);
11095	} else {
11096	HiLHS = DAG.getConstant(Val: `0`, DL: dl, VT);
11097	HiRHS = DAG.getConstant(Val: `0`, DL: dl, VT);
11098	}
11099
11100	// Attempt a libcall.
11101	SDValue Ret;
11102	TargetLowering::MakeLibCallOptions CallOptions;
11103	CallOptions.setIsSigned(Signed);
11104	CallOptions.setIsPostTypeLegalization(true);
11105	if (shouldSplitFunctionArgumentsAsLittleEndian(DL: DAG.getDataLayout())) {
11106	// Halves of WideVT are packed into registers in different order
11107	// depending on platform endianness. This is usually handled by
11108	// the C calling convention, but we can't defer to it in
11109	// the legalizer.
11110	SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
11111	Ret = makeLibCall(DAG, LC, RetVT: WideVT, Ops: Args, CallOptions, dl).first;
11112	} else {
11113	SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
11114	Ret = makeLibCall(DAG, LC, RetVT: WideVT, Ops: Args, CallOptions, dl).first;
11115	}
11116	assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
11117	"Ret value is a collection of constituent nodes holding result.");
11118	if (DAG.getDataLayout().isLittleEndian()) {
11119	// Same as above.
11120	Lo = Ret.getOperand(i: `0`);
11121	Hi = Ret.getOperand(i: `1`);
11122	} else {
11123	Lo = Ret.getOperand(i: `1`);
11124	Hi = Ret.getOperand(i: `0`);
11125	}
11126	}
11127
11128	SDValue
11129	TargetLowering::expandFixedPointMul(SDNode Node, SelectionDAG &DAG) const* {
11130	assert((Node->getOpcode() == ISD::SMULFIX \|\|
11131	Node->getOpcode() == ISD::UMULFIX \|\|
11132	Node->getOpcode() == ISD::SMULFIXSAT \|\|
11133	Node->getOpcode() == ISD::UMULFIXSAT) &&
11134	"Expected a fixed point multiplication opcode");
11135
11136	SDLoc dl(Node);
11137	SDValue LHS = Node->getOperand(Num: `0`);
11138	SDValue RHS = Node->getOperand(Num: `1`);
11139	EVT VT = LHS.getValueType();
11140	unsigned Scale = Node->getConstantOperandVal(Num: `2`);
11141	bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT \|\|
11142	Node->getOpcode() == ISD::UMULFIXSAT);
11143	bool Signed = (Node->getOpcode() == ISD::SMULFIX \|\|
11144	Node->getOpcode() == ISD::SMULFIXSAT);
11145	EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
11146	unsigned VTSize = VT.getScalarSizeInBits();
11147
11148	if (!Scale) {
11149	// [us]mul.fix(a, b, 0) -> mul(a, b)
11150	if (!Saturating) {
11151	if (isOperationLegalOrCustom(Op: ISD::MUL, VT))
11152	return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
11153	} else if (Signed && isOperationLegalOrCustom(Op: ISD::SMULO, VT)) {
11154	SDValue Result =
11155	DAG.getNode(Opcode: ISD::SMULO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
11156	SDValue Product = Result.getValue(R: `0`);
11157	SDValue Overflow = Result.getValue(R: `1`);
11158	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
11159
11160	APInt MinVal = APInt::getSignedMinValue(numBits: VTSize);
11161	APInt MaxVal = APInt::getSignedMaxValue(numBits: VTSize);
11162	SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
11163	SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
11164	// Xor the inputs, if resulting sign bit is 0 the product will be
11165	// positive, else negative.
11166	SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: LHS, N2: RHS);
11167	SDValue ProdNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Xor, RHS: Zero, Cond: ISD::SETLT);
11168	Result = DAG.getSelect(DL: dl, VT, Cond: ProdNeg, LHS: SatMin, RHS: SatMax);
11169	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Result, RHS: Product);
11170	} else if (!Signed && isOperationLegalOrCustom(Op: ISD::UMULO, VT)) {
11171	SDValue Result =
11172	DAG.getNode(Opcode: ISD::UMULO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
11173	SDValue Product = Result.getValue(R: `0`);
11174	SDValue Overflow = Result.getValue(R: `1`);
11175
11176	APInt MaxVal = APInt::getMaxValue(numBits: VTSize);
11177	SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
11178	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMax, RHS: Product);
11179	}
11180	}
11181
11182	assert(((Signed && Scale < VTSize) \|\| (!Signed && Scale <= VTSize)) &&
11183	"Expected scale to be less than the number of bits if signed or at "
11184	"most the number of bits if unsigned.");
11185	assert(LHS.getValueType() == RHS.getValueType() &&
11186	"Expected both operands to be the same type");
11187
11188	// Get the upper and lower bits of the result.
11189	SDValue Lo, Hi;
11190	unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
11191	unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
11192	EVT WideVT = EVT::getIntegerVT(Context&: DAG.getContext(), BitWidth: VTSize `2`);
11193	if (VT.isVector())
11194	WideVT =
11195	EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT, EC: VT.getVectorElementCount());
11196	if (isOperationLegalOrCustom(Op: LoHiOp, VT)) {
11197	SDValue Result = DAG.getNode(Opcode: LoHiOp, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: LHS, N2: RHS);
11198	Lo = Result.getValue(R: `0`);
11199	Hi = Result.getValue(R: `1`);
11200	} else if (isOperationLegalOrCustom(Op: HiOp, VT)) {
11201	Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
11202	Hi = DAG.getNode(Opcode: HiOp, DL: dl, VT, N1: LHS, N2: RHS);
11203	} else if (isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT)) {
11204	// Try for a multiplication using a wider type.
11205	unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
11206	SDValue LHSExt = DAG.getNode(Opcode: Ext, DL: dl, VT: WideVT, Operand: LHS);
11207	SDValue RHSExt = DAG.getNode(Opcode: Ext, DL: dl, VT: WideVT, Operand: RHS);
11208	SDValue Res = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: LHSExt, N2: RHSExt);
11209	Lo = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Res);
11210	SDValue Shifted =
11211	DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: WideVT, N1: Res,
11212	N2: DAG.getShiftAmountConstant(Val: VTSize, VT: WideVT, DL: dl));
11213	Hi = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Shifted);
11214	} else if (VT.isVector()) {
11215	return SDValue ();
11216	} else {
11217	forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
11218	}
11219
11220	if (Scale == VTSize)
11221	// Result is just the top half since we'd be shifting by the width of the
11222	// operand. Overflow impossible so this works for both UMULFIX and
11223	// UMULFIXSAT.
11224	return Hi;
11225
11226	// The result will need to be shifted right by the scale since both operands
11227	// are scaled. The result is given to us in 2 halves, so we only want part of
11228	// both in the result.
11229	SDValue Result = DAG.getNode(Opcode: ISD::FSHR, DL: dl, VT, N1: Hi, N2: Lo,
11230	N3: DAG.getShiftAmountConstant(Val: Scale, VT, DL: dl));
11231	if (!Saturating)
11232	return Result;
11233
11234	if (!Signed) {
11235	// Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
11236	// widened multiplication) aren't all zeroes.
11237
11238	// Saturate to max if ((Hi >> Scale) != 0),
11239	// which is the same as if (Hi > ((1 << Scale) - 1))
11240	APInt MaxVal = APInt::getMaxValue(numBits: VTSize);
11241	SDValue LowMask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VTSize, loBitsSet: Scale),
11242	DL: dl, VT);
11243	Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: LowMask,
11244	True: DAG.getConstant(Val: MaxVal, DL: dl, VT), False: Result,
11245	Cond: ISD::SETUGT);
11246
11247	return Result;
11248	}
11249
11250	// Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
11251	// widened multiplication) aren't all ones or all zeroes.
11252
11253	SDValue SatMin = DAG.getConstant(Val: APInt::getSignedMinValue(numBits: VTSize), DL: dl, VT);
11254	SDValue SatMax = DAG.getConstant(Val: APInt::getSignedMaxValue(numBits: VTSize), DL: dl, VT);
11255
11256	if (Scale == `0`) {
11257	SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Lo,
11258	N2: DAG.getShiftAmountConstant(Val: VTSize - `1`, VT, DL: dl));
11259	SDValue Overflow = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Hi, RHS: Sign, Cond: ISD::SETNE);
11260	// Saturated to SatMin if wide product is negative, and SatMax if wide
11261	// product is positive ...
11262	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
11263	SDValue ResultIfOverflow = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: Zero, True: SatMin, False: SatMax,
11264	Cond: ISD::SETLT);
11265	// ... but only if we overflowed.
11266	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: ResultIfOverflow, RHS: Result);
11267	}
11268
11269	// We handled Scale==0 above so all the bits to examine is in Hi.
11270
11271	// Saturate to max if ((Hi >> (Scale - 1)) > 0),
11272	// which is the same as if (Hi > (1 << (Scale - 1)) - 1)
11273	SDValue LowMask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VTSize, loBitsSet: Scale - `1`),
11274	DL: dl, VT);
11275	Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: LowMask, True: SatMax, False: Result, Cond: ISD::SETGT);
11276	// Saturate to min if (Hi >> (Scale - 1)) < -1),
11277	// which is the same as if (HI < (-1 << (Scale - 1))
11278	SDValue HighMask =
11279	DAG.getConstant(Val: APInt::getHighBitsSet(numBits: VTSize, hiBitsSet: VTSize - Scale + `1`),
11280	DL: dl, VT);
11281	Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: HighMask, True: SatMin, False: Result, Cond: ISD::SETLT);
11282	return Result;
11283	}
11284
11285	SDValue
11286	TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
11287	SDValue LHS, SDValue RHS,
11288	unsigned Scale, SelectionDAG &DAG) const {
11289	assert((Opcode == ISD::SDIVFIX \|\| Opcode == ISD::SDIVFIXSAT \|\|
11290	Opcode == ISD::UDIVFIX \|\| Opcode == ISD::UDIVFIXSAT) &&
11291	"Expected a fixed point division opcode");
11292
11293	EVT VT = LHS.getValueType();
11294	bool Signed = Opcode == ISD::SDIVFIX \|\| Opcode == ISD::SDIVFIXSAT;
11295	bool Saturating = Opcode == ISD::SDIVFIXSAT \|\| Opcode == ISD::UDIVFIXSAT;
11296	EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
11297
11298	// If there is enough room in the type to upscale the LHS or downscale the
11299	// RHS before the division, we can perform it in this type without having to
11300	// resize. For signed operations, the LHS headroom is the number of
11301	// redundant sign bits, and for unsigned ones it is the number of zeroes.
11302	// The headroom for the RHS is the number of trailing zeroes.
11303	unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(Op: LHS) - `1`
11304	: DAG.computeKnownBits(Op: LHS).countMinLeadingZeros();
11305	unsigned RHSTrail = DAG.computeKnownBits(Op: RHS).countMinTrailingZeros();
11306
11307	// For signed saturating operations, we need to be able to detect true integer
11308	// division overflow; that is, when you have MIN / -EPS. However, this
11309	// is undefined behavior and if we emit divisions that could take such
11310	// values it may cause undesired behavior (arithmetic exceptions on x86, for
11311	// example).
11312	// Avoid this by requiring an extra bit so that we never get this case.
11313	// FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
11314	// signed saturating division, we need to emit a whopping 32-bit division.
11315	if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
11316	return SDValue ();
11317
11318	unsigned LHSShift = std::min(a: LHSLead, b: Scale);
11319	unsigned RHSShift = Scale - LHSShift;
11320
11321	// At this point, we know that if we shift the LHS up by LHSShift and the
11322	// RHS down by RHSShift, we can emit a regular division with a final scaling
11323	// factor of Scale.
11324
11325	if (LHSShift)
11326	LHS = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS,
11327	N2: DAG.getShiftAmountConstant(Val: LHSShift, VT, DL: dl));
11328	if (RHSShift)
11329	RHS = DAG.getNode(Opcode: Signed ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: RHS,
11330	N2: DAG.getShiftAmountConstant(Val: RHSShift, VT, DL: dl));
11331
11332	SDValue Quot;
11333	if (Signed) {
11334	// For signed operations, if the resulting quotient is negative and the
11335	// remainder is nonzero, subtract 1 from the quotient to round towards
11336	// negative infinity.
11337	SDValue Rem;
11338	// FIXME: Ideally we would always produce an SDIVREM here, but if the
11339	// type isn't legal, SDIVREM cannot be expanded. There is no reason why
11340	// we couldn't just form a libcall, but the type legalizer doesn't do it.
11341	if (isTypeLegal(VT) &&
11342	isOperationLegalOrCustom(Op: ISD::SDIVREM, VT)) {
11343	Quot = DAG.getNode(Opcode: ISD::SDIVREM, DL: dl,
11344	VTList: DAG.getVTList(VT1: VT, VT2: VT),
11345	N1: LHS, N2: RHS);
11346	Rem = Quot.getValue(R: `1`);
11347	Quot = Quot.getValue(R: `0`);
11348	} else {
11349	Quot = DAG.getNode(Opcode: ISD::SDIV, DL: dl, VT,
11350	N1: LHS, N2: RHS);
11351	Rem = DAG.getNode(Opcode: ISD::SREM, DL: dl, VT,
11352	N1: LHS, N2: RHS);
11353	}
11354	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
11355	SDValue RemNonZero = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Rem, RHS: Zero, Cond: ISD::SETNE);
11356	SDValue LHSNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: Zero, Cond: ISD::SETLT);
11357	SDValue RHSNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: RHS, RHS: Zero, Cond: ISD::SETLT);
11358	SDValue QuotNeg = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: BoolVT, N1: LHSNeg, N2: RHSNeg);
11359	SDValue Sub1 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Quot,
11360	N2: DAG.getConstant(Val: `1`, DL: dl, VT));
11361	Quot = DAG.getSelect(DL: dl, VT,
11362	Cond: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: BoolVT, N1: RemNonZero, N2: QuotNeg),
11363	LHS: Sub1, RHS: Quot);
11364	} else
11365	Quot = DAG.getNode(Opcode: ISD::UDIV, DL: dl, VT,
11366	N1: LHS, N2: RHS);
11367
11368	return Quot;
11369	}
11370
11371	void TargetLowering::expandUADDSUBO(
11372	SDNode Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const* {
11373	SDLoc dl(Node);
11374	SDValue LHS = Node->getOperand(Num: `0`);
11375	SDValue RHS = Node->getOperand(Num: `1`);
11376	bool IsAdd = Node->getOpcode() == ISD::UADDO;
11377
11378	// If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
11379	unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
11380	if (isOperationLegalOrCustom(Op: OpcCarry, VT: Node->getValueType(ResNo: `0`))) {
11381	SDValue CarryIn = DAG.getConstant(Val: `0`, DL: dl, VT: Node->getValueType(ResNo: `1`));
11382	SDValue NodeCarry = DAG.getNode(Opcode: OpcCarry, DL: dl, VTList: Node->getVTList(),
11383	Ops: { LHS, RHS, CarryIn });
11384	Result = SDValue (NodeCarry.getNode(), `0`);
11385	Overflow = SDValue (NodeCarry.getNode(), `1`);
11386	return;
11387	}
11388
11389	Result = DAG.getNode(Opcode: IsAdd ? ISD::ADD : ISD::SUB, DL: dl,
11390	VT: LHS.getValueType(), N1: LHS, N2: RHS);
11391
11392	EVT ResultType = Node->getValueType(ResNo: `1`);
11393	EVT SetCCType = getSetCCResultType(
11394	DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: `0`));
11395	SDValue SetCC;
11396	if (IsAdd && isOneConstant(V: RHS)) {
11397	// Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
11398	// the live range of X. We assume comparing with 0 is cheap.
11399	// The general case (X + C) < C is not necessarily beneficial. Although we
11400	// reduce the live range of X, we may introduce the materialization of
11401	// constant C.
11402	SetCC =
11403	DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Result,
11404	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: Node->getValueType(ResNo: `0`)), Cond: ISD::SETEQ);
11405	} else if (IsAdd && isAllOnesConstant(V: RHS)) {
11406	// Special case: uaddo X, -1 overflows if X != 0.
11407	SetCC =
11408	DAG.getSetCC(DL: dl, VT: SetCCType, LHS,
11409	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: Node->getValueType(ResNo: `0`)), Cond: ISD::SETNE);
11410	} else {
11411	ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
11412	SetCC = DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Result, RHS: LHS, Cond: CC);
11413	}
11414	Overflow = DAG.getBoolExtOrTrunc(Op: SetCC, SL: dl, VT: ResultType, OpVT: ResultType);
11415	}
11416
11417	void TargetLowering::expandSADDSUBO(
11418	SDNode Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const* {
11419	SDLoc dl(Node);
11420	SDValue LHS = Node->getOperand(Num: `0`);
11421	SDValue RHS = Node->getOperand(Num: `1`);
11422	bool IsAdd = Node->getOpcode() == ISD::SADDO;
11423
11424	Result = DAG.getNode(Opcode: IsAdd ? ISD::ADD : ISD::SUB, DL: dl,
11425	VT: LHS.getValueType(), N1: LHS, N2: RHS);
11426
11427	EVT ResultType = Node->getValueType(ResNo: `1`);
11428	EVT OType = getSetCCResultType(
11429	DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: `0`));
11430
11431	// If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
11432	unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
11433	if (isOperationLegal(Op: OpcSat, VT: LHS.getValueType())) {
11434	SDValue Sat = DAG.getNode(Opcode: OpcSat, DL: dl, VT: LHS.getValueType(), N1: LHS, N2: RHS);
11435	SDValue SetCC = DAG.getSetCC(DL: dl, VT: OType, LHS: Result, RHS: Sat, Cond: ISD::SETNE);
11436	Overflow = DAG.getBoolExtOrTrunc(Op: SetCC, SL: dl, VT: ResultType, OpVT: ResultType);
11437	return;
11438	}
11439
11440	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT: LHS.getValueType());
11441
11442	// For an addition, the result should be less than one of the operands (LHS)
11443	// if and only if the other operand (RHS) is negative, otherwise there will
11444	// be overflow.
11445	// For a subtraction, the result should be less than one of the operands
11446	// (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11447	// otherwise there will be overflow.
11448	SDValue ResultLowerThanLHS = DAG.getSetCC(DL: dl, VT: OType, LHS: Result, RHS: LHS, Cond: ISD::SETLT);
11449	SDValue ConditionRHS =
11450	DAG.getSetCC(DL: dl, VT: OType, LHS: RHS, RHS: Zero, Cond: IsAdd ? ISD::SETLT : ISD::SETGT);
11451
11452	Overflow = DAG.getBoolExtOrTrunc(
11453	Op: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OType, N1: ConditionRHS, N2: ResultLowerThanLHS), SL: dl,
11454	VT: ResultType, OpVT: ResultType);
11455	}
11456
11457	bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
11458	SDValue &Overflow, SelectionDAG &DAG) const {
11459	SDLoc dl(Node);
11460	EVT VT = Node->getValueType(ResNo: `0`);
11461	EVT SetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
11462	SDValue LHS = Node->getOperand(Num: `0`);
11463	SDValue RHS = Node->getOperand(Num: `1`);
11464	bool isSigned = Node->getOpcode() == ISD::SMULO;
11465
11466	// For power-of-two multiplications we can use a simpler shift expansion.
11467	if (ConstantSDNode *RHSC = isConstOrConstSplat(N: RHS)) {
11468	const APInt &C = RHSC->getAPIntValue();
11469	// mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
11470	if (C.isPowerOf2()) {
11471	// smulo(x, signed_min) is same as umulo(x, signed_min).
11472	bool UseArithShift = isSigned && !C.isMinSignedValue();
11473	SDValue ShiftAmt = DAG.getShiftAmountConstant(Val: C.logBase2(), VT, DL: dl);
11474	Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS, N2: ShiftAmt);
11475	Overflow = DAG.getSetCC(DL: dl, VT: SetCCVT,
11476	LHS: DAG.getNode(Opcode: UseArithShift ? ISD::SRA : ISD::SRL,
11477	DL: dl, VT, N1: Result, N2: ShiftAmt),
11478	RHS: LHS, Cond: ISD::SETNE);
11479	return true;
11480	}
11481	}
11482
11483	EVT WideVT = EVT::getIntegerVT(Context&: DAG.getContext(), BitWidth: VT.getScalarSizeInBits() `2`);
11484	if (VT.isVector())
11485	WideVT =
11486	EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT, EC: VT.getVectorElementCount());
11487
11488	SDValue BottomHalf;
11489	SDValue TopHalf;
11490	static const unsigned Ops[`2`][`3`] =
11491	{ { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
11492	{ ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
11493	if (isOperationLegalOrCustom(Op: Ops[isSigned][`0`], VT)) {
11494	BottomHalf = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
11495	TopHalf = DAG.getNode(Opcode: Ops[isSigned][`0`], DL: dl, VT, N1: LHS, N2: RHS);
11496	} else if (isOperationLegalOrCustom(Op: Ops[isSigned][`1`], VT)) {
11497	BottomHalf = DAG.getNode(Opcode: Ops[isSigned][`1`], DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: LHS,
11498	N2: RHS);
11499	TopHalf = BottomHalf.getValue(R: `1`);
11500	} else if (isTypeLegal(VT: WideVT)) {
11501	LHS = DAG.getNode(Opcode: Ops[isSigned][`2`], DL: dl, VT: WideVT, Operand: LHS);
11502	RHS = DAG.getNode(Opcode: Ops[isSigned][`2`], DL: dl, VT: WideVT, Operand: RHS);
11503	SDValue Mul = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: LHS, N2: RHS);
11504	BottomHalf = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Mul);
11505	SDValue ShiftAmt =
11506	DAG.getShiftAmountConstant(Val: VT.getScalarSizeInBits(), VT: WideVT, DL: dl);
11507	TopHalf = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT,
11508	Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideVT, N1: Mul, N2: ShiftAmt));
11509	} else {
11510	if (VT.isVector())
11511	return false;
11512
11513	forceExpandWideMUL(DAG, dl, Signed: isSigned, LHS, RHS, Lo&: BottomHalf, Hi&: TopHalf);
11514	}
11515
11516	Result = BottomHalf;
11517	if (isSigned) {
11518	SDValue ShiftAmt = DAG.getShiftAmountConstant(
11519	Val: VT.getScalarSizeInBits() - `1`, VT: BottomHalf.getValueType(), DL: dl);
11520	SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: BottomHalf, N2: ShiftAmt);
11521	Overflow = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: TopHalf, RHS: Sign, Cond: ISD::SETNE);
11522	} else {
11523	Overflow = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: TopHalf,
11524	RHS: DAG.getConstant(Val: `0`, DL: dl, VT), Cond: ISD::SETNE);
11525	}
11526
11527	// Truncate the result if SetCC returns a larger type than needed.
11528	EVT RType = Node->getValueType(ResNo: `1`);
11529	if (RType.bitsLT(VT: Overflow.getValueType()))
11530	Overflow = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: RType, Operand: Overflow);
11531
11532	assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
11533	"Unexpected result type for S/UMULO legalization");
11534	return true;
11535	}
11536
11537	SDValue TargetLowering::expandVecReduce(SDNode Node, SelectionDAG &DAG) const* {
11538	SDLoc dl(Node);
11539	unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(VecReduceOpcode: Node->getOpcode());
11540	SDValue Op = Node->getOperand(Num: `0`);
11541	EVT VT = Op.getValueType();
11542
11543	// Try to use a shuffle reduction for power of two vectors.
11544	if (VT.isPow2VectorType()) {
11545	while (VT.getVectorElementCount().isKnownMultipleOf(RHS: `2`)) {
11546	EVT HalfVT = VT.getHalfNumVectorElementsVT(Context&: *DAG.getContext());
11547	if (!isOperationLegalOrCustom(Op: BaseOpcode, VT: HalfVT))
11548	break;
11549
11550	SDValue Lo, Hi;
11551	std::tie(args&: Lo, args&: Hi) = DAG.SplitVector(N: Op, DL: dl);
11552	Op = DAG.getNode(Opcode: BaseOpcode, DL: dl, VT: HalfVT, N1: Lo, N2: Hi, Flags: Node->getFlags());
11553	VT = HalfVT;
11554
11555	// Stop if splitting is enough to make the reduction legal.
11556	if (isOperationLegalOrCustom(Op: Node->getOpcode(), VT: HalfVT))
11557	return DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VT: Node->getValueType(ResNo: `0`), Operand: Op,
11558	Flags: Node->getFlags());
11559	}
11560	}
11561
11562	if (VT.isScalableVector())
11563	reportFatalInternalError(
11564	reason: "Expanding reductions for scalable vectors is undefined.");
11565
11566	EVT EltVT = VT.getVectorElementType();
11567	unsigned NumElts = VT.getVectorNumElements();
11568
11569	SmallVector<SDValue, `8`> Ops;
11570	DAG.ExtractVectorElements(Op, Args&: Ops, Start: `0`, Count: NumElts);
11571
11572	SDValue Res = Ops [`0`];
11573	for (unsigned i = `1`; i < NumElts; i++)
11574	Res = DAG.getNode(Opcode: BaseOpcode, DL: dl, VT: EltVT, N1: Res, N2: Ops [i], Flags: Node->getFlags());
11575
11576	// Result type may be wider than element type.
11577	if (EltVT != Node->getValueType(ResNo: `0`))
11578	Res = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: Node->getValueType(ResNo: `0`), Operand: Res);
11579	return Res;
11580	}
11581
11582	SDValue TargetLowering::expandVecReduceSeq(SDNode Node, SelectionDAG &DAG) const* {
11583	SDLoc dl(Node);
11584	SDValue AccOp = Node->getOperand(Num: `0`);
11585	SDValue VecOp = Node->getOperand(Num: `1`);
11586	SDNodeFlags Flags = Node->getFlags();
11587
11588	EVT VT = VecOp.getValueType();
11589	EVT EltVT = VT.getVectorElementType();
11590
11591	if (VT.isScalableVector())
11592	report_fatal_error(
11593	reason: "Expanding reductions for scalable vectors is undefined.");
11594
11595	unsigned NumElts = VT.getVectorNumElements();
11596
11597	SmallVector<SDValue, `8`> Ops;
11598	DAG.ExtractVectorElements(Op: VecOp, Args&: Ops, Start: `0`, Count: NumElts);
11599
11600	unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(VecReduceOpcode: Node->getOpcode());
11601
11602	SDValue Res = AccOp;
11603	for (unsigned i = `0`; i < NumElts; i++)
11604	Res = DAG.getNode(Opcode: BaseOpcode, DL: dl, VT: EltVT, N1: Res, N2: Ops [i], Flags);
11605
11606	return Res;
11607	}
11608
11609	bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
11610	SelectionDAG &DAG) const {
11611	EVT VT = Node->getValueType(ResNo: `0`);
11612	SDLoc dl(Node);
11613	bool isSigned = Node->getOpcode() == ISD::SREM;
11614	unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
11615	unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
11616	SDValue Dividend = Node->getOperand(Num: `0`);
11617	SDValue Divisor = Node->getOperand(Num: `1`);
11618	if (isOperationLegalOrCustom(Op: DivRemOpc, VT)) {
11619	SDVTList VTs = DAG.getVTList(VT1: VT, VT2: VT);
11620	Result = DAG.getNode(Opcode: DivRemOpc, DL: dl, VTList: VTs, N1: Dividend, N2: Divisor).getValue(R: `1`);
11621	return true;
11622	}
11623	if (isOperationLegalOrCustom(Op: DivOpc, VT)) {
11624	// X % Y -> X-X/YY*
11625	SDValue Divide = DAG.getNode(Opcode: DivOpc, DL: dl, VT, N1: Dividend, N2: Divisor);
11626	SDValue Mul = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Divide, N2: Divisor);
11627	Result = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Dividend, N2: Mul);
11628	return true;
11629	}
11630	return false;
11631	}
11632
11633	SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
11634	SelectionDAG &DAG) const {
11635	bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
11636	SDLoc dl(SDValue (Node, `0`));
11637	SDValue Src = Node->getOperand(Num: `0`);
11638
11639	// DstVT is the result type, while SatVT is the size to which we saturate
11640	EVT SrcVT = Src.getValueType();
11641	EVT DstVT = Node->getValueType(ResNo: `0`);
11642
11643	EVT SatVT = cast<VTSDNode>(Val: Node->getOperand(Num: `1`))->getVT();
11644	unsigned SatWidth = SatVT.getScalarSizeInBits();
11645	unsigned DstWidth = DstVT.getScalarSizeInBits();
11646	assert(SatWidth <= DstWidth &&
11647	"Expected saturation width smaller than result width");
11648
11649	// Determine minimum and maximum integer values and their corresponding
11650	// floating-point values.
11651	APInt MinInt, MaxInt;
11652	if (IsSigned) {
11653	MinInt = APInt::getSignedMinValue(numBits: SatWidth).sext(width: DstWidth);
11654	MaxInt = APInt::getSignedMaxValue(numBits: SatWidth).sext(width: DstWidth);
11655	} else {
11656	MinInt = APInt::getMinValue(numBits: SatWidth).zext(width: DstWidth);
11657	MaxInt = APInt::getMaxValue(numBits: SatWidth).zext(width: DstWidth);
11658	}
11659
11660	// We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
11661	// libcall emission cannot handle this. Large result types will fail.
11662	if (SrcVT == MVT::f16 \|\| SrcVT == MVT::bf16) {
11663	Src = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f32, Operand: Src);
11664	SrcVT = Src.getValueType();
11665	}
11666
11667	const fltSemantics &Sem = SrcVT.getFltSemantics();
11668	APFloat MinFloat(Sem);
11669	APFloat MaxFloat(Sem);
11670
11671	APFloat::opStatus MinStatus =
11672	MinFloat.convertFromAPInt(Input: MinInt, IsSigned, RM: APFloat::rmTowardZero);
11673	APFloat::opStatus MaxStatus =
11674	MaxFloat.convertFromAPInt(Input: MaxInt, IsSigned, RM: APFloat::rmTowardZero);
11675	bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
11676	!(MaxStatus & APFloat::opStatus::opInexact);
11677
11678	SDValue MinFloatNode = DAG.getConstantFP(Val: MinFloat, DL: dl, VT: SrcVT);
11679	SDValue MaxFloatNode = DAG.getConstantFP(Val: MaxFloat, DL: dl, VT: SrcVT);
11680
11681	// If the integer bounds are exactly representable as floats and min/max are
11682	// legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
11683	// of comparisons and selects.
11684	bool MinMaxLegal = isOperationLegal(Op: ISD::FMINNUM, VT: SrcVT) &&
11685	isOperationLegal(Op: ISD::FMAXNUM, VT: SrcVT);
11686	if (AreExactFloatBounds && MinMaxLegal) {
11687	SDValue Clamped = Src;
11688
11689	// Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
11690	Clamped = DAG.getNode(Opcode: ISD::FMAXNUM, DL: dl, VT: SrcVT, N1: Clamped, N2: MinFloatNode);
11691	// Clamp by MaxFloat from above. NaN cannot occur.
11692	Clamped = DAG.getNode(Opcode: ISD::FMINNUM, DL: dl, VT: SrcVT, N1: Clamped, N2: MaxFloatNode);
11693	// Convert clamped value to integer.
11694	SDValue FpToInt = DAG.getNode(Opcode: IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
11695	DL: dl, VT: DstVT, Operand: Clamped);
11696
11697	// In the unsigned case we're done, because we mapped NaN to MinFloat,
11698	// which will cast to zero.
11699	if (!IsSigned)
11700	return FpToInt;
11701
11702	// Otherwise, select 0 if Src is NaN.
11703	SDValue ZeroInt = DAG.getConstant(Val: `0`, DL: dl, VT: DstVT);
11704	EVT SetCCVT =
11705	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: SrcVT);
11706	SDValue IsNan = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Src, Cond: ISD::CondCode::SETUO);
11707	return DAG.getSelect(DL: dl, VT: DstVT, Cond: IsNan, LHS: ZeroInt, RHS: FpToInt);
11708	}
11709
11710	SDValue MinIntNode = DAG.getConstant(Val: MinInt, DL: dl, VT: DstVT);
11711	SDValue MaxIntNode = DAG.getConstant(Val: MaxInt, DL: dl, VT: DstVT);
11712
11713	// Result of direct conversion. The assumption here is that the operation is
11714	// non-trapping and it's fine to apply it to an out-of-range value if we
11715	// select it away later.
11716	SDValue FpToInt =
11717	DAG.getNode(Opcode: IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, DL: dl, VT: DstVT, Operand: Src);
11718
11719	SDValue Select = FpToInt;
11720
11721	EVT SetCCVT =
11722	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: SrcVT);
11723
11724	// If Src ULT MinFloat, select MinInt. In particular, this also selects
11725	// MinInt if Src is NaN.
11726	SDValue ULT = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: MinFloatNode, Cond: ISD::SETULT);
11727	Select = DAG.getSelect(DL: dl, VT: DstVT, Cond: ULT, LHS: MinIntNode, RHS: Select);
11728	// If Src OGT MaxFloat, select MaxInt.
11729	SDValue OGT = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: MaxFloatNode, Cond: ISD::SETOGT);
11730	Select = DAG.getSelect(DL: dl, VT: DstVT, Cond: OGT, LHS: MaxIntNode, RHS: Select);
11731
11732	// In the unsigned case we are done, because we mapped NaN to MinInt, which
11733	// is already zero.
11734	if (!IsSigned)
11735	return Select;
11736
11737	// Otherwise, select 0 if Src is NaN.
11738	SDValue ZeroInt = DAG.getConstant(Val: `0`, DL: dl, VT: DstVT);
11739	SDValue IsNan = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Src, Cond: ISD::CondCode::SETUO);
11740	return DAG.getSelect(DL: dl, VT: DstVT, Cond: IsNan, LHS: ZeroInt, RHS: Select);
11741	}
11742
11743	SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue Op,
11744	const SDLoc &dl,
11745	SelectionDAG &DAG) const {
11746	EVT OperandVT = Op.getValueType();
11747	if (OperandVT.getScalarType() == ResultVT.getScalarType())
11748	return Op;
11749	EVT ResultIntVT = ResultVT.changeTypeToInteger();
11750	// We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11751	// can induce double-rounding which may alter the results. We can
11752	// correct for this using a trick explained in: Boldo, Sylvie, and
11753	// Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11754	// World Congress. 2005.
11755	SDValue Narrow = DAG.getFPExtendOrRound(Op, DL: dl, VT: ResultVT);
11756	SDValue NarrowAsWide = DAG.getFPExtendOrRound(Op: Narrow, DL: dl, VT: OperandVT);
11757
11758	// We can keep the narrow value as-is if narrowing was exact (no
11759	// rounding error), the wide value was NaN (the narrow value is also
11760	// NaN and should be preserved) or if we rounded to the odd value.
11761	SDValue NarrowBits = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: ResultIntVT, Operand: Narrow);
11762	SDValue One = DAG.getConstant(Val: `1`, DL: dl, VT: ResultIntVT);
11763	SDValue NegativeOne = DAG.getAllOnesConstant(DL: dl, VT: ResultIntVT);
11764	SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ResultIntVT, N1: NarrowBits, N2: One);
11765	EVT ResultIntVTCCVT = getSetCCResultType(
11766	DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: And.getValueType());
11767	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT: ResultIntVT);
11768	// The result is already odd so we don't need to do anything.
11769	SDValue AlreadyOdd = DAG.getSetCC(DL: dl, VT: ResultIntVTCCVT, LHS: And, RHS: Zero, Cond: ISD::SETNE);
11770
11771	EVT WideSetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(),
11772	VT: Op.getValueType());
11773	// We keep results which are exact, odd or NaN.
11774	SDValue KeepNarrow =
11775	DAG.getSetCC(DL: dl, VT: WideSetCCVT, LHS: Op, RHS: NarrowAsWide, Cond: ISD::SETUEQ);
11776	KeepNarrow = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: WideSetCCVT, N1: KeepNarrow, N2: AlreadyOdd);
11777	// We morally performed a round-down if AbsNarrow is smaller than
11778	// AbsWide.
11779	SDValue AbsWide = DAG.getNode(Opcode: ISD::FABS, DL: dl, VT: OperandVT, Operand: Op);
11780	SDValue AbsNarrowAsWide = DAG.getNode(Opcode: ISD::FABS, DL: dl, VT: OperandVT, Operand: NarrowAsWide);
11781	SDValue NarrowIsRd =
11782	DAG.getSetCC(DL: dl, VT: WideSetCCVT, LHS: AbsWide, RHS: AbsNarrowAsWide, Cond: ISD::SETOGT);
11783	// If the narrow value is odd or exact, pick it.
11784	// Otherwise, narrow is even and corresponds to either the rounded-up
11785	// or rounded-down value. If narrow is the rounded-down value, we want
11786	// the rounded-up value as it will be odd.
11787	SDValue Adjust = DAG.getSelect(DL: dl, VT: ResultIntVT, Cond: NarrowIsRd, LHS: One, RHS: NegativeOne);
11788	SDValue Adjusted = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ResultIntVT, N1: NarrowBits, N2: Adjust);
11789	Op = DAG.getSelect(DL: dl, VT: ResultIntVT, Cond: KeepNarrow, LHS: NarrowBits, RHS: Adjusted);
11790	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: ResultVT, Operand: Op);
11791	}
11792
11793	SDValue TargetLowering::expandFP_ROUND(SDNode Node, SelectionDAG &DAG) const* {
11794	assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
11795	SDValue Op = Node->getOperand(Num: `0`);
11796	EVT VT = Node->getValueType(ResNo: `0`);
11797	SDLoc dl(Node);
11798	if (VT.getScalarType() == MVT::bf16) {
11799	if (Node->getConstantOperandVal(Num: `1`) == `1`) {
11800	return DAG.getNode(Opcode: ISD::FP_TO_BF16, DL: dl, VT, Operand: Node->getOperand(Num: `0`));
11801	}
11802	EVT OperandVT = Op.getValueType();
11803	SDValue IsNaN = DAG.getSetCC(
11804	DL: dl,
11805	VT: getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: OperandVT),
11806	LHS: Op, RHS: Op, Cond: ISD::SETUO);
11807
11808	// We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11809	// can induce double-rounding which may alter the results. We can
11810	// correct for this using a trick explained in: Boldo, Sylvie, and
11811	// Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11812	// World Congress. 2005.
11813	EVT F32 = VT.isVector() ? VT.changeVectorElementType(EltVT: MVT::f32) : MVT::f32;
11814	EVT I32 = F32.changeTypeToInteger();
11815	Op = expandRoundInexactToOdd(ResultVT: F32, Op, dl, DAG);
11816	Op = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: I32, Operand: Op);
11817
11818	// Conversions should set NaN's quiet bit. This also prevents NaNs from
11819	// turning into infinities.
11820	SDValue NaN =
11821	DAG.getNode(Opcode: ISD::OR, DL: dl, VT: I32, N1: Op, N2: DAG.getConstant(Val: `0x400000`, DL: dl, VT: I32));
11822
11823	// Factor in the contribution of the low 16 bits.
11824	SDValue One = DAG.getConstant(Val: `1`, DL: dl, VT: I32);
11825	SDValue Lsb = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: I32, N1: Op,
11826	N2: DAG.getShiftAmountConstant(Val: `16`, VT: I32, DL: dl));
11827	Lsb = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: I32, N1: Lsb, N2: One);
11828	SDValue RoundingBias =
11829	DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: I32, N1: DAG.getConstant(Val: `0x7fff`, DL: dl, VT: I32), N2: Lsb);
11830	SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: I32, N1: Op, N2: RoundingBias);
11831
11832	// Don't round if we had a NaN, we don't want to turn 0x7fffffff into
11833	// 0x80000000.
11834	Op = DAG.getSelect(DL: dl, VT: I32, Cond: IsNaN, LHS: NaN, RHS: Add);
11835
11836	// Now that we have rounded, shift the bits into position.
11837	Op = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: I32, N1: Op,
11838	N2: DAG.getShiftAmountConstant(Val: `16`, VT: I32, DL: dl));
11839	Op = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: I32, Operand: Op);
11840	EVT I16 = I32.isVector() ? I32.changeVectorElementType(EltVT: MVT::i16) : MVT::i16;
11841	Op = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: I16, Operand: Op);
11842	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: Op);
11843	}
11844	return SDValue ();
11845	}
11846
11847	SDValue TargetLowering::expandVectorSplice(SDNode *Node,
11848	SelectionDAG &DAG) const {
11849	assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
11850	assert(Node->getValueType(`0`).isScalableVector() &&
11851	"Fixed length vector types expected to use SHUFFLE_VECTOR!");
11852
11853	EVT VT = Node->getValueType(ResNo: `0`);
11854	SDValue V1 = Node->getOperand(Num: `0`);
11855	SDValue V2 = Node->getOperand(Num: `1`);
11856	int64_t Imm = cast<ConstantSDNode>(Val: Node->getOperand(Num: `2`))->getSExtValue();
11857	SDLoc DL(Node);
11858
11859	// Expand through memory thusly:
11860	// Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
11861	// Store V1, Ptr
11862	// Store V2, Ptr + sizeof(V1)
11863	// If (Imm < 0)
11864	// TrailingElts = -Imm
11865	// Ptr = Ptr + sizeof(V1) - (TrailingElts sizeof(VT.Elt))*
11866	// else
11867	// Ptr = Ptr + (Imm sizeof(VT.Elt))*
11868	// Res = Load Ptr
11869
11870	Align Alignment = DAG.getReducedAlign(VT, /UseABI=/false);
11871
11872	EVT MemVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: VT.getVectorElementType(),
11873	EC: VT.getVectorElementCount() * `2`);
11874	SDValue StackPtr = DAG.CreateStackTemporary(Bytes: MemVT.getStoreSize(), Alignment);
11875	EVT PtrVT = StackPtr.getValueType();
11876	auto &MF = DAG.getMachineFunction();
11877	auto FrameIndex = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
11878	auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FI: FrameIndex);
11879
11880	// Store the lo part of CONCAT_VECTORS(V1, V2)
11881	SDValue StoreV1 = DAG.getStore(Chain: DAG.getEntryNode(), dl: DL, Val: V1, Ptr: StackPtr, PtrInfo);
11882	// Store the hi part of CONCAT_VECTORS(V1, V2)
11883	SDValue OffsetToV2 = DAG.getVScale(
11884	DL, VT: PtrVT,
11885	MulImm: APInt (PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue()));
11886	SDValue StackPtr2 = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, N2: OffsetToV2);
11887	SDValue StoreV2 = DAG.getStore(Chain: StoreV1, dl: DL, Val: V2, Ptr: StackPtr2, PtrInfo);
11888
11889	if (Imm >= `0`) {
11890	// Load back the required element. getVectorElementPointer takes care of
11891	// clamping the index if it's out-of-bounds.
11892	StackPtr = getVectorElementPointer(DAG, VecPtr: StackPtr, VecVT: VT, Index: Node->getOperand(Num: `2`));
11893	// Load the spliced result
11894	return DAG.getLoad(VT, dl: DL, Chain: StoreV2, Ptr: StackPtr,
11895	PtrInfo: MachinePointerInfo::getUnknownStack(MF));
11896	}
11897
11898	uint64_t TrailingElts = -Imm;
11899
11900	// NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
11901	TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
11902	SDValue TrailingBytes =
11903	DAG.getConstant(Val: TrailingElts * EltByteSize, DL, VT: PtrVT);
11904
11905	if (TrailingElts > VT.getVectorMinNumElements()) {
11906	SDValue VLBytes =
11907	DAG.getVScale(DL, VT: PtrVT,
11908	MulImm: APInt (PtrVT.getFixedSizeInBits(),
11909	VT.getStoreSize().getKnownMinValue()));
11910	TrailingBytes = DAG.getNode(Opcode: ISD::UMIN, DL, VT: PtrVT, N1: TrailingBytes, N2: VLBytes);
11911	}
11912
11913	// Calculate the start address of the spliced result.
11914	StackPtr2 = DAG.getNode(Opcode: ISD::SUB, DL, VT: PtrVT, N1: StackPtr2, N2: TrailingBytes);
11915
11916	// Load the spliced result
11917	return DAG.getLoad(VT, dl: DL, Chain: StoreV2, Ptr: StackPtr2,
11918	PtrInfo: MachinePointerInfo::getUnknownStack(MF));
11919	}
11920
11921	SDValue TargetLowering::expandVECTOR_COMPRESS(SDNode *Node,
11922	SelectionDAG &DAG) const {
11923	SDLoc DL(Node);
11924	SDValue Vec = Node->getOperand(Num: `0`);
11925	SDValue Mask = Node->getOperand(Num: `1`);
11926	SDValue Passthru = Node->getOperand(Num: `2`);
11927
11928	EVT VecVT = Vec.getValueType();
11929	EVT ScalarVT = VecVT.getScalarType();
11930	EVT MaskVT = Mask.getValueType();
11931	EVT MaskScalarVT = MaskVT.getScalarType();
11932
11933	// Needs to be handled by targets that have scalable vector types.
11934	if (VecVT.isScalableVector())
11935	report_fatal_error(reason: "Cannot expand masked_compress for scalable vectors.");
11936
11937	SDValue StackPtr = DAG.CreateStackTemporary(
11938	Bytes: VecVT.getStoreSize(), Alignment: DAG.getReducedAlign(VT: VecVT, /UseABI=/false));
11939	int FI = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
11940	MachinePointerInfo PtrInfo =
11941	MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI);
11942
11943	MVT PositionVT = getVectorIdxTy(DL: DAG.getDataLayout());
11944	SDValue Chain = DAG.getEntryNode();
11945	SDValue OutPos = DAG.getConstant(Val: `0`, DL, VT: PositionVT);
11946
11947	bool HasPassthru = !Passthru.isUndef();
11948
11949	// If we have a passthru vector, store it on the stack, overwrite the matching
11950	// positions and then re-write the last element that was potentially
11951	// overwritten even though mask[i] = false.
11952	if (HasPassthru)
11953	Chain = DAG.getStore(Chain, dl: DL, Val: Passthru, Ptr: StackPtr, PtrInfo);
11954
11955	SDValue LastWriteVal;
11956	APInt PassthruSplatVal;
11957	bool IsSplatPassthru =
11958	ISD::isConstantSplatVector(N: Passthru.getNode(), SplatValue&: PassthruSplatVal);
11959
11960	if (IsSplatPassthru) {
11961	// As we do not know which position we wrote to last, we cannot simply
11962	// access that index from the passthru vector. So we first check if passthru
11963	// is a splat vector, to use any element ...
11964	LastWriteVal = DAG.getConstant(Val: PassthruSplatVal, DL, VT: ScalarVT);
11965	} else if (HasPassthru) {
11966	// ... if it is not a splat vector, we need to get the passthru value at
11967	// position = popcount(mask) and re-load it from the stack before it is
11968	// overwritten in the loop below.
11969	EVT PopcountVT = ScalarVT.changeTypeToInteger();
11970	SDValue Popcount = DAG.getNode(
11971	Opcode: ISD::TRUNCATE, DL, VT: MaskVT.changeVectorElementType(EltVT: MVT::i1), Operand: Mask);
11972	Popcount =
11973	DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL,
11974	VT: MaskVT.changeVectorElementType(EltVT: PopcountVT), Operand: Popcount);
11975	Popcount = DAG.getNode(Opcode: ISD::VECREDUCE_ADD, DL, VT: PopcountVT, Operand: Popcount);
11976	SDValue LastElmtPtr =
11977	getVectorElementPointer(DAG, VecPtr: StackPtr, VecVT, Index: Popcount);
11978	LastWriteVal = DAG.getLoad(
11979	VT: ScalarVT, dl: DL, Chain, Ptr: LastElmtPtr,
11980	PtrInfo: MachinePointerInfo::getUnknownStack(MF&: DAG.getMachineFunction()));
11981	Chain = LastWriteVal.getValue(R: `1`);
11982	}
11983
11984	unsigned NumElms = VecVT.getVectorNumElements();
11985	for (unsigned I = `0`; I < NumElms; I++) {
11986	SDValue ValI = DAG.getExtractVectorElt(DL, VT: ScalarVT, Vec, Idx: I);
11987	SDValue OutPtr = getVectorElementPointer(DAG, VecPtr: StackPtr, VecVT, Index: OutPos);
11988	Chain = DAG.getStore(
11989	Chain, dl: DL, Val: ValI, Ptr: OutPtr,
11990	PtrInfo: MachinePointerInfo::getUnknownStack(MF&: DAG.getMachineFunction()));
11991
11992	// Get the mask value and add it to the current output position. This
11993	// either increments by 1 if MaskI is true or adds 0 otherwise.
11994	// Freeze in case we have poison/undef mask entries.
11995	SDValue MaskI =
11996	DAG.getFreeze(V: DAG.getExtractVectorElt(DL, VT: MaskScalarVT, Vec: Mask, Idx: I));
11997	MaskI = DAG.getFreeze(V: MaskI);
11998	MaskI = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i1, Operand: MaskI);
11999	MaskI = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: PositionVT, Operand: MaskI);
12000	OutPos = DAG.getNode(Opcode: ISD::ADD, DL, VT: PositionVT, N1: OutPos, N2: MaskI);
12001
12002	if (HasPassthru && I == NumElms - `1`) {
12003	SDValue EndOfVector =
12004	DAG.getConstant(Val: VecVT.getVectorNumElements() - `1`, DL, VT: PositionVT);
12005	SDValue AllLanesSelected =
12006	DAG.getSetCC(DL, VT: MVT::i1, LHS: OutPos, RHS: EndOfVector, Cond: ISD::CondCode::SETUGT);
12007	OutPos = DAG.getNode(Opcode: ISD::UMIN, DL, VT: PositionVT, N1: OutPos, N2: EndOfVector);
12008	OutPtr = getVectorElementPointer(DAG, VecPtr: StackPtr, VecVT, Index: OutPos);
12009
12010	// Re-write the last ValI if all lanes were selected. Otherwise,
12011	// overwrite the last write it with the passthru value.
12012	LastWriteVal = DAG.getSelect(DL, VT: ScalarVT, Cond: AllLanesSelected, LHS: ValI,
12013	RHS: LastWriteVal, Flags: SDNodeFlags::Unpredictable);
12014	Chain = DAG.getStore(
12015	Chain, dl: DL, Val: LastWriteVal, Ptr: OutPtr,
12016	PtrInfo: MachinePointerInfo::getUnknownStack(MF&: DAG.getMachineFunction()));
12017	}
12018	}
12019
12020	return DAG.getLoad(VT: VecVT, dl: DL, Chain, Ptr: StackPtr, PtrInfo);
12021	}
12022
12023	SDValue TargetLowering::expandPartialReduceMLA(SDNode *N,
12024	SelectionDAG &DAG) const {
12025	SDLoc DL(N);
12026	SDValue Acc = N->getOperand(Num: `0`);
12027	SDValue MulLHS = N->getOperand(Num: `1`);
12028	SDValue MulRHS = N->getOperand(Num: `2`);
12029	EVT AccVT = Acc.getValueType();
12030	EVT MulOpVT = MulLHS.getValueType();
12031
12032	EVT ExtMulOpVT =
12033	EVT::getVectorVT(Context&: *DAG.getContext(), VT: AccVT.getVectorElementType(),
12034	EC: MulOpVT.getVectorElementCount());
12035
12036	unsigned ExtOpcLHS = N->getOpcode() == ISD::PARTIAL_REDUCE_UMLA
12037	? ISD::ZERO_EXTEND
12038	: ISD::SIGN_EXTEND;
12039	unsigned ExtOpcRHS = N->getOpcode() == ISD::PARTIAL_REDUCE_SMLA
12040	? ISD::SIGN_EXTEND
12041	: ISD::ZERO_EXTEND;
12042
12043	if (ExtMulOpVT != MulOpVT) {
12044	MulLHS = DAG.getNode(Opcode: ExtOpcLHS, DL, VT: ExtMulOpVT, Operand: MulLHS);
12045	MulRHS = DAG.getNode(Opcode: ExtOpcRHS, DL, VT: ExtMulOpVT, Operand: MulRHS);
12046	}
12047	SDValue Input = MulLHS;
12048	APInt ConstantOne;
12049	if (!ISD::isConstantSplatVector(N: MulRHS.getNode(), SplatValue&: ConstantOne) \|\|
12050	!ConstantOne.isOne())
12051	Input = DAG.getNode(Opcode: ISD::MUL, DL, VT: ExtMulOpVT, N1: MulLHS, N2: MulRHS);
12052
12053	unsigned Stride = AccVT.getVectorMinNumElements();
12054	unsigned ScaleFactor = MulOpVT.getVectorMinNumElements() / Stride;
12055
12056	// Collect all of the subvectors
12057	std::deque<SDValue> Subvectors = {Acc};
12058	for (unsigned I = `0`; I < ScaleFactor; I++)
12059	Subvectors.push_back(x: DAG.getExtractSubvector(DL, VT: AccVT, Vec: Input, Idx: I * Stride));
12060
12061	// Flatten the subvector tree
12062	while (Subvectors.size() > `1`) {
12063	Subvectors.push_back(
12064	x: DAG.getNode(Opcode: ISD::ADD, DL, VT: AccVT, Ops: {Subvectors [`0`], Subvectors [`1`]}));
12065	Subvectors.pop_front();
12066	Subvectors.pop_front();
12067	}
12068
12069	assert(Subvectors.size() == `1` &&
12070	"There should only be one subvector after tree flattening");
12071
12072	return Subvectors [`0`];
12073	}
12074
12075	bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
12076	SDValue &LHS, SDValue &RHS,
12077	SDValue &CC, SDValue Mask,
12078	SDValue EVL, bool &NeedInvert,
12079	const SDLoc &dl, SDValue &Chain,
12080	bool IsSignaling) const {
12081	MVT OpVT = LHS.getSimpleValueType();
12082	ISD::CondCode CCCode = cast<CondCodeSDNode>(Val&: CC)->get();
12083	NeedInvert = false;
12084	assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
12085	bool IsNonVP = !EVL;
12086	switch (getCondCodeAction(CC: CCCode, VT: OpVT)) {
12087	default:
12088	llvm_unreachable("Unknown condition code action!");
12089	case TargetLowering::Legal:
12090	// Nothing to do.
12091	break;
12092	case TargetLowering::Expand: {
12093	ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(Operation: CCCode);
12094	if (isCondCodeLegalOrCustom(CC: InvCC, VT: OpVT)) {
12095	std::swap(a&: LHS, b&: RHS);
12096	CC = DAG.getCondCode(Cond: InvCC);
12097	return true;
12098	}
12099	// Swapping operands didn't work. Try inverting the condition.
12100	bool NeedSwap = false;
12101	InvCC = getSetCCInverse(Operation: CCCode, Type: OpVT);
12102	if (!isCondCodeLegalOrCustom(CC: InvCC, VT: OpVT)) {
12103	// If inverting the condition is not enough, try swapping operands
12104	// on top of it.
12105	InvCC = ISD::getSetCCSwappedOperands(Operation: InvCC);
12106	NeedSwap = true;
12107	}
12108	if (isCondCodeLegalOrCustom(CC: InvCC, VT: OpVT)) {
12109	CC = DAG.getCondCode(Cond: InvCC);
12110	NeedInvert = true;
12111	if (NeedSwap)
12112	std::swap(a&: LHS, b&: RHS);
12113	return true;
12114	}
12115
12116	// Special case: expand i1 comparisons using logical operations.
12117	if (OpVT == MVT::i1) {
12118	SDValue Ret;
12119	switch (CCCode) {
12120	default:
12121	llvm_unreachable("Unknown integer setcc!");
12122	case ISD::SETEQ: // X == Y --> ~(X ^ Y)
12123	Ret = DAG.getNOT(DL: dl, Val: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i1, N1: LHS, N2: RHS),
12124	VT: MVT::i1);
12125	break;
12126	case ISD::SETNE: // X != Y --> (X ^ Y)
12127	Ret = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i1, N1: LHS, N2: RHS);
12128	break;
12129	case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
12130	case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
12131	Ret = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i1, N1: RHS,
12132	N2: DAG.getNOT(DL: dl, Val: LHS, VT: MVT::i1));
12133	break;
12134	case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
12135	case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
12136	Ret = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i1, N1: LHS,
12137	N2: DAG.getNOT(DL: dl, Val: RHS, VT: MVT::i1));
12138	break;
12139	case ISD::SETULE: // X <=u Y --> X == 0 \| Y == 1 --> ~X \| Y
12140	case ISD::SETGE: // X >=s Y --> X == 0 \| Y == 1 --> ~X \| Y
12141	Ret = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: MVT::i1, N1: RHS,
12142	N2: DAG.getNOT(DL: dl, Val: LHS, VT: MVT::i1));
12143	break;
12144	case ISD::SETUGE: // X >=u Y --> X == 1 \| Y == 0 --> ~Y \| X
12145	case ISD::SETLE: // X <=s Y --> X == 1 \| Y == 0 --> ~Y \| X
12146	Ret = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: MVT::i1, N1: LHS,
12147	N2: DAG.getNOT(DL: dl, Val: RHS, VT: MVT::i1));
12148	break;
12149	}
12150
12151	LHS = DAG.getZExtOrTrunc(Op: Ret, DL: dl, VT);
12152	RHS = SDValue ();
12153	CC = SDValue ();
12154	return true;
12155	}
12156
12157	ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
12158	unsigned Opc = `0`;
12159	switch (CCCode) {
12160	default:
12161	llvm_unreachable("Don't know how to expand this condition!");
12162	case ISD::SETUO:
12163	if (isCondCodeLegal(CC: ISD::SETUNE, VT: OpVT)) {
12164	CC1 = ISD::SETUNE;
12165	CC2 = ISD::SETUNE;
12166	Opc = ISD::OR;
12167	break;
12168	}
12169	assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
12170	"If SETUE is expanded, SETOEQ or SETUNE must be legal!");
12171	NeedInvert = true;
12172	[[fallthrough]];
12173	case ISD::SETO:
12174	assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
12175	"If SETO is expanded, SETOEQ must be legal!");
12176	CC1 = ISD::SETOEQ;
12177	CC2 = ISD::SETOEQ;
12178	Opc = ISD::AND;
12179	break;
12180	case ISD::SETONE:
12181	case ISD::SETUEQ:
12182	// If the SETUO or SETO CC isn't legal, we might be able to use
12183	// SETOGT \|\| SETOLT, inverting the result for SETUEQ. We only need one
12184	// of SETOGT/SETOLT to be legal, the other can be emulated by swapping
12185	// the operands.
12186	CC2 = ((unsigned)CCCode & `0x8U`) ? ISD::SETUO : ISD::SETO;
12187	if (!isCondCodeLegal(CC: CC2, VT: OpVT) && (isCondCodeLegal(CC: ISD::SETOGT, VT: OpVT) \|\|
12188	isCondCodeLegal(CC: ISD::SETOLT, VT: OpVT))) {
12189	CC1 = ISD::SETOGT;
12190	CC2 = ISD::SETOLT;
12191	Opc = ISD::OR;
12192	NeedInvert = ((unsigned)CCCode & `0x8U`);
12193	break;
12194	}
12195	[[fallthrough]];
12196	case ISD::SETOEQ:
12197	case ISD::SETOGT:
12198	case ISD::SETOGE:
12199	case ISD::SETOLT:
12200	case ISD::SETOLE:
12201	case ISD::SETUNE:
12202	case ISD::SETUGT:
12203	case ISD::SETUGE:
12204	case ISD::SETULT:
12205	case ISD::SETULE:
12206	// If we are floating point, assign and break, otherwise fall through.
12207	if (!OpVT.isInteger()) {
12208	// We can use the 4th bit to tell if we are the unordered
12209	// or ordered version of the opcode.
12210	CC2 = ((unsigned)CCCode & `0x8U`) ? ISD::SETUO : ISD::SETO;
12211	Opc = ((unsigned)CCCode & `0x8U`) ? ISD::OR : ISD::AND;
12212	CC1 = (ISD::CondCode)(((int)CCCode & `0x7`) \| `0x10`);
12213	break;
12214	}
12215	// Fallthrough if we are unsigned integer.
12216	[[fallthrough]];
12217	case ISD::SETLE:
12218	case ISD::SETGT:
12219	case ISD::SETGE:
12220	case ISD::SETLT:
12221	case ISD::SETNE:
12222	case ISD::SETEQ:
12223	// If all combinations of inverting the condition and swapping operands
12224	// didn't work then we have no means to expand the condition.
12225	llvm_unreachable("Don't know how to expand this condition!");
12226	}
12227
12228	SDValue SetCC1, SetCC2;
12229	if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
12230	// If we aren't the ordered or unorder operation,
12231	// then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
12232	if (IsNonVP) {
12233	SetCC1 = DAG.getSetCC(DL: dl, VT, LHS, RHS, Cond: CC1, Chain, IsSignaling);
12234	SetCC2 = DAG.getSetCC(DL: dl, VT, LHS, RHS, Cond: CC2, Chain, IsSignaling);
12235	} else {
12236	SetCC1 = DAG.getSetCCVP(DL: dl, VT, LHS, RHS, Cond: CC1, Mask, EVL);
12237	SetCC2 = DAG.getSetCCVP(DL: dl, VT, LHS, RHS, Cond: CC2, Mask, EVL);
12238	}
12239	} else {
12240	// Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
12241	if (IsNonVP) {
12242	SetCC1 = DAG.getSetCC(DL: dl, VT, LHS, RHS: LHS, Cond: CC1, Chain, IsSignaling);
12243	SetCC2 = DAG.getSetCC(DL: dl, VT, LHS: RHS, RHS, Cond: CC2, Chain, IsSignaling);
12244	} else {
12245	SetCC1 = DAG.getSetCCVP(DL: dl, VT, LHS, RHS: LHS, Cond: CC1, Mask, EVL);
12246	SetCC2 = DAG.getSetCCVP(DL: dl, VT, LHS: RHS, RHS, Cond: CC2, Mask, EVL);
12247	}
12248	}
12249	if (Chain)
12250	Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: SetCC1.getValue(R: `1`),
12251	N2: SetCC2.getValue(R: `1`));
12252	if (IsNonVP)
12253	LHS = DAG.getNode(Opcode: Opc, DL: dl, VT, N1: SetCC1, N2: SetCC2);
12254	else {
12255	// Transform the binary opcode to the VP equivalent.
12256	assert((Opc == ISD::OR \|\| Opc == ISD::AND) && "Unexpected opcode");
12257	Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
12258	LHS = DAG.getNode(Opcode: Opc, DL: dl, VT, N1: SetCC1, N2: SetCC2, N3: Mask, N4: EVL);
12259	}
12260	RHS = SDValue ();
12261	CC = SDValue ();
12262	return true;
12263	}
12264	}
12265	return false;
12266	}
12267
12268	SDValue TargetLowering::expandVectorNaryOpBySplitting(SDNode *Node,
12269	SelectionDAG &DAG) const {
12270	EVT VT = Node->getValueType(ResNo: `0`);
12271	// Despite its documentation, GetSplitDestVTs will assert if VT cannot be
12272	// split into two equal parts.
12273	if (!VT.isVector() \|\| !VT.getVectorElementCount().isKnownMultipleOf(RHS: `2`))
12274	return SDValue ();
12275
12276	// Restrict expansion to cases where both parts can be concatenated.
12277	auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
12278	if (LoVT != HiVT \|\| !isTypeLegal(VT: LoVT))
12279	return SDValue ();
12280
12281	SDLoc DL(Node);
12282	unsigned Opcode = Node->getOpcode();
12283
12284	// Don't expand if the result is likely to be unrolled anyway.
12285	if (!isOperationLegalOrCustomOrPromote(Op: Opcode, VT: LoVT))
12286	return SDValue ();
12287
12288	SmallVector<SDValue, `4`> LoOps, HiOps;
12289	for (const SDValue &V : Node->op_values()) {
12290	auto [Lo, Hi] = DAG.SplitVector(N: V, DL, LoVT, HiVT);
12291	LoOps.push_back(Elt: Lo);
12292	HiOps.push_back(Elt: Hi);
12293	}
12294
12295	SDValue SplitOpLo = DAG.getNode(Opcode, DL, VT: LoVT, Ops: LoOps);
12296	SDValue SplitOpHi = DAG.getNode(Opcode, DL, VT: HiVT, Ops: HiOps);
12297	return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, N1: SplitOpLo, N2: SplitOpHi);
12298	}
12299
12300	SDValue TargetLowering::scalarizeExtractedVectorLoad(EVT ResultVT,
12301	const SDLoc &DL,
12302	EVT InVecVT, SDValue EltNo,
12303	LoadSDNode *OriginalLoad,
12304	SelectionDAG &DAG) const {
12305	assert(OriginalLoad->isSimple());
12306
12307	EVT VecEltVT = InVecVT.getVectorElementType();
12308
12309	// If the vector element type is not a multiple of a byte then we are unable
12310	// to correctly compute an address to load only the extracted element as a
12311	// scalar.
12312	if (!VecEltVT.isByteSized())
12313	return SDValue ();
12314
12315	ISD::LoadExtType ExtTy =
12316	ResultVT.bitsGT(VT: VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
12317	if (!isOperationLegalOrCustom(Op: ISD::LOAD, VT: VecEltVT))
12318	return SDValue ();
12319
12320	std::optional<unsigned> ByteOffset;
12321	Align Alignment = OriginalLoad->getAlign();
12322	MachinePointerInfo MPI;
12323	if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(Val&: EltNo)) {
12324	int Elt = ConstEltNo->getZExtValue();
12325	ByteOffset = VecEltVT.getSizeInBits() * Elt / `8`;
12326	MPI = OriginalLoad->getPointerInfo().getWithOffset(O: *ByteOffset);
12327	Alignment = commonAlignment(A: Alignment, Offset: *ByteOffset);
12328	} else {
12329	// Discard the pointer info except the address space because the memory
12330	// operand can't represent this new access since the offset is variable.
12331	MPI = MachinePointerInfo (OriginalLoad->getPointerInfo().getAddrSpace());
12332	Alignment = commonAlignment(A: Alignment, Offset: VecEltVT.getSizeInBits() / `8`);
12333	}
12334
12335	if (!shouldReduceLoadWidth(Load: OriginalLoad, ExtTy, NewVT: VecEltVT, ByteOffset))
12336	return SDValue ();
12337
12338	unsigned IsFast = `0`;
12339	if (!allowsMemoryAccess(Context&: *DAG.getContext(), DL: DAG.getDataLayout(), VT: VecEltVT,
12340	AddrSpace: OriginalLoad->getAddressSpace(), Alignment,
12341	Flags: OriginalLoad->getMemOperand()->getFlags(), Fast: &IsFast) \|\|
12342	!IsFast)
12343	return SDValue ();
12344
12345	SDValue NewPtr =
12346	getVectorElementPointer(DAG, VecPtr: OriginalLoad->getBasePtr(), VecVT: InVecVT, Index: EltNo);
12347
12348	// We are replacing a vector load with a scalar load. The new load must have
12349	// identical memory op ordering to the original.
12350	SDValue Load;
12351	if (ResultVT.bitsGT(VT: VecEltVT)) {
12352	// If the result type of vextract is wider than the load, then issue an
12353	// extending load instead.
12354	ISD::LoadExtType ExtType = isLoadExtLegal(ExtType: ISD::ZEXTLOAD, ValVT: ResultVT, MemVT: VecEltVT)
12355	? ISD::ZEXTLOAD
12356	: ISD::EXTLOAD;
12357	Load = DAG.getExtLoad(ExtType, dl: DL, VT: ResultVT, Chain: OriginalLoad->getChain(),
12358	Ptr: NewPtr, PtrInfo: MPI, MemVT: VecEltVT, Alignment,
12359	MMOFlags: OriginalLoad->getMemOperand()->getFlags(),
12360	AAInfo: OriginalLoad->getAAInfo());
12361	DAG.makeEquivalentMemoryOrdering(OldLoad: OriginalLoad, NewMemOp: Load);
12362	} else {
12363	// The result type is narrower or the same width as the vector element
12364	Load = DAG.getLoad(VT: VecEltVT, dl: DL, Chain: OriginalLoad->getChain(), Ptr: NewPtr, PtrInfo: MPI,
12365	Alignment, MMOFlags: OriginalLoad->getMemOperand()->getFlags(),
12366	AAInfo: OriginalLoad->getAAInfo());
12367	DAG.makeEquivalentMemoryOrdering(OldLoad: OriginalLoad, NewMemOp: Load);
12368	if (ResultVT.bitsLT(VT: VecEltVT))
12369	Load = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: ResultVT, Operand: Load);
12370	else
12371	Load = DAG.getBitcast(VT: ResultVT, V: Load);
12372	}
12373
12374	return Load;
12375	}
12376

Browse the source code of llvm_projects/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp