TargetLowering.cpp source code [llvm_projects/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp]

1	//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This implements the TargetLowering class.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "llvm/CodeGen/TargetLowering.h"
14	#include "llvm/ADT/STLExtras.h"
15	#include "llvm/Analysis/VectorUtils.h"
16	#include "llvm/CodeGen/CallingConvLower.h"
17	#include "llvm/CodeGen/CodeGenCommonISel.h"
18	#include "llvm/CodeGen/MachineFrameInfo.h"
19	#include "llvm/CodeGen/MachineFunction.h"
20	#include "llvm/CodeGen/MachineJumpTableInfo.h"
21	#include "llvm/CodeGen/MachineModuleInfoImpls.h"
22	#include "llvm/CodeGen/MachineRegisterInfo.h"
23	#include "llvm/CodeGen/SelectionDAG.h"
24	#include "llvm/CodeGen/TargetRegisterInfo.h"
25	#include "llvm/IR/DataLayout.h"
26	#include "llvm/IR/DerivedTypes.h"
27	#include "llvm/IR/GlobalVariable.h"
28	#include "llvm/IR/LLVMContext.h"
29	#include "llvm/MC/MCAsmInfo.h"
30	#include "llvm/MC/MCExpr.h"
31	#include "llvm/Support/DivisionByConstantInfo.h"
32	#include "llvm/Support/ErrorHandling.h"
33	#include "llvm/Support/KnownBits.h"
34	#include "llvm/Support/MathExtras.h"
35	#include "llvm/Target/TargetMachine.h"
36	#include <cctype>
37	using namespace llvm;
38
39	/// NOTE: The TargetMachine owns TLOF.
40	TargetLowering::TargetLowering(const TargetMachine &tm)
41	: TargetLoweringBase (tm) {}
42
43	const char TargetLowering::getTargetNodeName(unsigned* Opcode) const {
44	return nullptr;
45	}
46
47	bool TargetLowering::isPositionIndependent() const {
48	return getTargetMachine().isPositionIndependent();
49	}
50
51	/// Check whether a given call node is in tail position within its function. If
52	/// so, it sets Chain to the input chain of the tail call.
53	bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
54	SDValue &Chain) const {
55	const Function &F = DAG.getMachineFunction().getFunction();
56
57	// First, check if tail calls have been disabled in this function.
58	if (F.getFnAttribute(Kind: "disable-tail-calls").getValueAsBool())
59	return false;
60
61	// Conservatively require the attributes of the call to match those of
62	// the return. Ignore following attributes because they don't affect the
63	// call sequence.
64	AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
65	for (const auto &Attr :
66	{Attribute::Alignment, Attribute::Dereferenceable,
67	Attribute::DereferenceableOrNull, Attribute::NoAlias,
68	Attribute::NonNull, Attribute::NoUndef, Attribute::Range})
69	CallerAttrs.removeAttribute(Val: Attr);
70
71	if (CallerAttrs.hasAttributes())
72	return false;
73
74	// It's not safe to eliminate the sign / zero extension of the return value.
75	if (CallerAttrs.contains(A: Attribute::ZExt) \|\|
76	CallerAttrs.contains(A: Attribute::SExt))
77	return false;
78
79	// Check if the only use is a function return node.
80	return isUsedByReturnOnly(Node, Chain);
81	}
82
83	bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
84	const uint32_t *CallerPreservedMask,
85	const SmallVectorImpl<CCValAssign> &ArgLocs,
86	const SmallVectorImpl<SDValue> &OutVals) const {
87	for (unsigned I = `0`, E = ArgLocs.size(); I != E; ++I) {
88	const CCValAssign &ArgLoc = ArgLocs [I];
89	if (!ArgLoc.isRegLoc())
90	continue;
91	MCRegister Reg = ArgLoc.getLocReg();
92	// Only look at callee saved registers.
93	if (MachineOperand::clobbersPhysReg(RegMask: CallerPreservedMask, PhysReg: Reg))
94	continue;
95	// Check that we pass the value used for the caller.
96	// (We look for a CopyFromReg reading a virtual register that is used
97	// for the function live-in value of register Reg)
98	SDValue Value = OutVals [I];
99	if (Value ->getOpcode() == ISD::AssertZext)
100	Value = Value.getOperand(i: `0`);
101	if (Value ->getOpcode() != ISD::CopyFromReg)
102	return false;
103	Register ArgReg = cast<RegisterSDNode>(Val: Value ->getOperand(Num: `1`))->getReg();
104	if (MRI.getLiveInPhysReg(VReg: ArgReg) != Reg)
105	return false;
106	}
107	return true;
108	}
109
110	/// Set CallLoweringInfo attribute flags based on a call instruction
111	/// and called function attributes.
112	void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
113	unsigned ArgIdx) {
114	IsSExt = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SExt);
115	IsZExt = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::ZExt);
116	IsInReg = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::InReg);
117	IsSRet = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::StructRet);
118	IsNest = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::Nest);
119	IsByVal = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::ByVal);
120	IsPreallocated = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::Preallocated);
121	IsInAlloca = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::InAlloca);
122	IsReturned = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::Returned);
123	IsSwiftSelf = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SwiftSelf);
124	IsSwiftAsync = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SwiftAsync);
125	IsSwiftError = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SwiftError);
126	Alignment = Call->getParamStackAlign(ArgNo: ArgIdx);
127	IndirectType = nullptr;
128	assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= `1` &&
129	"multiple ABI attributes?");
130	if (IsByVal) {
131	IndirectType = Call->getParamByValType(ArgNo: ArgIdx);
132	if (!Alignment)
133	Alignment = Call->getParamAlign(ArgNo: ArgIdx);
134	}
135	if (IsPreallocated)
136	IndirectType = Call->getParamPreallocatedType(ArgNo: ArgIdx);
137	if (IsInAlloca)
138	IndirectType = Call->getParamInAllocaType(ArgNo: ArgIdx);
139	if (IsSRet)
140	IndirectType = Call->getParamStructRetType(ArgNo: ArgIdx);
141	}
142
143	/// Generate a libcall taking the given operands as arguments and returning a
144	/// result of type RetVT.
145	std::pair<SDValue, SDValue>
146	TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
147	ArrayRef<SDValue> Ops,
148	MakeLibCallOptions CallOptions,
149	const SDLoc &dl,
150	SDValue InChain) const {
151	if (!InChain)
152	InChain = DAG.getEntryNode();
153
154	TargetLowering::ArgListTy Args;
155	Args.reserve(n: Ops.size());
156
157	TargetLowering::ArgListEntry Entry;
158	for (unsigned i = `0`; i < Ops.size(); ++i) {
159	SDValue NewOp = Ops [i];
160	Entry.Node = NewOp;
161	Entry.Ty = Entry.Node.getValueType().getTypeForEVT(Context&: *DAG.getContext());
162	Entry.IsSExt = shouldSignExtendTypeInLibCall(Type: NewOp.getValueType(),
163	IsSigned: CallOptions.IsSExt);
164	Entry.IsZExt = !Entry.IsSExt;
165
166	if (CallOptions.IsSoften &&
167	!shouldExtendTypeInLibCall(Type: CallOptions.OpsVTBeforeSoften [i])) {
168	Entry.IsSExt = Entry.IsZExt = false;
169	}
170	Args.push_back(x: Entry);
171	}
172
173	if (LC == RTLIB::UNKNOWN_LIBCALL)
174	report_fatal_error(reason: "Unsupported library call operation!");
175	SDValue Callee = DAG.getExternalSymbol(Sym: getLibcallName(Call: LC),
176	VT: getPointerTy(DL: DAG.getDataLayout()));
177
178	Type RetTy = RetVT.getTypeForEVT(Context&: DAG.getContext());
179	TargetLowering::CallLoweringInfo CLI(DAG);
180	bool signExtend = shouldSignExtendTypeInLibCall(Type: RetVT, IsSigned: CallOptions.IsSExt);
181	bool zeroExtend = !signExtend;
182
183	if (CallOptions.IsSoften &&
184	!shouldExtendTypeInLibCall(Type: CallOptions.RetVTBeforeSoften)) {
185	signExtend = zeroExtend = false;
186	}
187
188	CLI.setDebugLoc(dl)
189	.setChain(InChain)
190	.setLibCallee(CC: getLibcallCallingConv(Call: LC), ResultType: RetTy, Target: Callee, ArgsList: std::move(Args))
191	.setNoReturn(CallOptions.DoesNotReturn)
192	.setDiscardResult(!CallOptions.IsReturnValueUsed)
193	.setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
194	.setSExtResult(signExtend)
195	.setZExtResult(zeroExtend);
196	return LowerCallTo(CLI);
197	}
198
199	bool TargetLowering::findOptimalMemOpLowering(
200	std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
201	unsigned SrcAS, const AttributeList &FuncAttributes) const {
202	if (Limit != ~unsigned(`0`) && Op.isMemcpyWithFixedDstAlign() &&
203	Op.getSrcAlign() < Op.getDstAlign())
204	return false;
205
206	EVT VT = getOptimalMemOpType(Op, FuncAttributes);
207
208	if (VT == MVT::Other) {
209	// Use the largest integer type whose alignment constraints are satisfied.
210	// We only need to check DstAlign here as SrcAlign is always greater or
211	// equal to DstAlign (or zero).
212	VT = MVT::LAST_INTEGER_VALUETYPE;
213	if (Op.isFixedDstAlign())
214	while (Op.getDstAlign() < (VT.getSizeInBits() / `8`) &&
215	!allowsMisalignedMemoryAccesses(VT, AddrSpace: DstAS, Alignment: Op.getDstAlign()))
216	VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - `1`);
217	assert(VT.isInteger());
218
219	// Find the largest legal integer type.
220	MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
221	while (!isTypeLegal(VT: LVT))
222	LVT = (MVT::SimpleValueType)(LVT.SimpleTy - `1`);
223	assert(LVT.isInteger());
224
225	// If the type we've chosen is larger than the largest legal integer type
226	// then use that instead.
227	if (VT.bitsGT(VT: LVT))
228	VT = LVT;
229	}
230
231	unsigned NumMemOps = `0`;
232	uint64_t Size = Op.size();
233	while (Size) {
234	unsigned VTSize = VT.getSizeInBits() / `8`;
235	while (VTSize > Size) {
236	// For now, only use non-vector load / store's for the left-over pieces.
237	EVT NewVT = VT;
238	unsigned NewVTSize;
239
240	bool Found = false;
241	if (VT.isVector() \|\| VT.isFloatingPoint()) {
242	NewVT = (VT.getSizeInBits() > `64`) ? MVT::i64 : MVT::i32;
243	if (isOperationLegalOrCustom(Op: ISD::STORE, VT: NewVT) &&
244	isSafeMemOpType(NewVT.getSimpleVT()))
245	Found = true;
246	else if (NewVT == MVT::i64 &&
247	isOperationLegalOrCustom(Op: ISD::STORE, VT: MVT::f64) &&
248	isSafeMemOpType(MVT::f64)) {
249	// i64 is usually not legal on 32-bit targets, but f64 may be.
250	NewVT = MVT::f64;
251	Found = true;
252	}
253	}
254
255	if (!Found) {
256	do {
257	NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - `1`);
258	if (NewVT == MVT::i8)
259	break;
260	} while (!isSafeMemOpType(NewVT.getSimpleVT()));
261	}
262	NewVTSize = NewVT.getSizeInBits() / `8`;
263
264	// If the new VT cannot cover all of the remaining bits, then consider
265	// issuing a (or a pair of) unaligned and overlapping load / store.
266	unsigned Fast;
267	if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
268	allowsMisalignedMemoryAccesses(
269	VT, AddrSpace: DstAS, Alignment: Op.isFixedDstAlign() ? Op.getDstAlign() : Align (`1`),
270	Flags: MachineMemOperand::MONone, &Fast) &&
271	Fast)
272	VTSize = Size;
273	else {
274	VT = NewVT;
275	VTSize = NewVTSize;
276	}
277	}
278
279	if (++NumMemOps > Limit)
280	return false;
281
282	MemOps.push_back(x: VT);
283	Size -= VTSize;
284	}
285
286	return true;
287	}
288
289	/// Soften the operands of a comparison. This code is shared among BR_CC,
290	/// SELECT_CC, and SETCC handlers.
291	void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
292	SDValue &NewLHS, SDValue &NewRHS,
293	ISD::CondCode &CCCode,
294	const SDLoc &dl, const SDValue OldLHS,
295	const SDValue OldRHS) const {
296	SDValue Chain;
297	return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, DL: dl, OldLHS,
298	OldRHS, Chain);
299	}
300
301	void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
302	SDValue &NewLHS, SDValue &NewRHS,
303	ISD::CondCode &CCCode,
304	const SDLoc &dl, const SDValue OldLHS,
305	const SDValue OldRHS,
306	SDValue &Chain,
307	bool IsSignaling) const {
308	// FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
309	// not supporting it. We can update this code when libgcc provides such
310	// functions.
311
312	assert((VT == MVT::f32 \|\| VT == MVT::f64 \|\| VT == MVT::f128 \|\| VT == MVT::ppcf128)
313	&& "Unsupported setcc type!");
314
315	// Expand into one or more soft-fp libcall(s).
316	RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
317	bool ShouldInvertCC = false;
318	switch (CCCode) {
319	case ISD::SETEQ:
320	case ISD::SETOEQ:
321	LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
322	(VT == MVT::f64) ? RTLIB::OEQ_F64 :
323	(VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
324	break;
325	case ISD::SETNE:
326	case ISD::SETUNE:
327	LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
328	(VT == MVT::f64) ? RTLIB::UNE_F64 :
329	(VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
330	break;
331	case ISD::SETGE:
332	case ISD::SETOGE:
333	LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
334	(VT == MVT::f64) ? RTLIB::OGE_F64 :
335	(VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
336	break;
337	case ISD::SETLT:
338	case ISD::SETOLT:
339	LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
340	(VT == MVT::f64) ? RTLIB::OLT_F64 :
341	(VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
342	break;
343	case ISD::SETLE:
344	case ISD::SETOLE:
345	LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
346	(VT == MVT::f64) ? RTLIB::OLE_F64 :
347	(VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
348	break;
349	case ISD::SETGT:
350	case ISD::SETOGT:
351	LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
352	(VT == MVT::f64) ? RTLIB::OGT_F64 :
353	(VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
354	break;
355	case ISD::SETO:
356	ShouldInvertCC = true;
357	[[fallthrough]];
358	case ISD::SETUO:
359	LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
360	(VT == MVT::f64) ? RTLIB::UO_F64 :
361	(VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
362	break;
363	case ISD::SETONE:
364	// SETONE = O && UNE
365	ShouldInvertCC = true;
366	[[fallthrough]];
367	case ISD::SETUEQ:
368	LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
369	(VT == MVT::f64) ? RTLIB::UO_F64 :
370	(VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
371	LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
372	(VT == MVT::f64) ? RTLIB::OEQ_F64 :
373	(VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
374	break;
375	default:
376	// Invert CC for unordered comparisons
377	ShouldInvertCC = true;
378	switch (CCCode) {
379	case ISD::SETULT:
380	LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
381	(VT == MVT::f64) ? RTLIB::OGE_F64 :
382	(VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
383	break;
384	case ISD::SETULE:
385	LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
386	(VT == MVT::f64) ? RTLIB::OGT_F64 :
387	(VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
388	break;
389	case ISD::SETUGT:
390	LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
391	(VT == MVT::f64) ? RTLIB::OLE_F64 :
392	(VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
393	break;
394	case ISD::SETUGE:
395	LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
396	(VT == MVT::f64) ? RTLIB::OLT_F64 :
397	(VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
398	break;
399	default: llvm_unreachable("Do not know how to soften this setcc!");
400	}
401	}
402
403	// Use the target specific return value for comparison lib calls.
404	EVT RetVT = getCmpLibcallReturnType();
405	SDValue Ops[`2`] = {NewLHS, NewRHS};
406	TargetLowering::MakeLibCallOptions CallOptions;
407	EVT OpsVT[`2`] = { OldLHS.getValueType(),
408	OldRHS.getValueType() };
409	CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, Value: true);
410	auto Call = makeLibCall(DAG, LC: LC1, RetVT, Ops, CallOptions, dl, InChain: Chain);
411	NewLHS = Call.first;
412	NewRHS = DAG.getConstant(Val: `0`, DL: dl, VT: RetVT);
413
414	CCCode = getCmpLibcallCC(Call: LC1);
415	if (ShouldInvertCC) {
416	assert(RetVT.isInteger());
417	CCCode = getSetCCInverse(Operation: CCCode, Type: RetVT);
418	}
419
420	if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
421	// Update Chain.
422	Chain = Call.second;
423	} else {
424	EVT SetCCVT =
425	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: RetVT);
426	SDValue Tmp = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: NewLHS, RHS: NewRHS, Cond: CCCode);
427	auto Call2 = makeLibCall(DAG, LC: LC2, RetVT, Ops, CallOptions, dl, InChain: Chain);
428	CCCode = getCmpLibcallCC(Call: LC2);
429	if (ShouldInvertCC)
430	CCCode = getSetCCInverse(Operation: CCCode, Type: RetVT);
431	NewLHS = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Call2.first, RHS: NewRHS, Cond: CCCode);
432	if (Chain)
433	Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Call.second,
434	N2: Call2.second);
435	NewLHS = DAG.getNode(Opcode: ShouldInvertCC ? ISD::AND : ISD::OR, DL: dl,
436	VT: Tmp.getValueType(), N1: Tmp, N2: NewLHS);
437	NewRHS = SDValue ();
438	}
439	}
440
441	/// Return the entry encoding for a jump table in the current function. The
442	/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
443	unsigned TargetLowering::getJumpTableEncoding() const {
444	// In non-pic modes, just use the address of a block.
445	if (!isPositionIndependent())
446	return MachineJumpTableInfo::EK_BlockAddress;
447
448	// In PIC mode, if the target supports a GPRel32 directive, use it.
449	if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
450	return MachineJumpTableInfo::EK_GPRel32BlockAddress;
451
452	// Otherwise, use a label difference.
453	return MachineJumpTableInfo::EK_LabelDifference32;
454	}
455
456	SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
457	SelectionDAG &DAG) const {
458	// If our PIC model is GP relative, use the global offset table as the base.
459	unsigned JTEncoding = getJumpTableEncoding();
460
461	if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) \|\|
462	(JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
463	return DAG.getGLOBAL_OFFSET_TABLE(VT: getPointerTy(DL: DAG.getDataLayout()));
464
465	return Table;
466	}
467
468	/// This returns the relocation base for the given PIC jumptable, the same as
469	/// getPICJumpTableRelocBase, but as an MCExpr.
470	const MCExpr *
471	TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
472	unsigned JTI,MCContext &Ctx) const{
473	// The normal PIC reloc base is the label at the start of the jump table.
474	return MCSymbolRefExpr::create(Symbol: MF->getJTISymbol(JTI, Ctx), Ctx);
475	}
476
477	SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
478	SDValue Addr, int JTI,
479	SelectionDAG &DAG) const {
480	SDValue Chain = Value;
481	// Jump table debug info is only needed if CodeView is enabled.
482	if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
483	Chain = DAG.getJumpTableDebugInfo(JTI, Chain, DL: dl);
484	}
485	return DAG.getNode(Opcode: ISD::BRIND, DL: dl, VT: MVT::Other, N1: Chain, N2: Addr);
486	}
487
488	bool
489	TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode GA) const* {
490	const TargetMachine &TM = getTargetMachine();
491	const GlobalValue *GV = GA->getGlobal();
492
493	// If the address is not even local to this DSO we will have to load it from
494	// a got and then add the offset.
495	if (!TM.shouldAssumeDSOLocal(GV))
496	return false;
497
498	// If the code is position independent we will have to add a base register.
499	if (isPositionIndependent())
500	return false;
501
502	// Otherwise we can do it.
503	return true;
504	}
505
506	//===----------------------------------------------------------------------===//
507	// Optimization Methods
508	//===----------------------------------------------------------------------===//
509
510	/// If the specified instruction has a constant integer operand and there are
511	/// bits set in that constant that are not demanded, then clear those bits and
512	/// return true.
513	bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
514	const APInt &DemandedBits,
515	const APInt &DemandedElts,
516	TargetLoweringOpt &TLO) const {
517	SDLoc DL(Op);
518	unsigned Opcode = Op.getOpcode();
519
520	// Early-out if we've ended up calling an undemanded node, leave this to
521	// constant folding.
522	if (DemandedBits.isZero() \|\| DemandedElts.isZero())
523	return false;
524
525	// Do target-specific constant optimization.
526	if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
527	return TLO.New.getNode();
528
529	// FIXME: ISD::SELECT, ISD::SELECT_CC
530	switch (Opcode) {
531	default:
532	break;
533	case ISD::XOR:
534	case ISD::AND:
535	case ISD::OR: {
536	auto *Op1C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `1`));
537	if (!Op1C \|\| Op1C->isOpaque())
538	return false;
539
540	// If this is a 'not' op, don't touch it because that's a canonical form.
541	const APInt &C = Op1C->getAPIntValue();
542	if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(RHS: C))
543	return false;
544
545	if (!C.isSubsetOf(RHS: DemandedBits)) {
546	EVT VT = Op.getValueType();
547	SDValue NewC = TLO.DAG.getConstant(Val: DemandedBits & C, DL, VT);
548	SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, N1: Op.getOperand(i: `0`), N2: NewC,
549	Flags: Op ->getFlags());
550	return TLO.CombineTo(O: Op, N: NewOp);
551	}
552
553	break;
554	}
555	}
556
557	return false;
558	}
559
560	bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
561	const APInt &DemandedBits,
562	TargetLoweringOpt &TLO) const {
563	EVT VT = Op.getValueType();
564	APInt DemandedElts = VT.isVector()
565	? APInt::getAllOnes(numBits: VT.getVectorNumElements())
566	: APInt (`1`, `1`);
567	return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
568	}
569
570	/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
571	/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
572	/// but it could be generalized for targets with other types of implicit
573	/// widening casts.
574	bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
575	const APInt &DemandedBits,
576	TargetLoweringOpt &TLO) const {
577	assert(Op.getNumOperands() == `2` &&
578	"ShrinkDemandedOp only supports binary operators!");
579	assert(Op.getNode()->getNumValues() == `1` &&
580	"ShrinkDemandedOp only supports nodes with one result!");
581
582	EVT VT = Op.getValueType();
583	SelectionDAG &DAG = TLO.DAG;
584	SDLoc dl(Op);
585
586	// Early return, as this function cannot handle vector types.
587	if (VT.isVector())
588	return false;
589
590	assert(Op.getOperand(`0`).getValueType().getScalarSizeInBits() == BitWidth &&
591	Op.getOperand(`1`).getValueType().getScalarSizeInBits() == BitWidth &&
592	"ShrinkDemandedOp only supports operands that have the same size!");
593
594	// Don't do this if the node has another user, which may require the
595	// full value.
596	if (!Op.getNode()->hasOneUse())
597	return false;
598
599	// Search for the smallest integer type with free casts to and from
600	// Op's type. For expedience, just check power-of-2 integer types.
601	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
602	unsigned DemandedSize = DemandedBits.getActiveBits();
603	for (unsigned SmallVTBits = llvm::bit_ceil(Value: DemandedSize);
604	SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(A: SmallVTBits)) {
605	EVT SmallVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: SmallVTBits);
606	if (TLI.isTruncateFree(FromVT: VT, ToVT: SmallVT) && TLI.isZExtFree(FromTy: SmallVT, ToTy: VT)) {
607	// We found a type with free casts.
608	SDValue X = DAG.getNode(
609	Opcode: Op.getOpcode(), DL: dl, VT: SmallVT,
610	N1: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SmallVT, Operand: Op.getOperand(i: `0`)),
611	N2: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SmallVT, Operand: Op.getOperand(i: `1`)));
612	assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
613	SDValue Z = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: X);
614	return TLO.CombineTo(O: Op, N: Z);
615	}
616	}
617	return false;
618	}
619
620	bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
621	DAGCombinerInfo &DCI) const {
622	SelectionDAG &DAG = DCI.DAG;
623	TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
624	!DCI.isBeforeLegalizeOps());
625	KnownBits Known;
626
627	bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
628	if (Simplified) {
629	DCI.AddToWorklist(N: Op.getNode());
630	DCI.CommitTargetLoweringOpt(TLO);
631	}
632	return Simplified;
633	}
634
635	bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
636	const APInt &DemandedElts,
637	DAGCombinerInfo &DCI) const {
638	SelectionDAG &DAG = DCI.DAG;
639	TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
640	!DCI.isBeforeLegalizeOps());
641	KnownBits Known;
642
643	bool Simplified =
644	SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
645	if (Simplified) {
646	DCI.AddToWorklist(N: Op.getNode());
647	DCI.CommitTargetLoweringOpt(TLO);
648	}
649	return Simplified;
650	}
651
652	bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
653	KnownBits &Known,
654	TargetLoweringOpt &TLO,
655	unsigned Depth,
656	bool AssumeSingleUse) const {
657	EVT VT = Op.getValueType();
658
659	// Since the number of lanes in a scalable vector is unknown at compile time,
660	// we track one bit which is implicitly broadcast to all lanes. This means
661	// that all lanes in a scalable vector are considered demanded.
662	APInt DemandedElts = VT.isFixedLengthVector()
663	? APInt::getAllOnes(numBits: VT.getVectorNumElements())
664	: APInt (`1`, `1`);
665	return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
666	AssumeSingleUse);
667	}
668
669	// TODO: Under what circumstances can we create nodes? Constant folding?
670	SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
671	SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
672	SelectionDAG &DAG, unsigned Depth) const {
673	EVT VT = Op.getValueType();
674
675	// Limit search depth.
676	if (Depth >= SelectionDAG::MaxRecursionDepth)
677	return SDValue ();
678
679	// Ignore UNDEFs.
680	if (Op.isUndef())
681	return SDValue ();
682
683	// Not demanding any bits/elts from Op.
684	if (DemandedBits == `0` \|\| DemandedElts == `0`)
685	return DAG.getUNDEF(VT);
686
687	bool IsLE = DAG.getDataLayout().isLittleEndian();
688	unsigned NumElts = DemandedElts.getBitWidth();
689	unsigned BitWidth = DemandedBits.getBitWidth();
690	KnownBits LHSKnown, RHSKnown;
691	switch (Op.getOpcode()) {
692	case ISD::BITCAST: {
693	if (VT.isScalableVector())
694	return SDValue ();
695
696	SDValue Src = peekThroughBitcasts(V: Op.getOperand(i: `0`));
697	EVT SrcVT = Src.getValueType();
698	EVT DstVT = Op.getValueType();
699	if (SrcVT == DstVT)
700	return Src;
701
702	unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
703	unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
704	if (NumSrcEltBits == NumDstEltBits)
705	if (SDValue V = SimplifyMultipleUseDemandedBits(
706	Op: Src, DemandedBits, DemandedElts, DAG, Depth: Depth + `1`))
707	return DAG.getBitcast(VT: DstVT, V);
708
709	if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == `0`) {
710	unsigned Scale = NumDstEltBits / NumSrcEltBits;
711	unsigned NumSrcElts = SrcVT.getVectorNumElements();
712	APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
713	APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
714	for (unsigned i = `0`; i != Scale; ++i) {
715	unsigned EltOffset = IsLE ? i : (Scale - `1` - i);
716	unsigned BitOffset = EltOffset * NumSrcEltBits;
717	APInt Sub = DemandedBits.extractBits(numBits: NumSrcEltBits, bitPosition: BitOffset);
718	if (!Sub.isZero()) {
719	DemandedSrcBits \|= Sub;
720	for (unsigned j = `0`; j != NumElts; ++j)
721	if (DemandedElts [j])
722	DemandedSrcElts.setBit((j * Scale) + i);
723	}
724	}
725
726	if (SDValue V = SimplifyMultipleUseDemandedBits(
727	Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG, Depth: Depth + `1`))
728	return DAG.getBitcast(VT: DstVT, V);
729	}
730
731	// TODO - bigendian once we have test coverage.
732	if (IsLE && (NumSrcEltBits % NumDstEltBits) == `0`) {
733	unsigned Scale = NumSrcEltBits / NumDstEltBits;
734	unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : `1`;
735	APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
736	APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
737	for (unsigned i = `0`; i != NumElts; ++i)
738	if (DemandedElts [i]) {
739	unsigned Offset = (i % Scale) * NumDstEltBits;
740	DemandedSrcBits.insertBits(SubBits: DemandedBits, bitPosition: Offset);
741	DemandedSrcElts.setBit(i / Scale);
742	}
743
744	if (SDValue V = SimplifyMultipleUseDemandedBits(
745	Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG, Depth: Depth + `1`))
746	return DAG.getBitcast(VT: DstVT, V);
747	}
748
749	break;
750	}
751	case ISD::FREEZE: {
752	SDValue N0 = Op.getOperand(i: `0`);
753	if (DAG.isGuaranteedNotToBeUndefOrPoison(Op: N0, DemandedElts,
754	/PoisonOnly=/false))
755	return N0;
756	break;
757	}
758	case ISD::AND: {
759	LHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: `0`), DemandedElts, Depth: Depth + `1`);
760	RHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: `1`), DemandedElts, Depth: Depth + `1`);
761
762	// If all of the demanded bits are known 1 on one side, return the other.
763	// These bits cannot contribute to the result of the 'and' in this
764	// context.
765	if (DemandedBits.isSubsetOf(RHS: LHSKnown.Zero \| RHSKnown.One))
766	return Op.getOperand(i: `0`);
767	if (DemandedBits.isSubsetOf(RHS: RHSKnown.Zero \| LHSKnown.One))
768	return Op.getOperand(i: `1`);
769	break;
770	}
771	case ISD::OR: {
772	LHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: `0`), DemandedElts, Depth: Depth + `1`);
773	RHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: `1`), DemandedElts, Depth: Depth + `1`);
774
775	// If all of the demanded bits are known zero on one side, return the
776	// other. These bits cannot contribute to the result of the 'or' in this
777	// context.
778	if (DemandedBits.isSubsetOf(RHS: LHSKnown.One \| RHSKnown.Zero))
779	return Op.getOperand(i: `0`);
780	if (DemandedBits.isSubsetOf(RHS: RHSKnown.One \| LHSKnown.Zero))
781	return Op.getOperand(i: `1`);
782	break;
783	}
784	case ISD::XOR: {
785	LHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: `0`), DemandedElts, Depth: Depth + `1`);
786	RHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: `1`), DemandedElts, Depth: Depth + `1`);
787
788	// If all of the demanded bits are known zero on one side, return the
789	// other.
790	if (DemandedBits.isSubsetOf(RHS: RHSKnown.Zero))
791	return Op.getOperand(i: `0`);
792	if (DemandedBits.isSubsetOf(RHS: LHSKnown.Zero))
793	return Op.getOperand(i: `1`);
794	break;
795	}
796	case ISD::SHL: {
797	// If we are only demanding sign bits then we can use the shift source
798	// directly.
799	if (std::optional<uint64_t> MaxSA =
800	DAG.getValidMaximumShiftAmount(V: Op, DemandedElts, Depth: Depth + `1`)) {
801	SDValue Op0 = Op.getOperand(i: `0`);
802	unsigned ShAmt = *MaxSA;
803	unsigned NumSignBits =
804	DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + `1`);
805	unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
806	if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
807	return Op0;
808	}
809	break;
810	}
811	case ISD::SETCC: {
812	SDValue Op0 = Op.getOperand(i: `0`);
813	SDValue Op1 = Op.getOperand(i: `1`);
814	ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: `2`))->get();
815	// If (1) we only need the sign-bit, (2) the setcc operands are the same
816	// width as the setcc result, and (3) the result of a setcc conforms to 0 or
817	// -1, we may be able to bypass the setcc.
818	if (DemandedBits.isSignMask() &&
819	Op0.getScalarValueSizeInBits() == BitWidth &&
820	getBooleanContents(Type: Op0.getValueType()) ==
821	BooleanContent::ZeroOrNegativeOneBooleanContent) {
822	// If we're testing X < 0, then this compare isn't needed - just use X!
823	// FIXME: We're limiting to integer types here, but this should also work
824	// if we don't care about FP signed-zero. The use of SETLT with FP means
825	// that we don't care about NaNs.
826	if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
827	(isNullConstant(V: Op1) \|\| ISD::isBuildVectorAllZeros(N: Op1.getNode())))
828	return Op0;
829	}
830	break;
831	}
832	case ISD::SIGN_EXTEND_INREG: {
833	// If none of the extended bits are demanded, eliminate the sextinreg.
834	SDValue Op0 = Op.getOperand(i: `0`);
835	EVT ExVT = cast<VTSDNode>(Val: Op.getOperand(i: `1`))->getVT();
836	unsigned ExBits = ExVT.getScalarSizeInBits();
837	if (DemandedBits.getActiveBits() <= ExBits &&
838	shouldRemoveRedundantExtend(Op))
839	return Op0;
840	// If the input is already sign extended, just drop the extension.
841	unsigned NumSignBits = DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + `1`);
842	if (NumSignBits >= (BitWidth - ExBits + `1`))
843	return Op0;
844	break;
845	}
846	case ISD::ANY_EXTEND_VECTOR_INREG:
847	case ISD::SIGN_EXTEND_VECTOR_INREG:
848	case ISD::ZERO_EXTEND_VECTOR_INREG: {
849	if (VT.isScalableVector())
850	return SDValue ();
851
852	// If we only want the lowest element and none of extended bits, then we can
853	// return the bitcasted source vector.
854	SDValue Src = Op.getOperand(i: `0`);
855	EVT SrcVT = Src.getValueType();
856	EVT DstVT = Op.getValueType();
857	if (IsLE && DemandedElts == `1` &&
858	DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
859	DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
860	return DAG.getBitcast(VT: DstVT, V: Src);
861	}
862	break;
863	}
864	case ISD::INSERT_VECTOR_ELT: {
865	if (VT.isScalableVector())
866	return SDValue ();
867
868	// If we don't demand the inserted element, return the base vector.
869	SDValue Vec = Op.getOperand(i: `0`);
870	auto *CIdx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `2`));
871	EVT VecVT = Vec.getValueType();
872	if (CIdx && CIdx->getAPIntValue().ult(RHS: VecVT.getVectorNumElements()) &&
873	!DemandedElts [CIdx->getZExtValue()])
874	return Vec;
875	break;
876	}
877	case ISD::INSERT_SUBVECTOR: {
878	if (VT.isScalableVector())
879	return SDValue ();
880
881	SDValue Vec = Op.getOperand(i: `0`);
882	SDValue Sub = Op.getOperand(i: `1`);
883	uint64_t Idx = Op.getConstantOperandVal(i: `2`);
884	unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
885	APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx);
886	// If we don't demand the inserted subvector, return the base vector.
887	if (DemandedSubElts == `0`)
888	return Vec;
889	break;
890	}
891	case ISD::VECTOR_SHUFFLE: {
892	assert(!VT.isScalableVector());
893	ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Val&: Op)->getMask();
894
895	// If all the demanded elts are from one operand and are inline,
896	// then we can use the operand directly.
897	bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
898	for (unsigned i = `0`; i != NumElts; ++i) {
899	int M = ShuffleMask [i];
900	if (M < `0` \|\| !DemandedElts [i])
901	continue;
902	AllUndef = false;
903	IdentityLHS &= (M == (int)i);
904	IdentityRHS &= ((M - NumElts) == i);
905	}
906
907	if (AllUndef)
908	return DAG.getUNDEF(VT: Op.getValueType());
909	if (IdentityLHS)
910	return Op.getOperand(i: `0`);
911	if (IdentityRHS)
912	return Op.getOperand(i: `1`);
913	break;
914	}
915	default:
916	// TODO: Probably okay to remove after audit; here to reduce change size
917	// in initial enablement patch for scalable vectors
918	if (VT.isScalableVector())
919	return SDValue ();
920
921	if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
922	if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
923	Op, DemandedBits, DemandedElts, DAG, Depth))
924	return V;
925	break;
926	}
927	return SDValue ();
928	}
929
930	SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
931	SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
932	unsigned Depth) const {
933	EVT VT = Op.getValueType();
934	// Since the number of lanes in a scalable vector is unknown at compile time,
935	// we track one bit which is implicitly broadcast to all lanes. This means
936	// that all lanes in a scalable vector are considered demanded.
937	APInt DemandedElts = VT.isFixedLengthVector()
938	? APInt::getAllOnes(numBits: VT.getVectorNumElements())
939	: APInt (`1`, `1`);
940	return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
941	Depth);
942	}
943
944	SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
945	SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
946	unsigned Depth) const {
947	APInt DemandedBits = APInt::getAllOnes(numBits: Op.getScalarValueSizeInBits());
948	return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
949	Depth);
950	}
951
952	// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
953	// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
954	static SDValue combineShiftToAVG(SDValue Op,
955	TargetLowering::TargetLoweringOpt &TLO,
956	const TargetLowering &TLI,
957	const APInt &DemandedBits,
958	const APInt &DemandedElts, unsigned Depth) {
959	assert((Op.getOpcode() == ISD::SRL \|\| Op.getOpcode() == ISD::SRA) &&
960	"SRL or SRA node is required here!");
961	// Is the right shift using an immediate value of 1?
962	ConstantSDNode *N1C = isConstOrConstSplat(N: Op.getOperand(i: `1`), DemandedElts);
963	if (!N1C \|\| !N1C->isOne())
964	return SDValue ();
965
966	// We are looking for an avgfloor
967	// add(ext, ext)
968	// or one of these as a avgceil
969	// add(add(ext, ext), 1)
970	// add(add(ext, 1), ext)
971	// add(ext, add(ext, 1))
972	SDValue Add = Op.getOperand(i: `0`);
973	if (Add.getOpcode() != ISD::ADD)
974	return SDValue ();
975
976	SDValue ExtOpA = Add.getOperand(i: `0`);
977	SDValue ExtOpB = Add.getOperand(i: `1`);
978	SDValue Add2;
979	auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
980	ConstantSDNode *ConstOp;
981	if ((ConstOp = isConstOrConstSplat(N: Op2, DemandedElts)) &&
982	ConstOp->isOne()) {
983	ExtOpA = Op1;
984	ExtOpB = Op3;
985	Add2 = A;
986	return true;
987	}
988	if ((ConstOp = isConstOrConstSplat(N: Op3, DemandedElts)) &&
989	ConstOp->isOne()) {
990	ExtOpA = Op1;
991	ExtOpB = Op2;
992	Add2 = A;
993	return true;
994	}
995	return false;
996	};
997	bool IsCeil =
998	(ExtOpA.getOpcode() == ISD::ADD &&
999	MatchOperands (ExtOpA.getOperand(i: `0`), ExtOpA.getOperand(i: `1`), ExtOpB, ExtOpA)) \|\|
1000	(ExtOpB.getOpcode() == ISD::ADD &&
1001	MatchOperands (ExtOpB.getOperand(i: `0`), ExtOpB.getOperand(i: `1`), ExtOpA, ExtOpB));
1002
1003	// If the shift is signed (sra):
1004	// - Needs >= 2 sign bit for both operands.
1005	// - Needs >= 2 zero bits.
1006	// If the shift is unsigned (srl):
1007	// - Needs >= 1 zero bit for both operands.
1008	// - Needs 1 demanded bit zero and >= 2 sign bits.
1009	SelectionDAG &DAG = TLO.DAG;
1010	unsigned ShiftOpc = Op.getOpcode();
1011	bool IsSigned = false;
1012	unsigned KnownBits;
1013	unsigned NumSignedA = DAG.ComputeNumSignBits(Op: ExtOpA, DemandedElts, Depth);
1014	unsigned NumSignedB = DAG.ComputeNumSignBits(Op: ExtOpB, DemandedElts, Depth);
1015	unsigned NumSigned = std::min(a: NumSignedA, b: NumSignedB) - `1`;
1016	unsigned NumZeroA =
1017	DAG.computeKnownBits(Op: ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
1018	unsigned NumZeroB =
1019	DAG.computeKnownBits(Op: ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
1020	unsigned NumZero = std::min(a: NumZeroA, b: NumZeroB);
1021
1022	switch (ShiftOpc) {
1023	default:
1024	llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
1025	case ISD::SRA: {
1026	if (NumZero >= `2` && NumSigned < NumZero) {
1027	IsSigned = false;
1028	KnownBits = NumZero;
1029	break;
1030	}
1031	if (NumSigned >= `1`) {
1032	IsSigned = true;
1033	KnownBits = NumSigned;
1034	break;
1035	}
1036	return SDValue ();
1037	}
1038	case ISD::SRL: {
1039	if (NumZero >= `1` && NumSigned < NumZero) {
1040	IsSigned = false;
1041	KnownBits = NumZero;
1042	break;
1043	}
1044	if (NumSigned >= `1` && DemandedBits.isSignBitClear()) {
1045	IsSigned = true;
1046	KnownBits = NumSigned;
1047	break;
1048	}
1049	return SDValue ();
1050	}
1051	}
1052
1053	unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
1054	: (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
1055
1056	// Find the smallest power-2 type that is legal for this vector size and
1057	// operation, given the original type size and the number of known sign/zero
1058	// bits.
1059	EVT VT = Op.getValueType();
1060	unsigned MinWidth =
1061	std::max<unsigned>(a: VT.getScalarSizeInBits() - KnownBits, b: `8`);
1062	EVT NVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: llvm::bit_ceil(Value: MinWidth));
1063	if (NVT.getScalarSizeInBits() > VT.getScalarSizeInBits())
1064	return SDValue ();
1065	if (VT.isVector())
1066	NVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: NVT, EC: VT.getVectorElementCount());
1067	if (TLO.LegalTypes() && !TLI.isOperationLegal(Op: AVGOpc, VT: NVT)) {
1068	// If we could not transform, and (both) adds are nuw/nsw, we can use the
1069	// larger type size to do the transform.
1070	if (TLO.LegalOperations() && !TLI.isOperationLegal(Op: AVGOpc, VT))
1071	return SDValue ();
1072	if (DAG.willNotOverflowAdd(IsSigned, N0: Add.getOperand(i: `0`),
1073	N1: Add.getOperand(i: `1`)) &&
1074	(!Add2 \|\| DAG.willNotOverflowAdd(IsSigned, N0: Add2.getOperand(i: `0`),
1075	N1: Add2.getOperand(i: `1`))))
1076	NVT = VT;
1077	else
1078	return SDValue ();
1079	}
1080
1081	// Don't create a AVGFLOOR node with a scalar constant unless its legal as
1082	// this is likely to stop other folds (reassociation, value tracking etc.)
1083	if (!IsCeil && !TLI.isOperationLegal(Op: AVGOpc, VT: NVT) &&
1084	(isa<ConstantSDNode>(Val: ExtOpA) \|\| isa<ConstantSDNode>(Val: ExtOpB)))
1085	return SDValue ();
1086
1087	SDLoc DL(Op);
1088	SDValue ResultAVG =
1089	DAG.getNode(Opcode: AVGOpc, DL, VT: NVT, N1: DAG.getExtOrTrunc(IsSigned, Op: ExtOpA, DL, VT: NVT),
1090	N2: DAG.getExtOrTrunc(IsSigned, Op: ExtOpB, DL, VT: NVT));
1091	return DAG.getExtOrTrunc(IsSigned, Op: ResultAVG, DL, VT);
1092	}
1093
1094	/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1095	/// result of Op are ever used downstream. If we can use this information to
1096	/// simplify Op, create a new simplified DAG node and return true, returning the
1097	/// original and new nodes in Old and New. Otherwise, analyze the expression and
1098	/// return a mask of Known bits for the expression (used to simplify the
1099	/// caller). The Known bits may only be accurate for those bits in the
1100	/// OriginalDemandedBits and OriginalDemandedElts.
1101	bool TargetLowering::SimplifyDemandedBits(
1102	SDValue Op, const APInt &OriginalDemandedBits,
1103	const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1104	unsigned Depth, bool AssumeSingleUse) const {
1105	unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1106	assert(Op.getScalarValueSizeInBits() == BitWidth &&
1107	"Mask size mismatches value type size!");
1108
1109	// Don't know anything.
1110	Known = KnownBits (BitWidth);
1111
1112	EVT VT = Op.getValueType();
1113	bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1114	unsigned NumElts = OriginalDemandedElts.getBitWidth();
1115	assert((!VT.isFixedLengthVector() \|\| NumElts == VT.getVectorNumElements()) &&
1116	"Unexpected vector size");
1117
1118	APInt DemandedBits = OriginalDemandedBits;
1119	APInt DemandedElts = OriginalDemandedElts;
1120	SDLoc dl(Op);
1121
1122	// Undef operand.
1123	if (Op.isUndef())
1124	return false;
1125
1126	// We can't simplify target constants.
1127	if (Op.getOpcode() == ISD::TargetConstant)
1128	return false;
1129
1130	if (Op.getOpcode() == ISD::Constant) {
1131	// We know all of the bits for a constant!
1132	Known = KnownBits::makeConstant(C: Op ->getAsAPIntVal());
1133	return false;
1134	}
1135
1136	if (Op.getOpcode() == ISD::ConstantFP) {
1137	// We know all of the bits for a floating point constant!
1138	Known = KnownBits::makeConstant(
1139	C: cast<ConstantFPSDNode>(Val&: Op)->getValueAPF().bitcastToAPInt());
1140	return false;
1141	}
1142
1143	// Other users may use these bits.
1144	bool HasMultiUse = false;
1145	if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1146	if (Depth >= SelectionDAG::MaxRecursionDepth) {
1147	// Limit search depth.
1148	return false;
1149	}
1150	// Allow multiple uses, just set the DemandedBits/Elts to all bits.
1151	DemandedBits = APInt::getAllOnes(numBits: BitWidth);
1152	DemandedElts = APInt::getAllOnes(numBits: NumElts);
1153	HasMultiUse = true;
1154	} else if (OriginalDemandedBits == `0` \|\| OriginalDemandedElts == `0`) {
1155	// Not demanding any bits/elts from Op.
1156	return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
1157	} else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1158	// Limit search depth.
1159	return false;
1160	}
1161
1162	KnownBits Known2;
1163	switch (Op.getOpcode()) {
1164	case ISD::SCALAR_TO_VECTOR: {
1165	if (VT.isScalableVector())
1166	return false;
1167	if (!DemandedElts [`0`])
1168	return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
1169
1170	KnownBits SrcKnown;
1171	SDValue Src = Op.getOperand(i: `0`);
1172	unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1173	APInt SrcDemandedBits = DemandedBits.zext(width: SrcBitWidth);
1174	if (SimplifyDemandedBits(Op: Src, DemandedBits: SrcDemandedBits, Known&: SrcKnown, TLO, Depth: Depth + `1`))
1175	return true;
1176
1177	// Upper elements are undef, so only get the knownbits if we just demand
1178	// the bottom element.
1179	if (DemandedElts == `1`)
1180	Known = SrcKnown.anyextOrTrunc(BitWidth);
1181	break;
1182	}
1183	case ISD::BUILD_VECTOR:
1184	// Collect the known bits that are shared by every demanded element.
1185	// TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1186	Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1187	return false; // Don't fall through, will infinitely loop.
1188	case ISD::SPLAT_VECTOR: {
1189	SDValue Scl = Op.getOperand(i: `0`);
1190	APInt DemandedSclBits = DemandedBits.zextOrTrunc(width: Scl.getValueSizeInBits());
1191	KnownBits KnownScl;
1192	if (SimplifyDemandedBits(Op: Scl, DemandedBits: DemandedSclBits, Known&: KnownScl, TLO, Depth: Depth + `1`))
1193	return true;
1194
1195	// Implicitly truncate the bits to match the official semantics of
1196	// SPLAT_VECTOR.
1197	Known = KnownScl.trunc(BitWidth);
1198	break;
1199	}
1200	case ISD::LOAD: {
1201	auto *LD = cast<LoadSDNode>(Val&: Op);
1202	if (getTargetConstantFromLoad(LD)) {
1203	Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1204	return false; // Don't fall through, will infinitely loop.
1205	}
1206	if (ISD::isZEXTLoad(N: Op.getNode()) && Op.getResNo() == `0`) {
1207	// If this is a ZEXTLoad and we are looking at the loaded value.
1208	EVT MemVT = LD->getMemoryVT();
1209	unsigned MemBits = MemVT.getScalarSizeInBits();
1210	Known.Zero.setBitsFrom(MemBits);
1211	return false; // Don't fall through, will infinitely loop.
1212	}
1213	break;
1214	}
1215	case ISD::INSERT_VECTOR_ELT: {
1216	if (VT.isScalableVector())
1217	return false;
1218	SDValue Vec = Op.getOperand(i: `0`);
1219	SDValue Scl = Op.getOperand(i: `1`);
1220	auto *CIdx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `2`));
1221	EVT VecVT = Vec.getValueType();
1222
1223	// If index isn't constant, assume we need all vector elements AND the
1224	// inserted element.
1225	APInt DemandedVecElts(DemandedElts);
1226	if (CIdx && CIdx->getAPIntValue().ult(RHS: VecVT.getVectorNumElements())) {
1227	unsigned Idx = CIdx->getZExtValue();
1228	DemandedVecElts.clearBit(BitPosition: Idx);
1229
1230	// Inserted element is not required.
1231	if (!DemandedElts [Idx])
1232	return TLO.CombineTo(O: Op, N: Vec);
1233	}
1234
1235	KnownBits KnownScl;
1236	unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1237	APInt DemandedSclBits = DemandedBits.zextOrTrunc(width: NumSclBits);
1238	if (SimplifyDemandedBits(Op: Scl, DemandedBits: DemandedSclBits, Known&: KnownScl, TLO, Depth: Depth + `1`))
1239	return true;
1240
1241	Known = KnownScl.anyextOrTrunc(BitWidth);
1242
1243	KnownBits KnownVec;
1244	if (SimplifyDemandedBits(Op: Vec, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedVecElts, Known&: KnownVec, TLO,
1245	Depth: Depth + `1`))
1246	return true;
1247
1248	if (!!DemandedVecElts)
1249	Known = Known.intersectWith(RHS: KnownVec);
1250
1251	return false;
1252	}
1253	case ISD::INSERT_SUBVECTOR: {
1254	if (VT.isScalableVector())
1255	return false;
1256	// Demand any elements from the subvector and the remainder from the src its
1257	// inserted into.
1258	SDValue Src = Op.getOperand(i: `0`);
1259	SDValue Sub = Op.getOperand(i: `1`);
1260	uint64_t Idx = Op.getConstantOperandVal(i: `2`);
1261	unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1262	APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx);
1263	APInt DemandedSrcElts = DemandedElts;
1264	DemandedSrcElts.insertBits(SubBits: APInt::getZero(numBits: NumSubElts), bitPosition: Idx);
1265
1266	KnownBits KnownSub, KnownSrc;
1267	if (SimplifyDemandedBits(Op: Sub, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSubElts, Known&: KnownSub, TLO,
1268	Depth: Depth + `1`))
1269	return true;
1270	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSrcElts, Known&: KnownSrc, TLO,
1271	Depth: Depth + `1`))
1272	return true;
1273
1274	Known.Zero.setAllBits();
1275	Known.One.setAllBits();
1276	if (!!DemandedSubElts)
1277	Known = Known.intersectWith(RHS: KnownSub);
1278	if (!!DemandedSrcElts)
1279	Known = Known.intersectWith(RHS: KnownSrc);
1280
1281	// Attempt to avoid multi-use src if we don't need anything from it.
1282	if (!DemandedBits.isAllOnes() \|\| !DemandedSubElts.isAllOnes() \|\|
1283	!DemandedSrcElts.isAllOnes()) {
1284	SDValue NewSub = SimplifyMultipleUseDemandedBits(
1285	Op: Sub, DemandedBits, DemandedElts: DemandedSubElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1286	SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1287	Op: Src, DemandedBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1288	if (NewSub \|\| NewSrc) {
1289	NewSub = NewSub ? NewSub : Sub;
1290	NewSrc = NewSrc ? NewSrc : Src;
1291	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: NewSrc, N2: NewSub,
1292	N3: Op.getOperand(i: `2`));
1293	return TLO.CombineTo(O: Op, N: NewOp);
1294	}
1295	}
1296	break;
1297	}
1298	case ISD::EXTRACT_SUBVECTOR: {
1299	if (VT.isScalableVector())
1300	return false;
1301	// Offset the demanded elts by the subvector index.
1302	SDValue Src = Op.getOperand(i: `0`);
1303	if (Src.getValueType().isScalableVector())
1304	break;
1305	uint64_t Idx = Op.getConstantOperandVal(i: `1`);
1306	unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1307	APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts).shl(shiftAmt: Idx);
1308
1309	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSrcElts, Known, TLO,
1310	Depth: Depth + `1`))
1311	return true;
1312
1313	// Attempt to avoid multi-use src if we don't need anything from it.
1314	if (!DemandedBits.isAllOnes() \|\| !DemandedSrcElts.isAllOnes()) {
1315	SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1316	Op: Src, DemandedBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1317	if (DemandedSrc) {
1318	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedSrc,
1319	N2: Op.getOperand(i: `1`));
1320	return TLO.CombineTo(O: Op, N: NewOp);
1321	}
1322	}
1323	break;
1324	}
1325	case ISD::CONCAT_VECTORS: {
1326	if (VT.isScalableVector())
1327	return false;
1328	Known.Zero.setAllBits();
1329	Known.One.setAllBits();
1330	EVT SubVT = Op.getOperand(i: `0`).getValueType();
1331	unsigned NumSubVecs = Op.getNumOperands();
1332	unsigned NumSubElts = SubVT.getVectorNumElements();
1333	for (unsigned i = `0`; i != NumSubVecs; ++i) {
1334	APInt DemandedSubElts =
1335	DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
1336	if (SimplifyDemandedBits(Op: Op.getOperand(i), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSubElts,
1337	Known&: Known2, TLO, Depth: Depth + `1`))
1338	return true;
1339	// Known bits are shared by every demanded subvector element.
1340	if (!!DemandedSubElts)
1341	Known = Known.intersectWith(RHS: Known2);
1342	}
1343	break;
1344	}
1345	case ISD::VECTOR_SHUFFLE: {
1346	assert(!VT.isScalableVector());
1347	ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Val&: Op)->getMask();
1348
1349	// Collect demanded elements from shuffle operands..
1350	APInt DemandedLHS, DemandedRHS;
1351	if (!getShuffleDemandedElts(SrcWidth: NumElts, Mask: ShuffleMask, DemandedElts, DemandedLHS,
1352	DemandedRHS))
1353	break;
1354
1355	if (!!DemandedLHS \|\| !!DemandedRHS) {
1356	SDValue Op0 = Op.getOperand(i: `0`);
1357	SDValue Op1 = Op.getOperand(i: `1`);
1358
1359	Known.Zero.setAllBits();
1360	Known.One.setAllBits();
1361	if (!!DemandedLHS) {
1362	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedLHS, Known&: Known2, TLO,
1363	Depth: Depth + `1`))
1364	return true;
1365	Known = Known.intersectWith(RHS: Known2);
1366	}
1367	if (!!DemandedRHS) {
1368	if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedRHS, Known&: Known2, TLO,
1369	Depth: Depth + `1`))
1370	return true;
1371	Known = Known.intersectWith(RHS: Known2);
1372	}
1373
1374	// Attempt to avoid multi-use ops if we don't need anything from them.
1375	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1376	Op: Op0, DemandedBits, DemandedElts: DemandedLHS, DAG&: TLO.DAG, Depth: Depth + `1`);
1377	SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1378	Op: Op1, DemandedBits, DemandedElts: DemandedRHS, DAG&: TLO.DAG, Depth: Depth + `1`);
1379	if (DemandedOp0 \|\| DemandedOp1) {
1380	Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1381	Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1382	SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, N1: Op0, N2: Op1, Mask: ShuffleMask);
1383	return TLO.CombineTo(O: Op, N: NewOp);
1384	}
1385	}
1386	break;
1387	}
1388	case ISD::AND: {
1389	SDValue Op0 = Op.getOperand(i: `0`);
1390	SDValue Op1 = Op.getOperand(i: `1`);
1391
1392	// If the RHS is a constant, check to see if the LHS would be zero without
1393	// using the bits from the RHS. Below, we use knowledge about the RHS to
1394	// simplify the LHS, here we're using information from the LHS to simplify
1395	// the RHS.
1396	if (ConstantSDNode *RHSC = isConstOrConstSplat(N: Op1, DemandedElts)) {
1397	// Do not increment Depth here; that can cause an infinite loop.
1398	KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op: Op0, DemandedElts, Depth);
1399	// If the LHS already has zeros where RHSC does, this 'and' is dead.
1400	if ((LHSKnown.Zero & DemandedBits) ==
1401	(~RHSC->getAPIntValue() & DemandedBits))
1402	return TLO.CombineTo(O: Op, N: Op0);
1403
1404	// If any of the set bits in the RHS are known zero on the LHS, shrink
1405	// the constant.
1406	if (ShrinkDemandedConstant(Op, DemandedBits: ~LHSKnown.Zero & DemandedBits,
1407	DemandedElts, TLO))
1408	return true;
1409
1410	// Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1411	// constant, but if this 'and' is only clearing bits that were just set by
1412	// the xor, then this 'and' can be eliminated by shrinking the mask of
1413	// the xor. For example, for a 32-bit X:
1414	// and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1415	if (isBitwiseNot(V: Op0) && Op0.hasOneUse() &&
1416	LHSKnown.One == ~RHSC->getAPIntValue()) {
1417	SDValue Xor = TLO.DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Op0.getOperand(i: `0`), N2: Op1);
1418	return TLO.CombineTo(O: Op, N: Xor);
1419	}
1420	}
1421
1422	// AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1423	// iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1424	if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1425	(Op0.getOperand(i: `0`).isUndef() \|\|
1426	ISD::isBuildVectorOfConstantSDNodes(N: Op0.getOperand(i: `0`).getNode())) &&
1427	Op0 ->hasOneUse()) {
1428	unsigned NumSubElts =
1429	Op0.getOperand(i: `1`).getValueType().getVectorNumElements();
1430	unsigned SubIdx = Op0.getConstantOperandVal(i: `2`);
1431	APInt DemandedSub =
1432	APInt::getBitsSet(numBits: NumElts, loBit: SubIdx, hiBit: SubIdx + NumSubElts);
1433	KnownBits KnownSubMask =
1434	TLO.DAG.computeKnownBits(Op: Op1, DemandedElts: DemandedSub & DemandedElts, Depth: Depth + `1`);
1435	if (DemandedBits.isSubsetOf(RHS: KnownSubMask.One)) {
1436	SDValue NewAnd =
1437	TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op0.getOperand(i: `0`), N2: Op1);
1438	SDValue NewInsert =
1439	TLO.DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL: dl, VT, N1: NewAnd,
1440	N2: Op0.getOperand(i: `1`), N3: Op0.getOperand(i: `2`));
1441	return TLO.CombineTo(O: Op, N: NewInsert);
1442	}
1443	}
1444
1445	if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1446	Depth: Depth + `1`))
1447	return true;
1448	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: ~Known.Zero & DemandedBits, OriginalDemandedElts: DemandedElts,
1449	Known&: Known2, TLO, Depth: Depth + `1`))
1450	return true;
1451
1452	// If all of the demanded bits are known one on one side, return the other.
1453	// These bits cannot contribute to the result of the 'and'.
1454	if (DemandedBits.isSubsetOf(RHS: Known2.Zero \| Known.One))
1455	return TLO.CombineTo(O: Op, N: Op0);
1456	if (DemandedBits.isSubsetOf(RHS: Known.Zero \| Known2.One))
1457	return TLO.CombineTo(O: Op, N: Op1);
1458	// If all of the demanded bits in the inputs are known zeros, return zero.
1459	if (DemandedBits.isSubsetOf(RHS: Known.Zero \| Known2.Zero))
1460	return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: `0`, DL: dl, VT));
1461	// If the RHS is a constant, see if we can simplify it.
1462	if (ShrinkDemandedConstant(Op, DemandedBits: ~Known2.Zero & DemandedBits, DemandedElts,
1463	TLO))
1464	return true;
1465	// If the operation can be done in a smaller type, do so.
1466	if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1467	return true;
1468
1469	// Attempt to avoid multi-use ops if we don't need anything from them.
1470	if (!DemandedBits.isAllOnes() \|\| !DemandedElts.isAllOnes()) {
1471	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1472	Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1473	SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1474	Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1475	if (DemandedOp0 \|\| DemandedOp1) {
1476	Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1477	Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1478	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1479	return TLO.CombineTo(O: Op, N: NewOp);
1480	}
1481	}
1482
1483	Known &= Known2;
1484	break;
1485	}
1486	case ISD::OR: {
1487	SDValue Op0 = Op.getOperand(i: `0`);
1488	SDValue Op1 = Op.getOperand(i: `1`);
1489	SDNodeFlags Flags = Op.getNode()->getFlags();
1490	if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1491	Depth: Depth + `1`)) {
1492	if (Flags.hasDisjoint()) {
1493	Flags.setDisjoint(false);
1494	Op ->setFlags(Flags);
1495	}
1496	return true;
1497	}
1498
1499	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: ~Known.One & DemandedBits, OriginalDemandedElts: DemandedElts,
1500	Known&: Known2, TLO, Depth: Depth + `1`)) {
1501	if (Flags.hasDisjoint()) {
1502	Flags.setDisjoint(false);
1503	Op ->setFlags(Flags);
1504	}
1505	return true;
1506	}
1507
1508	// If all of the demanded bits are known zero on one side, return the other.
1509	// These bits cannot contribute to the result of the 'or'.
1510	if (DemandedBits.isSubsetOf(RHS: Known2.One \| Known.Zero))
1511	return TLO.CombineTo(O: Op, N: Op0);
1512	if (DemandedBits.isSubsetOf(RHS: Known.One \| Known2.Zero))
1513	return TLO.CombineTo(O: Op, N: Op1);
1514	// If the RHS is a constant, see if we can simplify it.
1515	if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1516	return true;
1517	// If the operation can be done in a smaller type, do so.
1518	if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1519	return true;
1520
1521	// Attempt to avoid multi-use ops if we don't need anything from them.
1522	if (!DemandedBits.isAllOnes() \|\| !DemandedElts.isAllOnes()) {
1523	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1524	Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1525	SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1526	Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1527	if (DemandedOp0 \|\| DemandedOp1) {
1528	Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1529	Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1530	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1531	return TLO.CombineTo(O: Op, N: NewOp);
1532	}
1533	}
1534
1535	// (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1\|C2), (and Y, C2))
1536	// TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1537	if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1538	Op0 ->hasOneUse() && Op1 ->hasOneUse()) {
1539	// Attempt to match all commutations - m_c_Or would've been useful!
1540	for (int I = `0`; I != `2`; ++I) {
1541	SDValue X = Op.getOperand(i: I).getOperand(i: `0`);
1542	SDValue C1 = Op.getOperand(i: I).getOperand(i: `1`);
1543	SDValue Alt = Op.getOperand(i: `1` - I).getOperand(i: `0`);
1544	SDValue C2 = Op.getOperand(i: `1` - I).getOperand(i: `1`);
1545	if (Alt.getOpcode() == ISD::OR) {
1546	for (int J = `0`; J != `2`; ++J) {
1547	if (X == Alt.getOperand(i: J)) {
1548	SDValue Y = Alt.getOperand(i: `1` - J);
1549	if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(Opcode: ISD::OR, DL: dl, VT,
1550	Ops: {C1, C2})) {
1551	SDValue MaskX = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: X, N2: C12);
1552	SDValue MaskY = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Y, N2: C2);
1553	return TLO.CombineTo(
1554	O: Op, N: TLO.DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: MaskX, N2: MaskY));
1555	}
1556	}
1557	}
1558	}
1559	}
1560	}
1561
1562	Known \|= Known2;
1563	break;
1564	}
1565	case ISD::XOR: {
1566	SDValue Op0 = Op.getOperand(i: `0`);
1567	SDValue Op1 = Op.getOperand(i: `1`);
1568
1569	if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1570	Depth: Depth + `1`))
1571	return true;
1572	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
1573	Depth: Depth + `1`))
1574	return true;
1575
1576	// If all of the demanded bits are known zero on one side, return the other.
1577	// These bits cannot contribute to the result of the 'xor'.
1578	if (DemandedBits.isSubsetOf(RHS: Known.Zero))
1579	return TLO.CombineTo(O: Op, N: Op0);
1580	if (DemandedBits.isSubsetOf(RHS: Known2.Zero))
1581	return TLO.CombineTo(O: Op, N: Op1);
1582	// If the operation can be done in a smaller type, do so.
1583	if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1584	return true;
1585
1586	// If all of the unknown bits are known to be zero on one side or the other
1587	// turn this into an inclusive* or.*
1588	// e.g. (A & C1)^(B & C2) -> (A & C1)\|(B & C2) iff C1&C2 == 0
1589	if (DemandedBits.isSubsetOf(RHS: Known.Zero \| Known2.Zero))
1590	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Op0, N2: Op1));
1591
1592	ConstantSDNode *C = isConstOrConstSplat(N: Op1, DemandedElts);
1593	if (C) {
1594	// If one side is a constant, and all of the set bits in the constant are
1595	// also known set on the other side, turn this into an AND, as we know
1596	// the bits will be cleared.
1597	// e.g. (X \| C1) ^ C2 --> (X \| C1) & ~C2 iff (C1&C2) == C2
1598	// NB: it is okay if more bits are known than are requested
1599	if (C->getAPIntValue() == Known2.One) {
1600	SDValue ANDC =
1601	TLO.DAG.getConstant(Val: ~C->getAPIntValue() & DemandedBits, DL: dl, VT);
1602	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op0, N2: ANDC));
1603	}
1604
1605	// If the RHS is a constant, see if we can change it. Don't alter a -1
1606	// constant because that's a 'not' op, and that is better for combining
1607	// and codegen.
1608	if (!C->isAllOnes() && DemandedBits.isSubsetOf(RHS: C->getAPIntValue())) {
1609	// We're flipping all demanded bits. Flip the undemanded bits too.
1610	SDValue New = TLO.DAG.getNOT(DL: dl, Val: Op0, VT);
1611	return TLO.CombineTo(O: Op, N: New);
1612	}
1613
1614	unsigned Op0Opcode = Op0.getOpcode();
1615	if ((Op0Opcode == ISD::SRL \|\| Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1616	if (ConstantSDNode *ShiftC =
1617	isConstOrConstSplat(N: Op0.getOperand(i: `1`), DemandedElts)) {
1618	// Don't crash on an oversized shift. We can not guarantee that a
1619	// bogus shift has been simplified to undef.
1620	if (ShiftC->getAPIntValue().ult(RHS: BitWidth)) {
1621	uint64_t ShiftAmt = ShiftC->getZExtValue();
1622	APInt Ones = APInt::getAllOnes(numBits: BitWidth);
1623	Ones = Op0Opcode == ISD::SHL ? Ones.shl(shiftAmt: ShiftAmt)
1624	: Ones.lshr(shiftAmt: ShiftAmt);
1625	const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
1626	if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1627	TLI.isDesirableToCommuteXorWithShift(N: Op.getNode())) {
1628	// If the xor constant is a demanded mask, do a 'not' before the
1629	// shift:
1630	// xor (X << ShiftC), XorC --> (not X) << ShiftC
1631	// xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1632	SDValue Not = TLO.DAG.getNOT(DL: dl, Val: Op0.getOperand(i: `0`), VT);
1633	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op0Opcode, DL: dl, VT, N1: Not,
1634	N2: Op0.getOperand(i: `1`)));
1635	}
1636	}
1637	}
1638	}
1639	}
1640
1641	// If we can't turn this into a 'not', try to shrink the constant.
1642	if (!C \|\| !C->isAllOnes())
1643	if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1644	return true;
1645
1646	// Attempt to avoid multi-use ops if we don't need anything from them.
1647	if (!DemandedBits.isAllOnes() \|\| !DemandedElts.isAllOnes()) {
1648	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1649	Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1650	SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1651	Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1652	if (DemandedOp0 \|\| DemandedOp1) {
1653	Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1654	Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1655	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1656	return TLO.CombineTo(O: Op, N: NewOp);
1657	}
1658	}
1659
1660	Known ^= Known2;
1661	break;
1662	}
1663	case ISD::SELECT:
1664	if (SimplifyDemandedBits(Op: Op.getOperand(i: `2`), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1665	Known, TLO, Depth: Depth + `1`))
1666	return true;
1667	if (SimplifyDemandedBits(Op: Op.getOperand(i: `1`), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1668	Known&: Known2, TLO, Depth: Depth + `1`))
1669	return true;
1670
1671	// If the operands are constants, see if we can simplify them.
1672	if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1673	return true;
1674
1675	// Only known if known in both the LHS and RHS.
1676	Known = Known.intersectWith(RHS: Known2);
1677	break;
1678	case ISD::VSELECT:
1679	if (SimplifyDemandedBits(Op: Op.getOperand(i: `2`), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1680	Known, TLO, Depth: Depth + `1`))
1681	return true;
1682	if (SimplifyDemandedBits(Op: Op.getOperand(i: `1`), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1683	Known&: Known2, TLO, Depth: Depth + `1`))
1684	return true;
1685
1686	// Only known if known in both the LHS and RHS.
1687	Known = Known.intersectWith(RHS: Known2);
1688	break;
1689	case ISD::SELECT_CC:
1690	if (SimplifyDemandedBits(Op: Op.getOperand(i: `3`), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1691	Known, TLO, Depth: Depth + `1`))
1692	return true;
1693	if (SimplifyDemandedBits(Op: Op.getOperand(i: `2`), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1694	Known&: Known2, TLO, Depth: Depth + `1`))
1695	return true;
1696
1697	// If the operands are constants, see if we can simplify them.
1698	if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1699	return true;
1700
1701	// Only known if known in both the LHS and RHS.
1702	Known = Known.intersectWith(RHS: Known2);
1703	break;
1704	case ISD::SETCC: {
1705	SDValue Op0 = Op.getOperand(i: `0`);
1706	SDValue Op1 = Op.getOperand(i: `1`);
1707	ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: `2`))->get();
1708	// If (1) we only need the sign-bit, (2) the setcc operands are the same
1709	// width as the setcc result, and (3) the result of a setcc conforms to 0 or
1710	// -1, we may be able to bypass the setcc.
1711	if (DemandedBits.isSignMask() &&
1712	Op0.getScalarValueSizeInBits() == BitWidth &&
1713	getBooleanContents(Type: Op0.getValueType()) ==
1714	BooleanContent::ZeroOrNegativeOneBooleanContent) {
1715	// If we're testing X < 0, then this compare isn't needed - just use X!
1716	// FIXME: We're limiting to integer types here, but this should also work
1717	// if we don't care about FP signed-zero. The use of SETLT with FP means
1718	// that we don't care about NaNs.
1719	if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1720	(isNullConstant(V: Op1) \|\| ISD::isBuildVectorAllZeros(N: Op1.getNode())))
1721	return TLO.CombineTo(O: Op, N: Op0);
1722
1723	// TODO: Should we check for other forms of sign-bit comparisons?
1724	// Examples: X <= -1, X >= 0
1725	}
1726	if (getBooleanContents(Type: Op0.getValueType()) ==
1727	TargetLowering::ZeroOrOneBooleanContent &&
1728	BitWidth > `1`)
1729	Known.Zero.setBitsFrom(`1`);
1730	break;
1731	}
1732	case ISD::SHL: {
1733	SDValue Op0 = Op.getOperand(i: `0`);
1734	SDValue Op1 = Op.getOperand(i: `1`);
1735	EVT ShiftVT = Op1.getValueType();
1736
1737	if (std::optional<uint64_t> KnownSA =
1738	TLO.DAG.getValidShiftAmount(V: Op, DemandedElts, Depth: Depth + `1`)) {
1739	unsigned ShAmt = *KnownSA;
1740	if (ShAmt == `0`)
1741	return TLO.CombineTo(O: Op, N: Op0);
1742
1743	// If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1744	// single shift. We can do this if the bottom bits (which are shifted
1745	// out) are never demanded.
1746	// TODO - support non-uniform vector amounts.
1747	if (Op0.getOpcode() == ISD::SRL) {
1748	if (!DemandedBits.intersects(RHS: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ShAmt))) {
1749	if (std::optional<uint64_t> InnerSA =
1750	TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + `2`)) {
1751	unsigned C1 = *InnerSA;
1752	unsigned Opc = ISD::SHL;
1753	int Diff = ShAmt - C1;
1754	if (Diff < `0`) {
1755	Diff = -Diff;
1756	Opc = ISD::SRL;
1757	}
1758	SDValue NewSA = TLO.DAG.getConstant(Val: Diff, DL: dl, VT: ShiftVT);
1759	return TLO.CombineTo(
1760	O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, N1: Op0.getOperand(i: `0`), N2: NewSA));
1761	}
1762	}
1763	}
1764
1765	// Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1766	// are not demanded. This will likely allow the anyext to be folded away.
1767	// TODO - support non-uniform vector amounts.
1768	if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1769	SDValue InnerOp = Op0.getOperand(i: `0`);
1770	EVT InnerVT = InnerOp.getValueType();
1771	unsigned InnerBits = InnerVT.getScalarSizeInBits();
1772	if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1773	isTypeDesirableForOp(ISD::SHL, VT: InnerVT)) {
1774	SDValue NarrowShl = TLO.DAG.getNode(
1775	Opcode: ISD::SHL, DL: dl, VT: InnerVT, N1: InnerOp,
1776	N2: TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: InnerVT, DL: dl));
1777	return TLO.CombineTo(
1778	O: Op, N: TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: NarrowShl));
1779	}
1780
1781	// Repeat the SHL optimization above in cases where an extension
1782	// intervenes: (shl (anyext (shr x, c1)), c2) to
1783	// (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1784	// aren't demanded (as above) and that the shifted upper c1 bits of
1785	// x aren't demanded.
1786	// TODO - support non-uniform vector amounts.
1787	if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1788	InnerOp.hasOneUse()) {
1789	if (std::optional<uint64_t> SA2 = TLO.DAG.getValidShiftAmount(
1790	V: InnerOp, DemandedElts, Depth: Depth + `2`)) {
1791	unsigned InnerShAmt = *SA2;
1792	if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1793	DemandedBits.getActiveBits() <=
1794	(InnerBits - InnerShAmt + ShAmt) &&
1795	DemandedBits.countr_zero() >= ShAmt) {
1796	SDValue NewSA =
1797	TLO.DAG.getConstant(Val: ShAmt - InnerShAmt, DL: dl, VT: ShiftVT);
1798	SDValue NewExt = TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT,
1799	Operand: InnerOp.getOperand(i: `0`));
1800	return TLO.CombineTo(
1801	O: Op, N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: NewExt, N2: NewSA));
1802	}
1803	}
1804	}
1805	}
1806
1807	APInt InDemandedMask = DemandedBits.lshr(shiftAmt: ShAmt);
1808	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
1809	Depth: Depth + `1`)) {
1810	SDNodeFlags Flags = Op.getNode()->getFlags();
1811	if (Flags.hasNoSignedWrap() \|\| Flags.hasNoUnsignedWrap()) {
1812	// Disable the nsw and nuw flags. We can no longer guarantee that we
1813	// won't wrap after simplification.
1814	Flags.setNoSignedWrap(false);
1815	Flags.setNoUnsignedWrap(false);
1816	Op ->setFlags(Flags);
1817	}
1818	return true;
1819	}
1820	Known.Zero <<= ShAmt;
1821	Known.One <<= ShAmt;
1822	// low bits known zero.
1823	Known.Zero.setLowBits(ShAmt);
1824
1825	// Attempt to avoid multi-use ops if we don't need anything from them.
1826	if (!InDemandedMask.isAllOnes() \|\| !DemandedElts.isAllOnes()) {
1827	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1828	Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
1829	if (DemandedOp0) {
1830	SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: DemandedOp0, N2: Op1);
1831	return TLO.CombineTo(O: Op, N: NewOp);
1832	}
1833	}
1834
1835	// TODO: Can we merge this fold with the one below?
1836	// Try shrinking the operation as long as the shift amount will still be
1837	// in range.
1838	if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1839	Op.getNode()->hasOneUse()) {
1840	// Search for the smallest integer type with free casts to and from
1841	// Op's type. For expedience, just check power-of-2 integer types.
1842	unsigned DemandedSize = DemandedBits.getActiveBits();
1843	for (unsigned SmallVTBits = llvm::bit_ceil(Value: DemandedSize);
1844	SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(A: SmallVTBits)) {
1845	EVT SmallVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: SmallVTBits);
1846	if (isNarrowingProfitable(SrcVT: VT, DestVT: SmallVT) &&
1847	isTypeDesirableForOp(ISD::SHL, VT: SmallVT) &&
1848	isTruncateFree(FromVT: VT, ToVT: SmallVT) && isZExtFree(FromTy: SmallVT, ToTy: VT) &&
1849	(!TLO.LegalOperations() \|\| isOperationLegal(Op: ISD::SHL, VT: SmallVT))) {
1850	assert(DemandedSize <= SmallVTBits &&
1851	"Narrowed below demanded bits?");
1852	// We found a type with free casts.
1853	SDValue NarrowShl = TLO.DAG.getNode(
1854	Opcode: ISD::SHL, DL: dl, VT: SmallVT,
1855	N1: TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SmallVT, Operand: Op.getOperand(i: `0`)),
1856	N2: TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: SmallVT, DL: dl));
1857	return TLO.CombineTo(
1858	O: Op, N: TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: NarrowShl));
1859	}
1860	}
1861	}
1862
1863	// Narrow shift to lower half - similar to ShrinkDemandedOp.
1864	// (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1865	// Only do this if we demand the upper half so the knownbits are correct.
1866	unsigned HalfWidth = BitWidth / `2`;
1867	if ((BitWidth % `2`) == `0` && !VT.isVector() && ShAmt < HalfWidth &&
1868	DemandedBits.countLeadingOnes() >= HalfWidth) {
1869	EVT HalfVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: HalfWidth);
1870	if (isNarrowingProfitable(SrcVT: VT, DestVT: HalfVT) &&
1871	isTypeDesirableForOp(ISD::SHL, VT: HalfVT) &&
1872	isTruncateFree(FromVT: VT, ToVT: HalfVT) && isZExtFree(FromTy: HalfVT, ToTy: VT) &&
1873	(!TLO.LegalOperations() \|\| isOperationLegal(Op: ISD::SHL, VT: HalfVT))) {
1874	// If we're demanding the upper bits at all, we must ensure
1875	// that the upper bits of the shift result are known to be zero,
1876	// which is equivalent to the narrow shift being NUW.
1877	if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1878	bool IsNSW = Known.countMinSignBits() > HalfWidth;
1879	SDNodeFlags Flags;
1880	Flags.setNoSignedWrap(IsNSW);
1881	Flags.setNoUnsignedWrap(IsNUW);
1882	SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HalfVT, Operand: Op0);
1883	SDValue NewShiftAmt =
1884	TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: HalfVT, DL: dl);
1885	SDValue NewShift = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HalfVT, N1: NewOp,
1886	N2: NewShiftAmt, Flags);
1887	SDValue NewExt =
1888	TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: NewShift);
1889	return TLO.CombineTo(O: Op, N: NewExt);
1890	}
1891	}
1892	}
1893	} else {
1894	// This is a variable shift, so we can't shift the demand mask by a known
1895	// amount. But if we are not demanding high bits, then we are not
1896	// demanding those bits from the pre-shifted operand either.
1897	if (unsigned CTLZ = DemandedBits.countl_zero()) {
1898	APInt DemandedFromOp(APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - CTLZ));
1899	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedFromOp, OriginalDemandedElts: DemandedElts, Known, TLO,
1900	Depth: Depth + `1`)) {
1901	SDNodeFlags Flags = Op.getNode()->getFlags();
1902	if (Flags.hasNoSignedWrap() \|\| Flags.hasNoUnsignedWrap()) {
1903	// Disable the nsw and nuw flags. We can no longer guarantee that we
1904	// won't wrap after simplification.
1905	Flags.setNoSignedWrap(false);
1906	Flags.setNoUnsignedWrap(false);
1907	Op ->setFlags(Flags);
1908	}
1909	return true;
1910	}
1911	Known.resetAll();
1912	}
1913	}
1914
1915	// If we are only demanding sign bits then we can use the shift source
1916	// directly.
1917	if (std::optional<uint64_t> MaxSA =
1918	TLO.DAG.getValidMaximumShiftAmount(V: Op, DemandedElts, Depth: Depth + `1`)) {
1919	unsigned ShAmt = *MaxSA;
1920	unsigned NumSignBits =
1921	TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + `1`);
1922	unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1923	if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1924	return TLO.CombineTo(O: Op, N: Op0);
1925	}
1926	break;
1927	}
1928	case ISD::SRL: {
1929	SDValue Op0 = Op.getOperand(i: `0`);
1930	SDValue Op1 = Op.getOperand(i: `1`);
1931	EVT ShiftVT = Op1.getValueType();
1932
1933	if (std::optional<uint64_t> KnownSA =
1934	TLO.DAG.getValidShiftAmount(V: Op, DemandedElts, Depth: Depth + `1`)) {
1935	unsigned ShAmt = *KnownSA;
1936	if (ShAmt == `0`)
1937	return TLO.CombineTo(O: Op, N: Op0);
1938
1939	// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1940	// single shift. We can do this if the top bits (which are shifted out)
1941	// are never demanded.
1942	// TODO - support non-uniform vector amounts.
1943	if (Op0.getOpcode() == ISD::SHL) {
1944	if (!DemandedBits.intersects(RHS: APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: ShAmt))) {
1945	if (std::optional<uint64_t> InnerSA =
1946	TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + `2`)) {
1947	unsigned C1 = *InnerSA;
1948	unsigned Opc = ISD::SRL;
1949	int Diff = ShAmt - C1;
1950	if (Diff < `0`) {
1951	Diff = -Diff;
1952	Opc = ISD::SHL;
1953	}
1954	SDValue NewSA = TLO.DAG.getConstant(Val: Diff, DL: dl, VT: ShiftVT);
1955	return TLO.CombineTo(
1956	O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, N1: Op0.getOperand(i: `0`), N2: NewSA));
1957	}
1958	}
1959	}
1960
1961	APInt InDemandedMask = (DemandedBits << ShAmt);
1962
1963	// If the shift is exact, then it does demand the low bits (and knows that
1964	// they are zero).
1965	if (Op ->getFlags().hasExact())
1966	InDemandedMask.setLowBits(ShAmt);
1967
1968	// Narrow shift to lower half - similar to ShrinkDemandedOp.
1969	// (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
1970	if ((BitWidth % `2`) == `0` && !VT.isVector()) {
1971	APInt HiBits = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth / `2`);
1972	EVT HalfVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: BitWidth / `2`);
1973	if (isNarrowingProfitable(SrcVT: VT, DestVT: HalfVT) &&
1974	isTypeDesirableForOp(ISD::SRL, VT: HalfVT) &&
1975	isTruncateFree(FromVT: VT, ToVT: HalfVT) && isZExtFree(FromTy: HalfVT, ToTy: VT) &&
1976	(!TLO.LegalOperations() \|\| isOperationLegal(Op: ISD::SRL, VT: HalfVT)) &&
1977	((InDemandedMask.countLeadingZeros() >= (BitWidth / `2`)) \|\|
1978	TLO.DAG.MaskedValueIsZero(Op: Op0, Mask: HiBits))) {
1979	SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HalfVT, Operand: Op0);
1980	SDValue NewShiftAmt =
1981	TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: HalfVT, DL: dl);
1982	SDValue NewShift =
1983	TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HalfVT, N1: NewOp, N2: NewShiftAmt);
1984	return TLO.CombineTo(
1985	O: Op, N: TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: NewShift));
1986	}
1987	}
1988
1989	// Compute the new bits that are at the top now.
1990	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
1991	Depth: Depth + `1`))
1992	return true;
1993	Known.Zero.lshrInPlace(ShiftAmt: ShAmt);
1994	Known.One.lshrInPlace(ShiftAmt: ShAmt);
1995	// High bits known zero.
1996	Known.Zero.setHighBits(ShAmt);
1997
1998	// Attempt to avoid multi-use ops if we don't need anything from them.
1999	if (!InDemandedMask.isAllOnes() \|\| !DemandedElts.isAllOnes()) {
2000	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2001	Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
2002	if (DemandedOp0) {
2003	SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: DemandedOp0, N2: Op1);
2004	return TLO.CombineTo(O: Op, N: NewOp);
2005	}
2006	}
2007	} else {
2008	// Use generic knownbits computation as it has support for non-uniform
2009	// shift amounts.
2010	Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2011	}
2012
2013	// Try to match AVG patterns (after shift simplification).
2014	if (SDValue AVG = combineShiftToAVG(Op, TLO, TLI: *this, DemandedBits,
2015	DemandedElts, Depth: Depth + `1`))
2016	return TLO.CombineTo(O: Op, N: AVG);
2017
2018	break;
2019	}
2020	case ISD::SRA: {
2021	SDValue Op0 = Op.getOperand(i: `0`);
2022	SDValue Op1 = Op.getOperand(i: `1`);
2023	EVT ShiftVT = Op1.getValueType();
2024
2025	// If we only want bits that already match the signbit then we don't need
2026	// to shift.
2027	unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2028	if (TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + `1`) >=
2029	NumHiDemandedBits)
2030	return TLO.CombineTo(O: Op, N: Op0);
2031
2032	// If this is an arithmetic shift right and only the low-bit is set, we can
2033	// always convert this into a logical shr, even if the shift amount is
2034	// variable. The low bit of the shift cannot be an input sign bit unless
2035	// the shift amount is >= the size of the datatype, which is undefined.
2036	if (DemandedBits.isOne())
2037	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1));
2038
2039	if (std::optional<uint64_t> KnownSA =
2040	TLO.DAG.getValidShiftAmount(V: Op, DemandedElts, Depth: Depth + `1`)) {
2041	unsigned ShAmt = *KnownSA;
2042	if (ShAmt == `0`)
2043	return TLO.CombineTo(O: Op, N: Op0);
2044
2045	// fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2046	// supports sext_inreg.
2047	if (Op0.getOpcode() == ISD::SHL) {
2048	if (std::optional<uint64_t> InnerSA =
2049	TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + `2`)) {
2050	unsigned LowBits = BitWidth - ShAmt;
2051	EVT ExtVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: LowBits);
2052	if (VT.isVector())
2053	ExtVT = EVT::getVectorVT(Context&: *TLO.DAG.getContext(), VT: ExtVT,
2054	EC: VT.getVectorElementCount());
2055
2056	if (*InnerSA == ShAmt) {
2057	if (!TLO.LegalOperations() \|\|
2058	getOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: ExtVT) == Legal)
2059	return TLO.CombineTo(
2060	O: Op, N: TLO.DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT,
2061	N1: Op0.getOperand(i: `0`),
2062	N2: TLO.DAG.getValueType(ExtVT)));
2063
2064	// Even if we can't convert to sext_inreg, we might be able to
2065	// remove this shift pair if the input is already sign extended.
2066	unsigned NumSignBits =
2067	TLO.DAG.ComputeNumSignBits(Op: Op0.getOperand(i: `0`), DemandedElts);
2068	if (NumSignBits > ShAmt)
2069	return TLO.CombineTo(O: Op, N: Op0.getOperand(i: `0`));
2070	}
2071	}
2072	}
2073
2074	APInt InDemandedMask = (DemandedBits << ShAmt);
2075
2076	// If the shift is exact, then it does demand the low bits (and knows that
2077	// they are zero).
2078	if (Op ->getFlags().hasExact())
2079	InDemandedMask.setLowBits(ShAmt);
2080
2081	// If any of the demanded bits are produced by the sign extension, we also
2082	// demand the input sign bit.
2083	if (DemandedBits.countl_zero() < ShAmt)
2084	InDemandedMask.setSignBit();
2085
2086	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
2087	Depth: Depth + `1`))
2088	return true;
2089	Known.Zero.lshrInPlace(ShiftAmt: ShAmt);
2090	Known.One.lshrInPlace(ShiftAmt: ShAmt);
2091
2092	// If the input sign bit is known to be zero, or if none of the top bits
2093	// are demanded, turn this into an unsigned shift right.
2094	if (Known.Zero [BitWidth - ShAmt - `1`] \|\|
2095	DemandedBits.countl_zero() >= ShAmt) {
2096	SDNodeFlags Flags;
2097	Flags.setExact(Op ->getFlags().hasExact());
2098	return TLO.CombineTo(
2099	O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1, Flags));
2100	}
2101
2102	int Log2 = DemandedBits.exactLogBase2();
2103	if (Log2 >= `0`) {
2104	// The bit must come from the sign.
2105	SDValue NewSA = TLO.DAG.getConstant(Val: BitWidth - `1` - Log2, DL: dl, VT: ShiftVT);
2106	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: NewSA));
2107	}
2108
2109	if (Known.One [BitWidth - ShAmt - `1`])
2110	// New bits are known one.
2111	Known.One.setHighBits(ShAmt);
2112
2113	// Attempt to avoid multi-use ops if we don't need anything from them.
2114	if (!InDemandedMask.isAllOnes() \|\| !DemandedElts.isAllOnes()) {
2115	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2116	Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
2117	if (DemandedOp0) {
2118	SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: DemandedOp0, N2: Op1);
2119	return TLO.CombineTo(O: Op, N: NewOp);
2120	}
2121	}
2122	}
2123
2124	// Try to match AVG patterns (after shift simplification).
2125	if (SDValue AVG = combineShiftToAVG(Op, TLO, TLI: *this, DemandedBits,
2126	DemandedElts, Depth: Depth + `1`))
2127	return TLO.CombineTo(O: Op, N: AVG);
2128
2129	break;
2130	}
2131	case ISD::FSHL:
2132	case ISD::FSHR: {
2133	SDValue Op0 = Op.getOperand(i: `0`);
2134	SDValue Op1 = Op.getOperand(i: `1`);
2135	SDValue Op2 = Op.getOperand(i: `2`);
2136	bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2137
2138	if (ConstantSDNode *SA = isConstOrConstSplat(N: Op2, DemandedElts)) {
2139	unsigned Amt = SA->getAPIntValue().urem(RHS: BitWidth);
2140
2141	// For fshl, 0-shift returns the 1st arg.
2142	// For fshr, 0-shift returns the 2nd arg.
2143	if (Amt == `0`) {
2144	if (SimplifyDemandedBits(Op: IsFSHL ? Op0 : Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
2145	Known, TLO, Depth: Depth + `1`))
2146	return true;
2147	break;
2148	}
2149
2150	// fshl: (Op0 << Amt) \| (Op1 >> (BW - Amt))
2151	// fshr: (Op0 << (BW - Amt)) \| (Op1 >> Amt)
2152	APInt Demanded0 = DemandedBits.lshr(shiftAmt: IsFSHL ? Amt : (BitWidth - Amt));
2153	APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2154	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: Demanded0, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2155	Depth: Depth + `1`))
2156	return true;
2157	if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: Demanded1, OriginalDemandedElts: DemandedElts, Known, TLO,
2158	Depth: Depth + `1`))
2159	return true;
2160
2161	Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2162	Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2163	Known.One.lshrInPlace(ShiftAmt: IsFSHL ? (BitWidth - Amt) : Amt);
2164	Known.Zero.lshrInPlace(ShiftAmt: IsFSHL ? (BitWidth - Amt) : Amt);
2165	Known = Known.unionWith(RHS: Known2);
2166
2167	// Attempt to avoid multi-use ops if we don't need anything from them.
2168	if (!Demanded0.isAllOnes() \|\| !Demanded1.isAllOnes() \|\|
2169	!DemandedElts.isAllOnes()) {
2170	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2171	Op: Op0, DemandedBits: Demanded0, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
2172	SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2173	Op: Op1, DemandedBits: Demanded1, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
2174	if (DemandedOp0 \|\| DemandedOp1) {
2175	DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2176	DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2177	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedOp0,
2178	N2: DemandedOp1, N3: Op2);
2179	return TLO.CombineTo(O: Op, N: NewOp);
2180	}
2181	}
2182	}
2183
2184	// For pow-2 bitwidths we only demand the bottom modulo amt bits.
2185	if (isPowerOf2_32(Value: BitWidth)) {
2186	APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - `1`);
2187	if (SimplifyDemandedBits(Op: Op2, OriginalDemandedBits: DemandedAmtBits, OriginalDemandedElts: DemandedElts,
2188	Known&: Known2, TLO, Depth: Depth + `1`))
2189	return true;
2190	}
2191	break;
2192	}
2193	case ISD::ROTL:
2194	case ISD::ROTR: {
2195	SDValue Op0 = Op.getOperand(i: `0`);
2196	SDValue Op1 = Op.getOperand(i: `1`);
2197	bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2198
2199	// If we're rotating an 0/-1 value, then it stays an 0/-1 value.
2200	if (BitWidth == TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + `1`))
2201	return TLO.CombineTo(O: Op, N: Op0);
2202
2203	if (ConstantSDNode *SA = isConstOrConstSplat(N: Op1, DemandedElts)) {
2204	unsigned Amt = SA->getAPIntValue().urem(RHS: BitWidth);
2205	unsigned RevAmt = BitWidth - Amt;
2206
2207	// rotl: (Op0 << Amt) \| (Op0 >> (BW - Amt))
2208	// rotr: (Op0 << (BW - Amt)) \| (Op0 >> Amt)
2209	APInt Demanded0 = DemandedBits.rotr(rotateAmt: IsROTL ? Amt : RevAmt);
2210	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: Demanded0, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2211	Depth: Depth + `1`))
2212	return true;
2213
2214	// rot*(x, 0) --> x
2215	if (Amt == `0`)
2216	return TLO.CombineTo(O: Op, N: Op0);
2217
2218	// See if we don't demand either half of the rotated bits.
2219	if ((!TLO.LegalOperations() \|\| isOperationLegal(Op: ISD::SHL, VT)) &&
2220	DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2221	Op1 = TLO.DAG.getConstant(Val: IsROTL ? Amt : RevAmt, DL: dl, VT: Op1.getValueType());
2222	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op0, N2: Op1));
2223	}
2224	if ((!TLO.LegalOperations() \|\| isOperationLegal(Op: ISD::SRL, VT)) &&
2225	DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2226	Op1 = TLO.DAG.getConstant(Val: IsROTL ? RevAmt : Amt, DL: dl, VT: Op1.getValueType());
2227	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1));
2228	}
2229	}
2230
2231	// For pow-2 bitwidths we only demand the bottom modulo amt bits.
2232	if (isPowerOf2_32(Value: BitWidth)) {
2233	APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - `1`);
2234	if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedAmtBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2235	Depth: Depth + `1`))
2236	return true;
2237	}
2238	break;
2239	}
2240	case ISD::SMIN:
2241	case ISD::SMAX:
2242	case ISD::UMIN:
2243	case ISD::UMAX: {
2244	unsigned Opc = Op.getOpcode();
2245	SDValue Op0 = Op.getOperand(i: `0`);
2246	SDValue Op1 = Op.getOperand(i: `1`);
2247
2248	// If we're only demanding signbits, then we can simplify to OR/AND node.
2249	unsigned BitOp =
2250	(Opc == ISD::SMIN \|\| Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2251	unsigned NumSignBits =
2252	std::min(a: TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + `1`),
2253	b: TLO.DAG.ComputeNumSignBits(Op: Op1, DemandedElts, Depth: Depth + `1`));
2254	unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2255	if (NumSignBits >= NumDemandedUpperBits)
2256	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: BitOp, DL: SDLoc (Op), VT, N1: Op0, N2: Op1));
2257
2258	// Check if one arg is always less/greater than (or equal) to the other arg.
2259	KnownBits Known0 = TLO.DAG.computeKnownBits(Op: Op0, DemandedElts, Depth: Depth + `1`);
2260	KnownBits Known1 = TLO.DAG.computeKnownBits(Op: Op1, DemandedElts, Depth: Depth + `1`);
2261	switch (Opc) {
2262	case ISD::SMIN:
2263	if (std::optional<bool> IsSLE = KnownBits::sle(LHS: Known0, RHS: Known1))
2264	return TLO.CombineTo(O: Op, N: *IsSLE ? Op0 : Op1);
2265	if (std::optional<bool> IsSLT = KnownBits::slt(LHS: Known0, RHS: Known1))
2266	return TLO.CombineTo(O: Op, N: *IsSLT ? Op0 : Op1);
2267	Known = KnownBits::smin(LHS: Known0, RHS: Known1);
2268	break;
2269	case ISD::SMAX:
2270	if (std::optional<bool> IsSGE = KnownBits::sge(LHS: Known0, RHS: Known1))
2271	return TLO.CombineTo(O: Op, N: *IsSGE ? Op0 : Op1);
2272	if (std::optional<bool> IsSGT = KnownBits::sgt(LHS: Known0, RHS: Known1))
2273	return TLO.CombineTo(O: Op, N: *IsSGT ? Op0 : Op1);
2274	Known = KnownBits::smax(LHS: Known0, RHS: Known1);
2275	break;
2276	case ISD::UMIN:
2277	if (std::optional<bool> IsULE = KnownBits::ule(LHS: Known0, RHS: Known1))
2278	return TLO.CombineTo(O: Op, N: *IsULE ? Op0 : Op1);
2279	if (std::optional<bool> IsULT = KnownBits::ult(LHS: Known0, RHS: Known1))
2280	return TLO.CombineTo(O: Op, N: *IsULT ? Op0 : Op1);
2281	Known = KnownBits::umin(LHS: Known0, RHS: Known1);
2282	break;
2283	case ISD::UMAX:
2284	if (std::optional<bool> IsUGE = KnownBits::uge(LHS: Known0, RHS: Known1))
2285	return TLO.CombineTo(O: Op, N: *IsUGE ? Op0 : Op1);
2286	if (std::optional<bool> IsUGT = KnownBits::ugt(LHS: Known0, RHS: Known1))
2287	return TLO.CombineTo(O: Op, N: *IsUGT ? Op0 : Op1);
2288	Known = KnownBits::umax(LHS: Known0, RHS: Known1);
2289	break;
2290	}
2291	break;
2292	}
2293	case ISD::BITREVERSE: {
2294	SDValue Src = Op.getOperand(i: `0`);
2295	APInt DemandedSrcBits = DemandedBits.reverseBits();
2296	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2297	Depth: Depth + `1`))
2298	return true;
2299	Known.One = Known2.One.reverseBits();
2300	Known.Zero = Known2.Zero.reverseBits();
2301	break;
2302	}
2303	case ISD::BSWAP: {
2304	SDValue Src = Op.getOperand(i: `0`);
2305
2306	// If the only bits demanded come from one byte of the bswap result,
2307	// just shift the input byte into position to eliminate the bswap.
2308	unsigned NLZ = DemandedBits.countl_zero();
2309	unsigned NTZ = DemandedBits.countr_zero();
2310
2311	// Round NTZ down to the next byte. If we have 11 trailing zeros, then
2312	// we need all the bits down to bit 8. Likewise, round NLZ. If we
2313	// have 14 leading zeros, round to 8.
2314	NLZ = alignDown(Value: NLZ, Align: `8`);
2315	NTZ = alignDown(Value: NTZ, Align: `8`);
2316	// If we need exactly one byte, we can do this transformation.
2317	if (BitWidth - NLZ - NTZ == `8`) {
2318	// Replace this with either a left or right shift to get the byte into
2319	// the right place.
2320	unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2321	if (!TLO.LegalOperations() \|\| isOperationLegal(Op: ShiftOpcode, VT)) {
2322	unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2323	SDValue ShAmt = TLO.DAG.getShiftAmountConstant(Val: ShiftAmount, VT, DL: dl);
2324	SDValue NewOp = TLO.DAG.getNode(Opcode: ShiftOpcode, DL: dl, VT, N1: Src, N2: ShAmt);
2325	return TLO.CombineTo(O: Op, N: NewOp);
2326	}
2327	}
2328
2329	APInt DemandedSrcBits = DemandedBits.byteSwap();
2330	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2331	Depth: Depth + `1`))
2332	return true;
2333	Known.One = Known2.One.byteSwap();
2334	Known.Zero = Known2.Zero.byteSwap();
2335	break;
2336	}
2337	case ISD::CTPOP: {
2338	// If only 1 bit is demanded, replace with PARITY as long as we're before
2339	// op legalization.
2340	// FIXME: Limit to scalars for now.
2341	if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2342	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::PARITY, DL: dl, VT,
2343	Operand: Op.getOperand(i: `0`)));
2344
2345	Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2346	break;
2347	}
2348	case ISD::SIGN_EXTEND_INREG: {
2349	SDValue Op0 = Op.getOperand(i: `0`);
2350	EVT ExVT = cast<VTSDNode>(Val: Op.getOperand(i: `1`))->getVT();
2351	unsigned ExVTBits = ExVT.getScalarSizeInBits();
2352
2353	// If we only care about the highest bit, don't bother shifting right.
2354	if (DemandedBits.isSignMask()) {
2355	unsigned MinSignedBits =
2356	TLO.DAG.ComputeMaxSignificantBits(Op: Op0, DemandedElts, Depth: Depth + `1`);
2357	bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2358	// However if the input is already sign extended we expect the sign
2359	// extension to be dropped altogether later and do not simplify.
2360	if (!AlreadySignExtended) {
2361	// Compute the correct shift amount type, which must be getShiftAmountTy
2362	// for scalar types after legalization.
2363	SDValue ShiftAmt =
2364	TLO.DAG.getShiftAmountConstant(Val: BitWidth - ExVTBits, VT, DL: dl);
2365	return TLO.CombineTo(O: Op,
2366	N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op0, N2: ShiftAmt));
2367	}
2368	}
2369
2370	// If none of the extended bits are demanded, eliminate the sextinreg.
2371	if (DemandedBits.getActiveBits() <= ExVTBits)
2372	return TLO.CombineTo(O: Op, N: Op0);
2373
2374	APInt InputDemandedBits = DemandedBits.getLoBits(numBits: ExVTBits);
2375
2376	// Since the sign extended bits are demanded, we know that the sign
2377	// bit is demanded.
2378	InputDemandedBits.setBit(ExVTBits - `1`);
2379
2380	if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InputDemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
2381	Depth: Depth + `1`))
2382	return true;
2383
2384	// If the sign bit of the input is known set or clear, then we know the
2385	// top bits of the result.
2386
2387	// If the input sign bit is known zero, convert this into a zero extension.
2388	if (Known.Zero [ExVTBits - `1`])
2389	return TLO.CombineTo(O: Op, N: TLO.DAG.getZeroExtendInReg(Op: Op0, DL: dl, VT: ExVT));
2390
2391	APInt Mask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ExVTBits);
2392	if (Known.One [ExVTBits - `1`]) { // Input sign bit known set
2393	Known.One.setBitsFrom(ExVTBits);
2394	Known.Zero &= Mask;
2395	} else { // Input sign bit unknown
2396	Known.Zero &= Mask;
2397	Known.One &= Mask;
2398	}
2399	break;
2400	}
2401	case ISD::BUILD_PAIR: {
2402	EVT HalfVT = Op.getOperand(i: `0`).getValueType();
2403	unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2404
2405	APInt MaskLo = DemandedBits.getLoBits(numBits: HalfBitWidth).trunc(width: HalfBitWidth);
2406	APInt MaskHi = DemandedBits.getHiBits(numBits: HalfBitWidth).trunc(width: HalfBitWidth);
2407
2408	KnownBits KnownLo, KnownHi;
2409
2410	if (SimplifyDemandedBits(Op: Op.getOperand(i: `0`), DemandedBits: MaskLo, Known&: KnownLo, TLO, Depth: Depth + `1`))
2411	return true;
2412
2413	if (SimplifyDemandedBits(Op: Op.getOperand(i: `1`), DemandedBits: MaskHi, Known&: KnownHi, TLO, Depth: Depth + `1`))
2414	return true;
2415
2416	Known = KnownHi.concat(Lo: KnownLo);
2417	break;
2418	}
2419	case ISD::ZERO_EXTEND_VECTOR_INREG:
2420	if (VT.isScalableVector())
2421	return false;
2422	[[fallthrough]];
2423	case ISD::ZERO_EXTEND: {
2424	SDValue Src = Op.getOperand(i: `0`);
2425	EVT SrcVT = Src.getValueType();
2426	unsigned InBits = SrcVT.getScalarSizeInBits();
2427	unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : `1`;
2428	bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2429
2430	// If none of the top bits are demanded, convert this into an any_extend.
2431	if (DemandedBits.getActiveBits() <= InBits) {
2432	// If we only need the non-extended bits of the bottom element
2433	// then we can just bitcast to the result.
2434	if (IsLE && IsVecInReg && DemandedElts == `1` &&
2435	VT.getSizeInBits() == SrcVT.getSizeInBits())
2436	return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2437
2438	unsigned Opc =
2439	IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2440	if (!TLO.LegalOperations() \|\| isOperationLegal(Op: Opc, VT))
2441	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src));
2442	}
2443
2444	SDNodeFlags Flags = Op ->getFlags();
2445	APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2446	APInt InDemandedElts = DemandedElts.zext(width: InElts);
2447	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2448	Depth: Depth + `1`)) {
2449	if (Flags.hasNonNeg()) {
2450	Flags.setNonNeg(false);
2451	Op ->setFlags(Flags);
2452	}
2453	return true;
2454	}
2455	assert(Known.getBitWidth() == InBits && "Src width has changed?");
2456	Known = Known.zext(BitWidth);
2457
2458	// Attempt to avoid multi-use ops if we don't need anything from them.
2459	if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2460	Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`))
2461	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2462	break;
2463	}
2464	case ISD::SIGN_EXTEND_VECTOR_INREG:
2465	if (VT.isScalableVector())
2466	return false;
2467	[[fallthrough]];
2468	case ISD::SIGN_EXTEND: {
2469	SDValue Src = Op.getOperand(i: `0`);
2470	EVT SrcVT = Src.getValueType();
2471	unsigned InBits = SrcVT.getScalarSizeInBits();
2472	unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : `1`;
2473	bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2474
2475	APInt InDemandedElts = DemandedElts.zext(width: InElts);
2476	APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2477
2478	// Since some of the sign extended bits are demanded, we know that the sign
2479	// bit is demanded.
2480	InDemandedBits.setBit(InBits - `1`);
2481
2482	// If none of the top bits are demanded, convert this into an any_extend.
2483	if (DemandedBits.getActiveBits() <= InBits) {
2484	// If we only need the non-extended bits of the bottom element
2485	// then we can just bitcast to the result.
2486	if (IsLE && IsVecInReg && DemandedElts == `1` &&
2487	VT.getSizeInBits() == SrcVT.getSizeInBits())
2488	return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2489
2490	// Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2491	if (getBooleanContents(Type: VT) != ZeroOrNegativeOneBooleanContent \|\|
2492	TLO.DAG.ComputeNumSignBits(Op: Src, DemandedElts: InDemandedElts, Depth: Depth + `1`) !=
2493	InBits) {
2494	unsigned Opc =
2495	IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2496	if (!TLO.LegalOperations() \|\| isOperationLegal(Op: Opc, VT))
2497	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src));
2498	}
2499	}
2500
2501	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2502	Depth: Depth + `1`))
2503	return true;
2504	assert(Known.getBitWidth() == InBits && "Src width has changed?");
2505
2506	// If the sign bit is known one, the top bits match.
2507	Known = Known.sext(BitWidth);
2508
2509	// If the sign bit is known zero, convert this to a zero extend.
2510	if (Known.isNonNegative()) {
2511	unsigned Opc =
2512	IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2513	if (!TLO.LegalOperations() \|\| isOperationLegal(Op: Opc, VT)) {
2514	SDNodeFlags Flags;
2515	if (!IsVecInReg)
2516	Flags.setNonNeg(true);
2517	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src, Flags));
2518	}
2519	}
2520
2521	// Attempt to avoid multi-use ops if we don't need anything from them.
2522	if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2523	Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`))
2524	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2525	break;
2526	}
2527	case ISD::ANY_EXTEND_VECTOR_INREG:
2528	if (VT.isScalableVector())
2529	return false;
2530	[[fallthrough]];
2531	case ISD::ANY_EXTEND: {
2532	SDValue Src = Op.getOperand(i: `0`);
2533	EVT SrcVT = Src.getValueType();
2534	unsigned InBits = SrcVT.getScalarSizeInBits();
2535	unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : `1`;
2536	bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2537
2538	// If we only need the bottom element then we can just bitcast.
2539	// TODO: Handle ANY_EXTEND?
2540	if (IsLE && IsVecInReg && DemandedElts == `1` &&
2541	VT.getSizeInBits() == SrcVT.getSizeInBits())
2542	return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2543
2544	APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2545	APInt InDemandedElts = DemandedElts.zext(width: InElts);
2546	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2547	Depth: Depth + `1`))
2548	return true;
2549	assert(Known.getBitWidth() == InBits && "Src width has changed?");
2550	Known = Known.anyext(BitWidth);
2551
2552	// Attempt to avoid multi-use ops if we don't need anything from them.
2553	if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2554	Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`))
2555	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2556	break;
2557	}
2558	case ISD::TRUNCATE: {
2559	SDValue Src = Op.getOperand(i: `0`);
2560
2561	// Simplify the input, using demanded bit information, and compute the known
2562	// zero/one bits live out.
2563	unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2564	APInt TruncMask = DemandedBits.zext(width: OperandBitWidth);
2565	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: TruncMask, OriginalDemandedElts: DemandedElts, Known, TLO,
2566	Depth: Depth + `1`))
2567	return true;
2568	Known = Known.trunc(BitWidth);
2569
2570	// Attempt to avoid multi-use ops if we don't need anything from them.
2571	if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2572	Op: Src, DemandedBits: TruncMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`))
2573	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: NewSrc));
2574
2575	// If the input is only used by this truncate, see if we can shrink it based
2576	// on the known demanded bits.
2577	switch (Src.getOpcode()) {
2578	default:
2579	break;
2580	case ISD::SRL:
2581	// Shrink SRL by a constant if none of the high bits shifted in are
2582	// demanded.
2583	if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2584	// Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2585	// undesirable.
2586	break;
2587
2588	if (Src.getNode()->hasOneUse()) {
2589	if (isTruncateFree(Val: Src, VT2: VT) &&
2590	!isTruncateFree(FromVT: Src.getValueType(), ToVT: VT)) {
2591	// If truncate is only free at trunc(srl), do not turn it into
2592	// srl(trunc). The check is done by first check the truncate is free
2593	// at Src's opcode(srl), then check the truncate is not done by
2594	// referencing sub-register. In test, if both trunc(srl) and
2595	// srl(trunc)'s trunc are free, srl(trunc) performs better. If only
2596	// trunc(srl)'s trunc is free, trunc(srl) is better.
2597	break;
2598	}
2599
2600	std::optional<uint64_t> ShAmtC =
2601	TLO.DAG.getValidShiftAmount(V: Src, DemandedElts, Depth: Depth + `2`);
2602	if (!ShAmtC \|\| *ShAmtC >= BitWidth)
2603	break;
2604	uint64_t ShVal = *ShAmtC;
2605
2606	APInt HighBits =
2607	APInt::getHighBitsSet(numBits: OperandBitWidth, hiBitsSet: OperandBitWidth - BitWidth);
2608	HighBits.lshrInPlace(ShiftAmt: ShVal);
2609	HighBits = HighBits.trunc(width: BitWidth);
2610	if (!(HighBits & DemandedBits)) {
2611	// None of the shifted in bits are needed. Add a truncate of the
2612	// shift input, then shift it.
2613	SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(Val: ShVal, VT, DL: dl);
2614	SDValue NewTrunc =
2615	TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Src.getOperand(i: `0`));
2616	return TLO.CombineTo(
2617	O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: NewTrunc, N2: NewShAmt));
2618	}
2619	}
2620	break;
2621	}
2622
2623	break;
2624	}
2625	case ISD::AssertZext: {
2626	// AssertZext demands all of the high bits, plus any of the low bits
2627	// demanded by its users.
2628	EVT ZVT = cast<VTSDNode>(Val: Op.getOperand(i: `1`))->getVT();
2629	APInt InMask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ZVT.getSizeInBits());
2630	if (SimplifyDemandedBits(Op: Op.getOperand(i: `0`), DemandedBits: ~InMask \| DemandedBits, Known,
2631	TLO, Depth: Depth + `1`))
2632	return true;
2633
2634	Known.Zero \|= ~InMask;
2635	Known.One &= (~Known.Zero);
2636	break;
2637	}
2638	case ISD::EXTRACT_VECTOR_ELT: {
2639	SDValue Src = Op.getOperand(i: `0`);
2640	SDValue Idx = Op.getOperand(i: `1`);
2641	ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2642	unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2643
2644	if (SrcEltCnt.isScalable())
2645	return false;
2646
2647	// Demand the bits from every vector element without a constant index.
2648	unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2649	APInt DemandedSrcElts = APInt::getAllOnes(numBits: NumSrcElts);
2650	if (auto *CIdx = dyn_cast<ConstantSDNode>(Val&: Idx))
2651	if (CIdx->getAPIntValue().ult(RHS: NumSrcElts))
2652	DemandedSrcElts = APInt::getOneBitSet(numBits: NumSrcElts, BitNo: CIdx->getZExtValue());
2653
2654	// If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2655	// anything about the extended bits.
2656	APInt DemandedSrcBits = DemandedBits;
2657	if (BitWidth > EltBitWidth)
2658	DemandedSrcBits = DemandedSrcBits.trunc(width: EltBitWidth);
2659
2660	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts, Known&: Known2, TLO,
2661	Depth: Depth + `1`))
2662	return true;
2663
2664	// Attempt to avoid multi-use ops if we don't need anything from them.
2665	if (!DemandedSrcBits.isAllOnes() \|\| !DemandedSrcElts.isAllOnes()) {
2666	if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2667	Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + `1`)) {
2668	SDValue NewOp =
2669	TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedSrc, N2: Idx);
2670	return TLO.CombineTo(O: Op, N: NewOp);
2671	}
2672	}
2673
2674	Known = Known2;
2675	if (BitWidth > EltBitWidth)
2676	Known = Known.anyext(BitWidth);
2677	break;
2678	}
2679	case ISD::BITCAST: {
2680	if (VT.isScalableVector())
2681	return false;
2682	SDValue Src = Op.getOperand(i: `0`);
2683	EVT SrcVT = Src.getValueType();
2684	unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2685
2686	// If this is an FP->Int bitcast and if the sign bit is the only
2687	// thing demanded, turn this into a FGETSIGN.
2688	if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2689	DemandedBits == APInt::getSignMask(BitWidth: Op.getValueSizeInBits()) &&
2690	SrcVT.isFloatingPoint()) {
2691	bool OpVTLegal = isOperationLegalOrCustom(Op: ISD::FGETSIGN, VT);
2692	bool i32Legal = isOperationLegalOrCustom(Op: ISD::FGETSIGN, VT: MVT::i32);
2693	if ((OpVTLegal \|\| i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2694	SrcVT != MVT::f128) {
2695	// Cannot eliminate/lower SHL for f128 yet.
2696	EVT Ty = OpVTLegal ? VT : MVT::i32;
2697	// Make a FGETSIGN + SHL to move the sign bit into the appropriate
2698	// place. We expect the SHL to be eliminated by other optimizations.
2699	SDValue Sign = TLO.DAG.getNode(Opcode: ISD::FGETSIGN, DL: dl, VT: Ty, Operand: Src);
2700	unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2701	if (!OpVTLegal && OpVTSizeInBits > `32`)
2702	Sign = TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Sign);
2703	unsigned ShVal = Op.getValueSizeInBits() - `1`;
2704	SDValue ShAmt = TLO.DAG.getConstant(Val: ShVal, DL: dl, VT);
2705	return TLO.CombineTo(O: Op,
2706	N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Sign, N2: ShAmt));
2707	}
2708	}
2709
2710	// Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2711	// Demand the elt/bit if any of the original elts/bits are demanded.
2712	if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == `0`) {
2713	unsigned Scale = BitWidth / NumSrcEltBits;
2714	unsigned NumSrcElts = SrcVT.getVectorNumElements();
2715	APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
2716	APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
2717	for (unsigned i = `0`; i != Scale; ++i) {
2718	unsigned EltOffset = IsLE ? i : (Scale - `1` - i);
2719	unsigned BitOffset = EltOffset * NumSrcEltBits;
2720	APInt Sub = DemandedBits.extractBits(numBits: NumSrcEltBits, bitPosition: BitOffset);
2721	if (!Sub.isZero()) {
2722	DemandedSrcBits \|= Sub;
2723	for (unsigned j = `0`; j != NumElts; ++j)
2724	if (DemandedElts [j])
2725	DemandedSrcElts.setBit((j * Scale) + i);
2726	}
2727	}
2728
2729	APInt KnownSrcUndef, KnownSrcZero;
2730	if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedSrcElts, KnownUndef&: KnownSrcUndef,
2731	KnownZero&: KnownSrcZero, TLO, Depth: Depth + `1`))
2732	return true;
2733
2734	KnownBits KnownSrcBits;
2735	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts,
2736	Known&: KnownSrcBits, TLO, Depth: Depth + `1`))
2737	return true;
2738	} else if (IsLE && (NumSrcEltBits % BitWidth) == `0`) {
2739	// TODO - bigendian once we have test coverage.
2740	unsigned Scale = NumSrcEltBits / BitWidth;
2741	unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : `1`;
2742	APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
2743	APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
2744	for (unsigned i = `0`; i != NumElts; ++i)
2745	if (DemandedElts [i]) {
2746	unsigned Offset = (i % Scale) * BitWidth;
2747	DemandedSrcBits.insertBits(SubBits: DemandedBits, bitPosition: Offset);
2748	DemandedSrcElts.setBit(i / Scale);
2749	}
2750
2751	if (SrcVT.isVector()) {
2752	APInt KnownSrcUndef, KnownSrcZero;
2753	if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedSrcElts, KnownUndef&: KnownSrcUndef,
2754	KnownZero&: KnownSrcZero, TLO, Depth: Depth + `1`))
2755	return true;
2756	}
2757
2758	KnownBits KnownSrcBits;
2759	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts,
2760	Known&: KnownSrcBits, TLO, Depth: Depth + `1`))
2761	return true;
2762
2763	// Attempt to avoid multi-use ops if we don't need anything from them.
2764	if (!DemandedSrcBits.isAllOnes() \|\| !DemandedSrcElts.isAllOnes()) {
2765	if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2766	Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + `1`)) {
2767	SDValue NewOp = TLO.DAG.getBitcast(VT, V: DemandedSrc);
2768	return TLO.CombineTo(O: Op, N: NewOp);
2769	}
2770	}
2771	}
2772
2773	// If this is a bitcast, let computeKnownBits handle it. Only do this on a
2774	// recursive call where Known may be useful to the caller.
2775	if (Depth > `0`) {
2776	Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2777	return false;
2778	}
2779	break;
2780	}
2781	case ISD::MUL:
2782	if (DemandedBits.isPowerOf2()) {
2783	// The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2784	// If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2785	// odd (has LSB set), then the left-shifted low bit of X is the answer.
2786	unsigned CTZ = DemandedBits.countr_zero();
2787	ConstantSDNode *C = isConstOrConstSplat(N: Op.getOperand(i: `1`), DemandedElts);
2788	if (C && C->getAPIntValue().countr_zero() == CTZ) {
2789	SDValue AmtC = TLO.DAG.getShiftAmountConstant(Val: CTZ, VT, DL: dl);
2790	SDValue Shl = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op.getOperand(i: `0`), N2: AmtC);
2791	return TLO.CombineTo(O: Op, N: Shl);
2792	}
2793	}
2794	// For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2795	// X * X is odd iff X is odd.
2796	// 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2797	if (Op.getOperand(i: `0`) == Op.getOperand(i: `1`) && DemandedBits.ult(RHS: `4`)) {
2798	SDValue One = TLO.DAG.getConstant(Val: `1`, DL: dl, VT);
2799	SDValue And1 = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op.getOperand(i: `0`), N2: One);
2800	return TLO.CombineTo(O: Op, N: And1);
2801	}
2802	[[fallthrough]];
2803	case ISD::ADD:
2804	case ISD::SUB: {
2805	// Add, Sub, and Mul don't demand any bits in positions beyond that
2806	// of the highest bit demanded of them.
2807	SDValue Op0 = Op.getOperand(i: `0`), Op1 = Op.getOperand(i: `1`);
2808	SDNodeFlags Flags = Op.getNode()->getFlags();
2809	unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2810	APInt LoMask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - DemandedBitsLZ);
2811	KnownBits KnownOp0, KnownOp1;
2812	auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2813	const KnownBits &KnownRHS) {
2814	if (Op.getOpcode() == ISD::MUL)
2815	Demanded.clearHighBits(hiBits: KnownRHS.countMinTrailingZeros());
2816	return Demanded;
2817	};
2818	if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: LoMask, OriginalDemandedElts: DemandedElts, Known&: KnownOp1, TLO,
2819	Depth: Depth + `1`) \|\|
2820	SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: GetDemandedBitsLHSMask (LoMask, KnownOp1),
2821	OriginalDemandedElts: DemandedElts, Known&: KnownOp0, TLO, Depth: Depth + `1`) \|\|
2822	// See if the operation should be performed at a smaller bit width.
2823	ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2824	if (Flags.hasNoSignedWrap() \|\| Flags.hasNoUnsignedWrap()) {
2825	// Disable the nsw and nuw flags. We can no longer guarantee that we
2826	// won't wrap after simplification.
2827	Flags.setNoSignedWrap(false);
2828	Flags.setNoUnsignedWrap(false);
2829	Op ->setFlags(Flags);
2830	}
2831	return true;
2832	}
2833
2834	// neg x with only low bit demanded is simply x.
2835	if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2836	isNullConstant(V: Op0))
2837	return TLO.CombineTo(O: Op, N: Op1);
2838
2839	// Attempt to avoid multi-use ops if we don't need anything from them.
2840	if (!LoMask.isAllOnes() \|\| !DemandedElts.isAllOnes()) {
2841	SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2842	Op: Op0, DemandedBits: LoMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
2843	SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2844	Op: Op1, DemandedBits: LoMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`);
2845	if (DemandedOp0 \|\| DemandedOp1) {
2846	Flags.setNoSignedWrap(false);
2847	Flags.setNoUnsignedWrap(false);
2848	Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2849	Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2850	SDValue NewOp =
2851	TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1, Flags);
2852	return TLO.CombineTo(O: Op, N: NewOp);
2853	}
2854	}
2855
2856	// If we have a constant operand, we may be able to turn it into -1 if we
2857	// do not demand the high bits. This can make the constant smaller to
2858	// encode, allow more general folding, or match specialized instruction
2859	// patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2860	// is probably not useful (and could be detrimental).
2861	ConstantSDNode *C = isConstOrConstSplat(N: Op1);
2862	APInt HighMask = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: DemandedBitsLZ);
2863	if (C && !C->isAllOnes() && !C->isOne() &&
2864	(C->getAPIntValue() \| HighMask).isAllOnes()) {
2865	SDValue Neg1 = TLO.DAG.getAllOnesConstant(DL: dl, VT);
2866	// Disable the nsw and nuw flags. We can no longer guarantee that we
2867	// won't wrap after simplification.
2868	Flags.setNoSignedWrap(false);
2869	Flags.setNoUnsignedWrap(false);
2870	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Neg1, Flags);
2871	return TLO.CombineTo(O: Op, N: NewOp);
2872	}
2873
2874	// Match a multiply with a disguised negated-power-of-2 and convert it to
2875	// an equivalent shift-left amount.
2876	// Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2877	auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2878	if (Mul.getOpcode() != ISD::MUL \|\| !Mul.hasOneUse())
2879	return `0`;
2880
2881	// Don't touch opaque constants. Also, ignore zero and power-of-2
2882	// multiplies. Those will get folded later.
2883	ConstantSDNode *MulC = isConstOrConstSplat(N: Mul.getOperand(i: `1`));
2884	if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2885	!MulC->getAPIntValue().isPowerOf2()) {
2886	APInt UnmaskedC = MulC->getAPIntValue() \| HighMask;
2887	if (UnmaskedC.isNegatedPowerOf2())
2888	return (-UnmaskedC).logBase2();
2889	}
2890	return `0`;
2891	};
2892
2893	auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2894	unsigned ShlAmt) {
2895	SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(Val: ShlAmt, VT, DL: dl);
2896	SDValue Shl = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: X, N2: ShlAmtC);
2897	SDValue Res = TLO.DAG.getNode(Opcode: NT, DL: dl, VT, N1: Y, N2: Shl);
2898	return TLO.CombineTo(O: Op, N: Res);
2899	};
2900
2901	if (isOperationLegalOrCustom(Op: ISD::SHL, VT)) {
2902	if (Op.getOpcode() == ISD::ADD) {
2903	// (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2904	if (unsigned ShAmt = getShiftLeftAmt (Op0))
2905	return foldMul (ISD::SUB, Op0.getOperand(i: `0`), Op1, ShAmt);
2906	// Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2907	if (unsigned ShAmt = getShiftLeftAmt (Op1))
2908	return foldMul (ISD::SUB, Op1.getOperand(i: `0`), Op0, ShAmt);
2909	}
2910	if (Op.getOpcode() == ISD::SUB) {
2911	// Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2912	if (unsigned ShAmt = getShiftLeftAmt (Op1))
2913	return foldMul (ISD::ADD, Op1.getOperand(i: `0`), Op0, ShAmt);
2914	}
2915	}
2916
2917	if (Op.getOpcode() == ISD::MUL) {
2918	Known = KnownBits::mul(LHS: KnownOp0, RHS: KnownOp1);
2919	} else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2920	Known = KnownBits::computeForAddSub(
2921	Add: Op.getOpcode() == ISD::ADD, NSW: Flags.hasNoSignedWrap(),
2922	NUW: Flags.hasNoUnsignedWrap(), LHS: KnownOp0, RHS: KnownOp1);
2923	}
2924	break;
2925	}
2926	default:
2927	// We also ask the target about intrinsics (which could be specific to it).
2928	if (Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
2929	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2930	// TODO: Probably okay to remove after audit; here to reduce change size
2931	// in initial enablement patch for scalable vectors
2932	if (Op.getValueType().isScalableVector())
2933	break;
2934	if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2935	Known, TLO, Depth))
2936	return true;
2937	break;
2938	}
2939
2940	// Just use computeKnownBits to compute output bits.
2941	Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2942	break;
2943	}
2944
2945	// If we know the value of all of the demanded bits, return this as a
2946	// constant.
2947	if (!isTargetCanonicalConstantNode(Op) &&
2948	DemandedBits.isSubsetOf(RHS: Known.Zero \| Known.One)) {
2949	// Avoid folding to a constant if any OpaqueConstant is involved.
2950	const SDNode *N = Op.getNode();
2951	for (SDNode *Op :
2952	llvm::make_range(x: SDNodeIterator::begin(N), y: SDNodeIterator::end(N))) {
2953	if (auto *C = dyn_cast<ConstantSDNode>(Val: Op))
2954	if (C->isOpaque())
2955	return false;
2956	}
2957	if (VT.isInteger())
2958	return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: Known.One, DL: dl, VT));
2959	if (VT.isFloatingPoint())
2960	return TLO.CombineTo(
2961	O: Op,
2962	N: TLO.DAG.getConstantFP(
2963	Val: APFloat (TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), DL: dl, VT));
2964	}
2965
2966	// A multi use 'all demanded elts' simplify failed to find any knownbits.
2967	// Try again just for the original demanded elts.
2968	// Ensure we do this AFTER constant folding above.
2969	if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
2970	Known = TLO.DAG.computeKnownBits(Op, DemandedElts: OriginalDemandedElts, Depth);
2971
2972	return false;
2973	}
2974
2975	bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2976	const APInt &DemandedElts,
2977	DAGCombinerInfo &DCI) const {
2978	SelectionDAG &DAG = DCI.DAG;
2979	TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2980	!DCI.isBeforeLegalizeOps());
2981
2982	APInt KnownUndef, KnownZero;
2983	bool Simplified =
2984	SimplifyDemandedVectorElts(Op, DemandedEltMask: DemandedElts, KnownUndef, KnownZero, TLO);
2985	if (Simplified) {
2986	DCI.AddToWorklist(N: Op.getNode());
2987	DCI.CommitTargetLoweringOpt(TLO);
2988	}
2989
2990	return Simplified;
2991	}
2992
2993	/// Given a vector binary operation and known undefined elements for each input
2994	/// operand, compute whether each element of the output is undefined.
2995	static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
2996	const APInt &UndefOp0,
2997	const APInt &UndefOp1) {
2998	EVT VT = BO.getValueType();
2999	assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
3000	"Vector binop only");
3001
3002	EVT EltVT = VT.getVectorElementType();
3003	unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : `1`;
3004	assert(UndefOp0.getBitWidth() == NumElts &&
3005	UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3006
3007	auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3008	const APInt &UndefVals) {
3009	if (UndefVals [Index])
3010	return DAG.getUNDEF(VT: EltVT);
3011
3012	if (auto *BV = dyn_cast<BuildVectorSDNode>(Val&: V)) {
3013	// Try hard to make sure that the getNode() call is not creating temporary
3014	// nodes. Ignore opaque integers because they do not constant fold.
3015	SDValue Elt = BV->getOperand(Num: Index);
3016	auto *C = dyn_cast<ConstantSDNode>(Val&: Elt);
3017	if (isa<ConstantFPSDNode>(Val: Elt) \|\| Elt.isUndef() \|\| (C && !C->isOpaque()))
3018	return Elt;
3019	}
3020
3021	return SDValue ();
3022	};
3023
3024	APInt KnownUndef = APInt::getZero(numBits: NumElts);
3025	for (unsigned i = `0`; i != NumElts; ++i) {
3026	// If both inputs for this element are either constant or undef and match
3027	// the element type, compute the constant/undef result for this element of
3028	// the vector.
3029	// TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3030	// not handle FP constants. The code within getNode() should be refactored
3031	// to avoid the danger of creating a bogus temporary node here.
3032	SDValue C0 = getUndefOrConstantElt (BO.getOperand(i: `0`), i, UndefOp0);
3033	SDValue C1 = getUndefOrConstantElt (BO.getOperand(i: `1`), i, UndefOp1);
3034	if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3035	if (DAG.getNode(Opcode: BO.getOpcode(), DL: SDLoc (BO), VT: EltVT, N1: C0, N2: C1).isUndef())
3036	KnownUndef.setBit(i);
3037	}
3038	return KnownUndef;
3039	}
3040
3041	bool TargetLowering::SimplifyDemandedVectorElts(
3042	SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3043	APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3044	bool AssumeSingleUse) const {
3045	EVT VT = Op.getValueType();
3046	unsigned Opcode = Op.getOpcode();
3047	APInt DemandedElts = OriginalDemandedElts;
3048	unsigned NumElts = DemandedElts.getBitWidth();
3049	assert(VT.isVector() && "Expected vector op");
3050
3051	KnownUndef = KnownZero = APInt::getZero(numBits: NumElts);
3052
3053	const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
3054	if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO))
3055	return false;
3056
3057	// TODO: For now we assume we know nothing about scalable vectors.
3058	if (VT.isScalableVector())
3059	return false;
3060
3061	assert(VT.getVectorNumElements() == NumElts &&
3062	"Mask size mismatches value type element count!");
3063
3064	// Undef operand.
3065	if (Op.isUndef()) {
3066	KnownUndef.setAllBits();
3067	return false;
3068	}
3069
3070	// If Op has other users, assume that all elements are needed.
3071	if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3072	DemandedElts.setAllBits();
3073
3074	// Not demanding any elements from Op.
3075	if (DemandedElts == `0`) {
3076	KnownUndef.setAllBits();
3077	return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3078	}
3079
3080	// Limit search depth.
3081	if (Depth >= SelectionDAG::MaxRecursionDepth)
3082	return false;
3083
3084	SDLoc DL(Op);
3085	unsigned EltSizeInBits = VT.getScalarSizeInBits();
3086	bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3087
3088	// Helper for demanding the specified elements and all the bits of both binary
3089	// operands.
3090	auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3091	SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op: Op0, DemandedElts,
3092	DAG&: TLO.DAG, Depth: Depth + `1`);
3093	SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op: Op1, DemandedElts,
3094	DAG&: TLO.DAG, Depth: Depth + `1`);
3095	if (NewOp0 \|\| NewOp1) {
3096	SDValue NewOp =
3097	TLO.DAG.getNode(Opcode, DL: SDLoc (Op), VT, N1: NewOp0 ? NewOp0 : Op0,
3098	N2: NewOp1 ? NewOp1 : Op1, Flags: Op ->getFlags());
3099	return TLO.CombineTo(O: Op, N: NewOp);
3100	}
3101	return false;
3102	};
3103
3104	switch (Opcode) {
3105	case ISD::SCALAR_TO_VECTOR: {
3106	if (!DemandedElts [`0`]) {
3107	KnownUndef.setAllBits();
3108	return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3109	}
3110	SDValue ScalarSrc = Op.getOperand(i: `0`);
3111	if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3112	SDValue Src = ScalarSrc.getOperand(i: `0`);
3113	SDValue Idx = ScalarSrc.getOperand(i: `1`);
3114	EVT SrcVT = Src.getValueType();
3115
3116	ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
3117
3118	if (SrcEltCnt.isScalable())
3119	return false;
3120
3121	unsigned NumSrcElts = SrcEltCnt.getFixedValue();
3122	if (isNullConstant(V: Idx)) {
3123	APInt SrcDemandedElts = APInt::getOneBitSet(numBits: NumSrcElts, BitNo: `0`);
3124	APInt SrcUndef = KnownUndef.zextOrTrunc(width: NumSrcElts);
3125	APInt SrcZero = KnownZero.zextOrTrunc(width: NumSrcElts);
3126	if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
3127	TLO, Depth: Depth + `1`))
3128	return true;
3129	}
3130	}
3131	KnownUndef.setHighBits(NumElts - `1`);
3132	break;
3133	}
3134	case ISD::BITCAST: {
3135	SDValue Src = Op.getOperand(i: `0`);
3136	EVT SrcVT = Src.getValueType();
3137
3138	// We only handle vectors here.
3139	// TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
3140	if (!SrcVT.isVector())
3141	break;
3142
3143	// Fast handling of 'identity' bitcasts.
3144	unsigned NumSrcElts = SrcVT.getVectorNumElements();
3145	if (NumSrcElts == NumElts)
3146	return SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedElts, KnownUndef,
3147	KnownZero, TLO, Depth: Depth + `1`);
3148
3149	APInt SrcDemandedElts, SrcZero, SrcUndef;
3150
3151	// Bitcast from 'large element' src vector to 'small element' vector, we
3152	// must demand a source element if any DemandedElt maps to it.
3153	if ((NumElts % NumSrcElts) == `0`) {
3154	unsigned Scale = NumElts / NumSrcElts;
3155	SrcDemandedElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
3156	if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
3157	TLO, Depth: Depth + `1`))
3158	return true;
3159
3160	// Try calling SimplifyDemandedBits, converting demanded elts to the bits
3161	// of the large element.
3162	// TODO - bigendian once we have test coverage.
3163	if (IsLE) {
3164	unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3165	APInt SrcDemandedBits = APInt::getZero(numBits: SrcEltSizeInBits);
3166	for (unsigned i = `0`; i != NumElts; ++i)
3167	if (DemandedElts [i]) {
3168	unsigned Ofs = (i % Scale) * EltSizeInBits;
3169	SrcDemandedBits.setBits(loBit: Ofs, hiBit: Ofs + EltSizeInBits);
3170	}
3171
3172	KnownBits Known;
3173	if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: SrcDemandedBits, OriginalDemandedElts: SrcDemandedElts, Known,
3174	TLO, Depth: Depth + `1`))
3175	return true;
3176
3177	// The bitcast has split each wide element into a number of
3178	// narrow subelements. We have just computed the Known bits
3179	// for wide elements. See if element splitting results in
3180	// some subelements being zero. Only for demanded elements!
3181	for (unsigned SubElt = `0`; SubElt != Scale; ++SubElt) {
3182	if (!Known.Zero.extractBits(numBits: EltSizeInBits, bitPosition: SubElt * EltSizeInBits)
3183	.isAllOnes())
3184	continue;
3185	for (unsigned SrcElt = `0`; SrcElt != NumSrcElts; ++SrcElt) {
3186	unsigned Elt = Scale * SrcElt + SubElt;
3187	if (DemandedElts [Elt])
3188	KnownZero.setBit(Elt);
3189	}
3190	}
3191	}
3192
3193	// If the src element is zero/undef then all the output elements will be -
3194	// only demanded elements are guaranteed to be correct.
3195	for (unsigned i = `0`; i != NumSrcElts; ++i) {
3196	if (SrcDemandedElts [i]) {
3197	if (SrcZero [i])
3198	KnownZero.setBits(loBit: i * Scale, hiBit: (i + `1`) * Scale);
3199	if (SrcUndef [i])
3200	KnownUndef.setBits(loBit: i * Scale, hiBit: (i + `1`) * Scale);
3201	}
3202	}
3203	}
3204
3205	// Bitcast from 'small element' src vector to 'large element' vector, we
3206	// demand all smaller source elements covered by the larger demanded element
3207	// of this vector.
3208	if ((NumSrcElts % NumElts) == `0`) {
3209	unsigned Scale = NumSrcElts / NumElts;
3210	SrcDemandedElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
3211	if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
3212	TLO, Depth: Depth + `1`))
3213	return true;
3214
3215	// If all the src elements covering an output element are zero/undef, then
3216	// the output element will be as well, assuming it was demanded.
3217	for (unsigned i = `0`; i != NumElts; ++i) {
3218	if (DemandedElts [i]) {
3219	if (SrcZero.extractBits(numBits: Scale, bitPosition: i * Scale).isAllOnes())
3220	KnownZero.setBit(i);
3221	if (SrcUndef.extractBits(numBits: Scale, bitPosition: i * Scale).isAllOnes())
3222	KnownUndef.setBit(i);
3223	}
3224	}
3225	}
3226	break;
3227	}
3228	case ISD::FREEZE: {
3229	SDValue N0 = Op.getOperand(i: `0`);
3230	if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(Op: N0, DemandedElts,
3231	/PoisonOnly=/false))
3232	return TLO.CombineTo(O: Op, N: N0);
3233
3234	// TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3235	// freeze(op(x, ...)) -> op(freeze(x), ...).
3236	if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == `1`)
3237	return TLO.CombineTo(
3238	O: Op, N: TLO.DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT,
3239	Operand: TLO.DAG.getFreeze(V: N0.getOperand(i: `0`))));
3240	break;
3241	}
3242	case ISD::BUILD_VECTOR: {
3243	// Check all elements and simplify any unused elements with UNDEF.
3244	if (!DemandedElts.isAllOnes()) {
3245	// Don't simplify BROADCASTS.
3246	if (llvm::any_of(Range: Op ->op_values(),
3247	P: [&](SDValue Elt) { return Op.getOperand(i: `0`) != Elt; })) {
3248	SmallVector<SDValue, `32`> Ops(Op ->op_begin(), Op ->op_end());
3249	bool Updated = false;
3250	for (unsigned i = `0`; i != NumElts; ++i) {
3251	if (!DemandedElts [i] && !Ops [i].isUndef()) {
3252	Ops [i] = TLO.DAG.getUNDEF(VT: Ops [`0`].getValueType());
3253	KnownUndef.setBit(i);
3254	Updated = true;
3255	}
3256	}
3257	if (Updated)
3258	return TLO.CombineTo(O: Op, N: TLO.DAG.getBuildVector(VT, DL, Ops));
3259	}
3260	}
3261	for (unsigned i = `0`; i != NumElts; ++i) {
3262	SDValue SrcOp = Op.getOperand(i);
3263	if (SrcOp.isUndef()) {
3264	KnownUndef.setBit(i);
3265	} else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3266	(isNullConstant(V: SrcOp) \|\| isNullFPConstant(V: SrcOp))) {
3267	KnownZero.setBit(i);
3268	}
3269	}
3270	break;
3271	}
3272	case ISD::CONCAT_VECTORS: {
3273	EVT SubVT = Op.getOperand(i: `0`).getValueType();
3274	unsigned NumSubVecs = Op.getNumOperands();
3275	unsigned NumSubElts = SubVT.getVectorNumElements();
3276	for (unsigned i = `0`; i != NumSubVecs; ++i) {
3277	SDValue SubOp = Op.getOperand(i);
3278	APInt SubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
3279	APInt SubUndef, SubZero;
3280	if (SimplifyDemandedVectorElts(Op: SubOp, OriginalDemandedElts: SubElts, KnownUndef&: SubUndef, KnownZero&: SubZero, TLO,
3281	Depth: Depth + `1`))
3282	return true;
3283	KnownUndef.insertBits(SubBits: SubUndef, bitPosition: i * NumSubElts);
3284	KnownZero.insertBits(SubBits: SubZero, bitPosition: i * NumSubElts);
3285	}
3286
3287	// Attempt to avoid multi-use ops if we don't need anything from them.
3288	if (!DemandedElts.isAllOnes()) {
3289	bool FoundNewSub = false;
3290	SmallVector<SDValue, `2`> DemandedSubOps;
3291	for (unsigned i = `0`; i != NumSubVecs; ++i) {
3292	SDValue SubOp = Op.getOperand(i);
3293	APInt SubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
3294	SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3295	Op: SubOp, DemandedElts: SubElts, DAG&: TLO.DAG, Depth: Depth + `1`);
3296	DemandedSubOps.push_back(Elt: NewSubOp ? NewSubOp : SubOp);
3297	FoundNewSub = NewSubOp ? true : FoundNewSub;
3298	}
3299	if (FoundNewSub) {
3300	SDValue NewOp =
3301	TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc (Op), VT, Ops: DemandedSubOps);
3302	return TLO.CombineTo(O: Op, N: NewOp);
3303	}
3304	}
3305	break;
3306	}
3307	case ISD::INSERT_SUBVECTOR: {
3308	// Demand any elements from the subvector and the remainder from the src its
3309	// inserted into.
3310	SDValue Src = Op.getOperand(i: `0`);
3311	SDValue Sub = Op.getOperand(i: `1`);
3312	uint64_t Idx = Op.getConstantOperandVal(i: `2`);
3313	unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3314	APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx);
3315	APInt DemandedSrcElts = DemandedElts;
3316	DemandedSrcElts.insertBits(SubBits: APInt::getZero(numBits: NumSubElts), bitPosition: Idx);
3317
3318	APInt SubUndef, SubZero;
3319	if (SimplifyDemandedVectorElts(Op: Sub, OriginalDemandedElts: DemandedSubElts, KnownUndef&: SubUndef, KnownZero&: SubZero, TLO,
3320	Depth: Depth + `1`))
3321	return true;
3322
3323	// If none of the src operand elements are demanded, replace it with undef.
3324	if (!DemandedSrcElts && !Src.isUndef())
3325	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT,
3326	N1: TLO.DAG.getUNDEF(VT), N2: Sub,
3327	N3: Op.getOperand(i: `2`)));
3328
3329	if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef, KnownZero,
3330	TLO, Depth: Depth + `1`))
3331	return true;
3332	KnownUndef.insertBits(SubBits: SubUndef, bitPosition: Idx);
3333	KnownZero.insertBits(SubBits: SubZero, bitPosition: Idx);
3334
3335	// Attempt to avoid multi-use ops if we don't need anything from them.
3336	if (!DemandedSrcElts.isAllOnes() \|\| !DemandedSubElts.isAllOnes()) {
3337	SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3338	Op: Src, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + `1`);
3339	SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3340	Op: Sub, DemandedElts: DemandedSubElts, DAG&: TLO.DAG, Depth: Depth + `1`);
3341	if (NewSrc \|\| NewSub) {
3342	NewSrc = NewSrc ? NewSrc : Src;
3343	NewSub = NewSub ? NewSub : Sub;
3344	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc (Op), VT, N1: NewSrc,
3345	N2: NewSub, N3: Op.getOperand(i: `2`));
3346	return TLO.CombineTo(O: Op, N: NewOp);
3347	}
3348	}
3349	break;
3350	}
3351	case ISD::EXTRACT_SUBVECTOR: {
3352	// Offset the demanded elts by the subvector index.
3353	SDValue Src = Op.getOperand(i: `0`);
3354	if (Src.getValueType().isScalableVector())
3355	break;
3356	uint64_t Idx = Op.getConstantOperandVal(i: `1`);
3357	unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3358	APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts).shl(shiftAmt: Idx);
3359
3360	APInt SrcUndef, SrcZero;
3361	if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3362	Depth: Depth + `1`))
3363	return true;
3364	KnownUndef = SrcUndef.extractBits(numBits: NumElts, bitPosition: Idx);
3365	KnownZero = SrcZero.extractBits(numBits: NumElts, bitPosition: Idx);
3366
3367	// Attempt to avoid multi-use ops if we don't need anything from them.
3368	if (!DemandedElts.isAllOnes()) {
3369	SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3370	Op: Src, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + `1`);
3371	if (NewSrc) {
3372	SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc (Op), VT, N1: NewSrc,
3373	N2: Op.getOperand(i: `1`));
3374	return TLO.CombineTo(O: Op, N: NewOp);
3375	}
3376	}
3377	break;
3378	}
3379	case ISD::INSERT_VECTOR_ELT: {
3380	SDValue Vec = Op.getOperand(i: `0`);
3381	SDValue Scl = Op.getOperand(i: `1`);
3382	auto *CIdx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `2`));
3383
3384	// For a legal, constant insertion index, if we don't need this insertion
3385	// then strip it, else remove it from the demanded elts.
3386	if (CIdx && CIdx->getAPIntValue().ult(RHS: NumElts)) {
3387	unsigned Idx = CIdx->getZExtValue();
3388	if (!DemandedElts [Idx])
3389	return TLO.CombineTo(O: Op, N: Vec);
3390
3391	APInt DemandedVecElts(DemandedElts);
3392	DemandedVecElts.clearBit(BitPosition: Idx);
3393	if (SimplifyDemandedVectorElts(Op: Vec, OriginalDemandedElts: DemandedVecElts, KnownUndef,
3394	KnownZero, TLO, Depth: Depth + `1`))
3395	return true;
3396
3397	KnownUndef.setBitVal(BitPosition: Idx, BitValue: Scl.isUndef());
3398
3399	KnownZero.setBitVal(BitPosition: Idx, BitValue: isNullConstant(V: Scl) \|\| isNullFPConstant(V: Scl));
3400	break;
3401	}
3402
3403	APInt VecUndef, VecZero;
3404	if (SimplifyDemandedVectorElts(Op: Vec, OriginalDemandedElts: DemandedElts, KnownUndef&: VecUndef, KnownZero&: VecZero, TLO,
3405	Depth: Depth + `1`))
3406	return true;
3407	// Without knowing the insertion index we can't set KnownUndef/KnownZero.
3408	break;
3409	}
3410	case ISD::VSELECT: {
3411	SDValue Sel = Op.getOperand(i: `0`);
3412	SDValue LHS = Op.getOperand(i: `1`);
3413	SDValue RHS = Op.getOperand(i: `2`);
3414
3415	// Try to transform the select condition based on the current demanded
3416	// elements.
3417	APInt UndefSel, ZeroSel;
3418	if (SimplifyDemandedVectorElts(Op: Sel, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefSel, KnownZero&: ZeroSel, TLO,
3419	Depth: Depth + `1`))
3420	return true;
3421
3422	// See if we can simplify either vselect operand.
3423	APInt DemandedLHS(DemandedElts);
3424	APInt DemandedRHS(DemandedElts);
3425	APInt UndefLHS, ZeroLHS;
3426	APInt UndefRHS, ZeroRHS;
3427	if (SimplifyDemandedVectorElts(Op: LHS, OriginalDemandedElts: DemandedLHS, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3428	Depth: Depth + `1`))
3429	return true;
3430	if (SimplifyDemandedVectorElts(Op: RHS, OriginalDemandedElts: DemandedRHS, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3431	Depth: Depth + `1`))
3432	return true;
3433
3434	KnownUndef = UndefLHS & UndefRHS;
3435	KnownZero = ZeroLHS & ZeroRHS;
3436
3437	// If we know that the selected element is always zero, we don't need the
3438	// select value element.
3439	APInt DemandedSel = DemandedElts & ~KnownZero;
3440	if (DemandedSel != DemandedElts)
3441	if (SimplifyDemandedVectorElts(Op: Sel, OriginalDemandedElts: DemandedSel, KnownUndef&: UndefSel, KnownZero&: ZeroSel, TLO,
3442	Depth: Depth + `1`))
3443	return true;
3444
3445	break;
3446	}
3447	case ISD::VECTOR_SHUFFLE: {
3448	SDValue LHS = Op.getOperand(i: `0`);
3449	SDValue RHS = Op.getOperand(i: `1`);
3450	ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Val&: Op)->getMask();
3451
3452	// Collect demanded elements from shuffle operands..
3453	APInt DemandedLHS(NumElts, `0`);
3454	APInt DemandedRHS(NumElts, `0`);
3455	for (unsigned i = `0`; i != NumElts; ++i) {
3456	int M = ShuffleMask [i];
3457	if (M < `0` \|\| !DemandedElts [i])
3458	continue;
3459	assert(`0` <= M && M < (int)(`2` * NumElts) && "Shuffle index out of range");
3460	if (M < (int)NumElts)
3461	DemandedLHS.setBit(M);
3462	else
3463	DemandedRHS.setBit(M - NumElts);
3464	}
3465
3466	// See if we can simplify either shuffle operand.
3467	APInt UndefLHS, ZeroLHS;
3468	APInt UndefRHS, ZeroRHS;
3469	if (SimplifyDemandedVectorElts(Op: LHS, OriginalDemandedElts: DemandedLHS, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3470	Depth: Depth + `1`))
3471	return true;
3472	if (SimplifyDemandedVectorElts(Op: RHS, OriginalDemandedElts: DemandedRHS, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3473	Depth: Depth + `1`))
3474	return true;
3475
3476	// Simplify mask using undef elements from LHS/RHS.
3477	bool Updated = false;
3478	bool IdentityLHS = true, IdentityRHS = true;
3479	SmallVector<int, `32`> NewMask(ShuffleMask);
3480	for (unsigned i = `0`; i != NumElts; ++i) {
3481	int &M = NewMask [i];
3482	if (M < `0`)
3483	continue;
3484	if (!DemandedElts [i] \|\| (M < (int)NumElts && UndefLHS [M]) \|\|
3485	(M >= (int)NumElts && UndefRHS [M - NumElts])) {
3486	Updated = true;
3487	M = -`1`;
3488	}
3489	IdentityLHS &= (M < `0`) \|\| (M == (int)i);
3490	IdentityRHS &= (M < `0`) \|\| ((M - NumElts) == i);
3491	}
3492
3493	// Update legal shuffle masks based on demanded elements if it won't reduce
3494	// to Identity which can cause premature removal of the shuffle mask.
3495	if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3496	SDValue LegalShuffle =
3497	buildLegalVectorShuffle(VT, DL, N0: LHS, N1: RHS, Mask: NewMask, DAG&: TLO.DAG);
3498	if (LegalShuffle)
3499	return TLO.CombineTo(O: Op, N: LegalShuffle);
3500	}
3501
3502	// Propagate undef/zero elements from LHS/RHS.
3503	for (unsigned i = `0`; i != NumElts; ++i) {
3504	int M = ShuffleMask [i];
3505	if (M < `0`) {
3506	KnownUndef.setBit(i);
3507	} else if (M < (int)NumElts) {
3508	if (UndefLHS [M])
3509	KnownUndef.setBit(i);
3510	if (ZeroLHS [M])
3511	KnownZero.setBit(i);
3512	} else {
3513	if (UndefRHS [M - NumElts])
3514	KnownUndef.setBit(i);
3515	if (ZeroRHS [M - NumElts])
3516	KnownZero.setBit(i);
3517	}
3518	}
3519	break;
3520	}
3521	case ISD::ANY_EXTEND_VECTOR_INREG:
3522	case ISD::SIGN_EXTEND_VECTOR_INREG:
3523	case ISD::ZERO_EXTEND_VECTOR_INREG: {
3524	APInt SrcUndef, SrcZero;
3525	SDValue Src = Op.getOperand(i: `0`);
3526	unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3527	APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts);
3528	if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3529	Depth: Depth + `1`))
3530	return true;
3531	KnownZero = SrcZero.zextOrTrunc(width: NumElts);
3532	KnownUndef = SrcUndef.zextOrTrunc(width: NumElts);
3533
3534	if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3535	Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3536	DemandedSrcElts == `1`) {
3537	// aext - if we just need the bottom element then we can bitcast.
3538	return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
3539	}
3540
3541	if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3542	// zext(undef) upper bits are guaranteed to be zero.
3543	if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3544	return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: `0`, DL: SDLoc (Op), VT));
3545	KnownUndef.clearAllBits();
3546
3547	// zext - if we just need the bottom element then we can mask:
3548	// zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3549	if (IsLE && DemandedSrcElts == `1` && Src.getOpcode() == ISD::AND &&
3550	Op ->isOnlyUserOf(N: Src.getNode()) &&
3551	Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3552	SDLoc DL(Op);
3553	EVT SrcVT = Src.getValueType();
3554	EVT SrcSVT = SrcVT.getScalarType();
3555	SmallVector<SDValue> MaskElts;
3556	MaskElts.push_back(Elt: TLO.DAG.getAllOnesConstant(DL, VT: SrcSVT));
3557	MaskElts.append(NumInputs: NumSrcElts - `1`, Elt: TLO.DAG.getConstant(Val: `0`, DL, VT: SrcSVT));
3558	SDValue Mask = TLO.DAG.getBuildVector(VT: SrcVT, DL, Ops: MaskElts);
3559	if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3560	Opcode: ISD::AND, DL, VT: SrcVT, Ops: {Src.getOperand(i: `1`), Mask})) {
3561	Fold = TLO.DAG.getNode(Opcode: ISD::AND, DL, VT: SrcVT, N1: Src.getOperand(i: `0`), N2: Fold);
3562	return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Fold));
3563	}
3564	}
3565	}
3566	break;
3567	}
3568
3569	// TODO: There are more binop opcodes that could be handled here - MIN,
3570	// MAX, saturated math, etc.
3571	case ISD::ADD: {
3572	SDValue Op0 = Op.getOperand(i: `0`);
3573	SDValue Op1 = Op.getOperand(i: `1`);
3574	if (Op0 == Op1 && Op ->isOnlyUserOf(N: Op0.getNode())) {
3575	APInt UndefLHS, ZeroLHS;
3576	if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3577	Depth: Depth + `1`, /AssumeSingleUse/ true))
3578	return true;
3579	}
3580	[[fallthrough]];
3581	}
3582	case ISD::AVGCEILS:
3583	case ISD::AVGCEILU:
3584	case ISD::AVGFLOORS:
3585	case ISD::AVGFLOORU:
3586	case ISD::OR:
3587	case ISD::XOR:
3588	case ISD::SUB:
3589	case ISD::FADD:
3590	case ISD::FSUB:
3591	case ISD::FMUL:
3592	case ISD::FDIV:
3593	case ISD::FREM: {
3594	SDValue Op0 = Op.getOperand(i: `0`);
3595	SDValue Op1 = Op.getOperand(i: `1`);
3596
3597	APInt UndefRHS, ZeroRHS;
3598	if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3599	Depth: Depth + `1`))
3600	return true;
3601	APInt UndefLHS, ZeroLHS;
3602	if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3603	Depth: Depth + `1`))
3604	return true;
3605
3606	KnownZero = ZeroLHS & ZeroRHS;
3607	KnownUndef = getKnownUndefForVectorBinop(BO: Op, DAG&: TLO.DAG, UndefOp0: UndefLHS, UndefOp1: UndefRHS);
3608
3609	// Attempt to avoid multi-use ops if we don't need anything from them.
3610	// TODO - use KnownUndef to relax the demandedelts?
3611	if (!DemandedElts.isAllOnes())
3612	if (SimplifyDemandedVectorEltsBinOp (Op0, Op1))
3613	return true;
3614	break;
3615	}
3616	case ISD::SHL:
3617	case ISD::SRL:
3618	case ISD::SRA:
3619	case ISD::ROTL:
3620	case ISD::ROTR: {
3621	SDValue Op0 = Op.getOperand(i: `0`);
3622	SDValue Op1 = Op.getOperand(i: `1`);
3623
3624	APInt UndefRHS, ZeroRHS;
3625	if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3626	Depth: Depth + `1`))
3627	return true;
3628	APInt UndefLHS, ZeroLHS;
3629	if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3630	Depth: Depth + `1`))
3631	return true;
3632
3633	KnownZero = ZeroLHS;
3634	KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3635
3636	// Attempt to avoid multi-use ops if we don't need anything from them.
3637	// TODO - use KnownUndef to relax the demandedelts?
3638	if (!DemandedElts.isAllOnes())
3639	if (SimplifyDemandedVectorEltsBinOp (Op0, Op1))
3640	return true;
3641	break;
3642	}
3643	case ISD::MUL:
3644	case ISD::MULHU:
3645	case ISD::MULHS:
3646	case ISD::AND: {
3647	SDValue Op0 = Op.getOperand(i: `0`);
3648	SDValue Op1 = Op.getOperand(i: `1`);
3649
3650	APInt SrcUndef, SrcZero;
3651	if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3652	Depth: Depth + `1`))
3653	return true;
3654	// If we know that a demanded element was zero in Op1 we don't need to
3655	// demand it in Op0 - its guaranteed to be zero.
3656	APInt DemandedElts0 = DemandedElts & ~SrcZero;
3657	if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts0, KnownUndef, KnownZero,
3658	TLO, Depth: Depth + `1`))
3659	return true;
3660
3661	KnownUndef &= DemandedElts0;
3662	KnownZero &= DemandedElts0;
3663
3664	// If every element pair has a zero/undef then just fold to zero.
3665	// fold (and x, undef) -> 0 / (and x, 0) -> 0
3666	// fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3667	if (DemandedElts.isSubsetOf(RHS: SrcZero \| KnownZero \| SrcUndef \| KnownUndef))
3668	return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: `0`, DL: SDLoc (Op), VT));
3669
3670	// If either side has a zero element, then the result element is zero, even
3671	// if the other is an UNDEF.
3672	// TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3673	// and then handle 'and' nodes with the rest of the binop opcodes.
3674	KnownZero \|= SrcZero;
3675	KnownUndef &= SrcUndef;
3676	KnownUndef &= ~KnownZero;
3677
3678	// Attempt to avoid multi-use ops if we don't need anything from them.
3679	if (!DemandedElts.isAllOnes())
3680	if (SimplifyDemandedVectorEltsBinOp (Op0, Op1))
3681	return true;
3682	break;
3683	}
3684	case ISD::TRUNCATE:
3685	case ISD::SIGN_EXTEND:
3686	case ISD::ZERO_EXTEND:
3687	if (SimplifyDemandedVectorElts(Op: Op.getOperand(i: `0`), OriginalDemandedElts: DemandedElts, KnownUndef,
3688	KnownZero, TLO, Depth: Depth + `1`))
3689	return true;
3690
3691	if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3692	// zext(undef) upper bits are guaranteed to be zero.
3693	if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3694	return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: `0`, DL: SDLoc (Op), VT));
3695	KnownUndef.clearAllBits();
3696	}
3697	break;
3698	default: {
3699	if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3700	if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3701	KnownZero, TLO, Depth))
3702	return true;
3703	} else {
3704	KnownBits Known;
3705	APInt DemandedBits = APInt::getAllOnes(numBits: EltSizeInBits);
3706	if (SimplifyDemandedBits(Op, OriginalDemandedBits: DemandedBits, OriginalDemandedElts, Known,
3707	TLO, Depth, AssumeSingleUse))
3708	return true;
3709	}
3710	break;
3711	}
3712	}
3713	assert((KnownUndef & KnownZero) == `0` && "Elements flagged as undef AND zero");
3714
3715	// Constant fold all undef cases.
3716	// TODO: Handle zero cases as well.
3717	if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3718	return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3719
3720	return false;
3721	}
3722
3723	/// Determine which of the bits specified in Mask are known to be either zero or
3724	/// one and return them in the Known.
3725	void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3726	KnownBits &Known,
3727	const APInt &DemandedElts,
3728	const SelectionDAG &DAG,
3729	unsigned Depth) const {
3730	assert((Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
3731	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
3732	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
3733	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3734	"Should use MaskedValueIsZero if you don't know whether Op"
3735	" is a target node!");
3736	Known.resetAll();
3737	}
3738
3739	void TargetLowering::computeKnownBitsForTargetInstr(
3740	GISelKnownBits &Analysis, Register R, KnownBits &Known,
3741	const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3742	unsigned Depth) const {
3743	Known.resetAll();
3744	}
3745
3746	void TargetLowering::computeKnownBitsForFrameIndex(
3747	const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3748	// The low bits are known zero if the pointer is aligned.
3749	Known.Zero.setLowBits(Log2(A: MF.getFrameInfo().getObjectAlign(ObjectIdx: FrameIdx)));
3750	}
3751
3752	Align TargetLowering::computeKnownAlignForTargetInstr(
3753	GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
3754	unsigned Depth) const {
3755	return Align (`1`);
3756	}
3757
3758	/// This method can be implemented by targets that want to expose additional
3759	/// information about sign bits to the DAG Combiner.
3760	unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3761	const APInt &,
3762	const SelectionDAG &,
3763	unsigned Depth) const {
3764	assert((Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
3765	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
3766	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
3767	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3768	"Should use ComputeNumSignBits if you don't know whether Op"
3769	" is a target node!");
3770	return `1`;
3771	}
3772
3773	unsigned TargetLowering::computeNumSignBitsForTargetInstr(
3774	GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
3775	const MachineRegisterInfo &MRI, unsigned Depth) const {
3776	return `1`;
3777	}
3778
3779	bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3780	SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3781	TargetLoweringOpt &TLO, unsigned Depth) const {
3782	assert((Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
3783	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
3784	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
3785	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3786	"Should use SimplifyDemandedVectorElts if you don't know whether Op"
3787	" is a target node!");
3788	return false;
3789	}
3790
3791	bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3792	SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3793	KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3794	assert((Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
3795	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
3796	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
3797	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3798	"Should use SimplifyDemandedBits if you don't know whether Op"
3799	" is a target node!");
3800	computeKnownBitsForTargetNode(Op, Known, DemandedElts, DAG: TLO.DAG, Depth);
3801	return false;
3802	}
3803
3804	SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3805	SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3806	SelectionDAG &DAG, unsigned Depth) const {
3807	assert(
3808	(Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
3809	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
3810	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
3811	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3812	"Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3813	" is a target node!");
3814	return SDValue ();
3815	}
3816
3817	SDValue
3818	TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3819	SDValue N1, MutableArrayRef<int> Mask,
3820	SelectionDAG &DAG) const {
3821	bool LegalMask = isShuffleMaskLegal(Mask, VT);
3822	if (!LegalMask) {
3823	std::swap(a&: N0, b&: N1);
3824	ShuffleVectorSDNode::commuteMask(Mask);
3825	LegalMask = isShuffleMaskLegal(Mask, VT);
3826	}
3827
3828	if (!LegalMask)
3829	return SDValue ();
3830
3831	return DAG.getVectorShuffle(VT, dl: DL, N1: N0, N2: N1, Mask);
3832	}
3833
3834	const Constant TargetLowering::getTargetConstantFromLoad(LoadSDNode) const {
3835	return nullptr;
3836	}
3837
3838	bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
3839	SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3840	bool PoisonOnly, unsigned Depth) const {
3841	assert(
3842	(Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
3843	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
3844	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
3845	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3846	"Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3847	" is a target node!");
3848
3849	// If Op can't create undef/poison and none of its operands are undef/poison
3850	// then Op is never undef/poison.
3851	return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
3852	/ConsiderFlags/ true, Depth) &&
3853	all_of(Range: Op ->ops(), P: [&](SDValue V) {
3854	return DAG.isGuaranteedNotToBeUndefOrPoison(Op: V, PoisonOnly,
3855	Depth: Depth + `1`);
3856	});
3857	}
3858
3859	bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
3860	SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3861	bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
3862	assert((Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
3863	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
3864	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
3865	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3866	"Should use canCreateUndefOrPoison if you don't know whether Op"
3867	" is a target node!");
3868	// Be conservative and return true.
3869	return true;
3870	}
3871
3872	bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
3873	const SelectionDAG &DAG,
3874	bool SNaN,
3875	unsigned Depth) const {
3876	assert((Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
3877	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
3878	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
3879	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3880	"Should use isKnownNeverNaN if you don't know whether Op"
3881	" is a target node!");
3882	return false;
3883	}
3884
3885	bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
3886	const APInt &DemandedElts,
3887	APInt &UndefElts,
3888	const SelectionDAG &DAG,
3889	unsigned Depth) const {
3890	assert((Op.getOpcode() >= ISD::BUILTIN_OP_END \|\|
3891	Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN \|\|
3892	Op.getOpcode() == ISD::INTRINSIC_W_CHAIN \|\|
3893	Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3894	"Should use isSplatValue if you don't know whether Op"
3895	" is a target node!");
3896	return false;
3897	}
3898
3899	// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3900	// work with truncating build vectors and vectors with elements of less than
3901	// 8 bits.
3902	bool TargetLowering::isConstTrueVal(SDValue N) const {
3903	if (!N)
3904	return false;
3905
3906	unsigned EltWidth;
3907	APInt CVal;
3908	if (ConstantSDNode CN = isConstOrConstSplat(N, /AllowUndefs=/*false,
3909	/AllowTruncation=/true)) {
3910	CVal = CN->getAPIntValue();
3911	EltWidth = N.getValueType().getScalarSizeInBits();
3912	} else
3913	return false;
3914
3915	// If this is a truncating splat, truncate the splat value.
3916	// Otherwise, we may fail to match the expected values below.
3917	if (EltWidth < CVal.getBitWidth())
3918	CVal = CVal.trunc(width: EltWidth);
3919
3920	switch (getBooleanContents(Type: N.getValueType())) {
3921	case UndefinedBooleanContent:
3922	return CVal [`0`];
3923	case ZeroOrOneBooleanContent:
3924	return CVal.isOne();
3925	case ZeroOrNegativeOneBooleanContent:
3926	return CVal.isAllOnes();
3927	}
3928
3929	llvm_unreachable("Invalid boolean contents");
3930	}
3931
3932	bool TargetLowering::isConstFalseVal(SDValue N) const {
3933	if (!N)
3934	return false;
3935
3936	const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: N);
3937	if (!CN) {
3938	const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Val&: N);
3939	if (!BV)
3940	return false;
3941
3942	// Only interested in constant splats, we don't care about undef
3943	// elements in identifying boolean constants and getConstantSplatNode
3944	// returns NULL if all ops are undef;
3945	CN = BV->getConstantSplatNode();
3946	if (!CN)
3947	return false;
3948	}
3949
3950	if (getBooleanContents(Type: N ->getValueType(ResNo: `0`)) == UndefinedBooleanContent)
3951	return !CN->getAPIntValue()[`0`];
3952
3953	return CN->isZero();
3954	}
3955
3956	bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
3957	bool SExt) const {
3958	if (VT == MVT::i1)
3959	return N->isOne();
3960
3961	TargetLowering::BooleanContent Cnt = getBooleanContents(Type: VT);
3962	switch (Cnt) {
3963	case TargetLowering::ZeroOrOneBooleanContent:
3964	// An extended value of 1 is always true, unless its original type is i1,
3965	// in which case it will be sign extended to -1.
3966	return (N->isOne() && !SExt) \|\| (SExt && (N->getValueType(ResNo: `0`) != MVT::i1));
3967	case TargetLowering::UndefinedBooleanContent:
3968	case TargetLowering::ZeroOrNegativeOneBooleanContent:
3969	return N->isAllOnes() && SExt;
3970	}
3971	llvm_unreachable("Unexpected enumeration.");
3972	}
3973
3974	/// This helper function of SimplifySetCC tries to optimize the comparison when
3975	/// either operand of the SetCC node is a bitwise-and instruction.
3976	SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
3977	ISD::CondCode Cond, const SDLoc &DL,
3978	DAGCombinerInfo &DCI) const {
3979	if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
3980	std::swap(a&: N0, b&: N1);
3981
3982	SelectionDAG &DAG = DCI.DAG;
3983	EVT OpVT = N0.getValueType();
3984	if (N0.getOpcode() != ISD::AND \|\| !OpVT.isInteger() \|\|
3985	(Cond != ISD::SETEQ && Cond != ISD::SETNE))
3986	return SDValue ();
3987
3988	// (X & Y) != 0 --> zextOrTrunc(X & Y)
3989	// iff everything but LSB is known zero:
3990	if (Cond == ISD::SETNE && isNullConstant(V: N1) &&
3991	(getBooleanContents(Type: OpVT) == TargetLowering::UndefinedBooleanContent \|\|
3992	getBooleanContents(Type: OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
3993	unsigned NumEltBits = OpVT.getScalarSizeInBits();
3994	APInt UpperBits = APInt::getHighBitsSet(numBits: NumEltBits, hiBitsSet: NumEltBits - `1`);
3995	if (DAG.MaskedValueIsZero(Op: N0, Mask: UpperBits))
3996	return DAG.getBoolExtOrTrunc(Op: N0, SL: DL, VT, OpVT);
3997	}
3998
3999	// Try to eliminate a power-of-2 mask constant by converting to a signbit
4000	// test in a narrow type that we can truncate to with no cost. Examples:
4001	// (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
4002	// (i32 X & 32768) != 0 --> (trunc X to i16) < 0
4003	// TODO: This conservatively checks for type legality on the source and
4004	// destination types. That may inhibit optimizations, but it also
4005	// allows setcc->shift transforms that may be more beneficial.
4006	auto *AndC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: `1`));
4007	if (AndC && isNullConstant(V: N1) && AndC->getAPIntValue().isPowerOf2() &&
4008	isTypeLegal(VT: OpVT) && N0.hasOneUse()) {
4009	EVT NarrowVT = EVT::getIntegerVT(Context&: *DAG.getContext(),
4010	BitWidth: AndC->getAPIntValue().getActiveBits());
4011	if (isTruncateFree(FromVT: OpVT, ToVT: NarrowVT) && isTypeLegal(VT: NarrowVT)) {
4012	SDValue Trunc = DAG.getZExtOrTrunc(Op: N0.getOperand(i: `0`), DL, VT: NarrowVT);
4013	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT: NarrowVT);
4014	return DAG.getSetCC(DL, VT, LHS: Trunc, RHS: Zero,
4015	Cond: Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
4016	}
4017	}
4018
4019	// Match these patterns in any of their permutations:
4020	// (X & Y) == Y
4021	// (X & Y) != Y
4022	SDValue X, Y;
4023	if (N0.getOperand(i: `0`) == N1) {
4024	X = N0.getOperand(i: `1`);
4025	Y = N0.getOperand(i: `0`);
4026	} else if (N0.getOperand(i: `1`) == N1) {
4027	X = N0.getOperand(i: `0`);
4028	Y = N0.getOperand(i: `1`);
4029	} else {
4030	return SDValue ();
4031	}
4032
4033	// TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4034	// `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
4035	// its liable to create and infinite loop.
4036	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT: OpVT);
4037	if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
4038	DAG.isKnownToBeAPowerOfTwo(Val: Y)) {
4039	// Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4040	// Note that where Y is variable and is known to have at most one bit set
4041	// (for example, if it is Z & 1) we cannot do this; the expressions are not
4042	// equivalent when Y == 0.
4043	assert(OpVT.isInteger());
4044	Cond = ISD::getSetCCInverse(Operation: Cond, Type: OpVT);
4045	if (DCI.isBeforeLegalizeOps() \|\|
4046	isCondCodeLegal(CC: Cond, VT: N0.getSimpleValueType()))
4047	return DAG.getSetCC(DL, VT, LHS: N0, RHS: Zero, Cond);
4048	} else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
4049	// If the target supports an 'and-not' or 'and-complement' logic operation,
4050	// try to use that to make a comparison operation more efficient.
4051	// But don't do this transform if the mask is a single bit because there are
4052	// more efficient ways to deal with that case (for example, 'bt' on x86 or
4053	// 'rlwinm' on PPC).
4054
4055	// Bail out if the compare operand that we want to turn into a zero is
4056	// already a zero (otherwise, infinite loop).
4057	if (isNullConstant(V: Y))
4058	return SDValue ();
4059
4060	// Transform this into: ~X & Y == 0.
4061	SDValue NotX = DAG.getNOT(DL: SDLoc (X), Val: X, VT: OpVT);
4062	SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL: SDLoc (N0), VT: OpVT, N1: NotX, N2: Y);
4063	return DAG.getSetCC(DL, VT, LHS: NewAnd, RHS: Zero, Cond);
4064	}
4065
4066	return SDValue ();
4067	}
4068
4069	/// There are multiple IR patterns that could be checking whether certain
4070	/// truncation of a signed number would be lossy or not. The pattern which is
4071	/// best at IR level, may not lower optimally. Thus, we want to unfold it.
4072	/// We are looking for the following pattern: (KeptBits is a constant)
4073	/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4074	/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4075	/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
4076	/// We will unfold it into the natural trunc+sext pattern:
4077	/// ((%x << C) a>> C) dstcond %x
4078	/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
4079	SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4080	EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4081	const SDLoc &DL) const {
4082	// We must be comparing with a constant.
4083	ConstantSDNode *C1;
4084	if (!(C1 = dyn_cast<ConstantSDNode>(Val&: N1)))
4085	return SDValue ();
4086
4087	// N0 should be: add %x, (1 << (KeptBits-1))
4088	if (N0 ->getOpcode() != ISD::ADD)
4089	return SDValue ();
4090
4091	// And we must be 'add'ing a constant.
4092	ConstantSDNode *C01;
4093	if (!(C01 = dyn_cast<ConstantSDNode>(Val: N0 ->getOperand(Num: `1`))))
4094	return SDValue ();
4095
4096	SDValue X = N0 ->getOperand(Num: `0`);
4097	EVT XVT = X.getValueType();
4098
4099	// Validate constants ...
4100
4101	APInt I1 = C1->getAPIntValue();
4102
4103	ISD::CondCode NewCond;
4104	if (Cond == ISD::CondCode::SETULT) {
4105	NewCond = ISD::CondCode::SETEQ;
4106	} else if (Cond == ISD::CondCode::SETULE) {
4107	NewCond = ISD::CondCode::SETEQ;
4108	// But need to 'canonicalize' the constant.
4109	I1 += `1`;
4110	} else if (Cond == ISD::CondCode::SETUGT) {
4111	NewCond = ISD::CondCode::SETNE;
4112	// But need to 'canonicalize' the constant.
4113	I1 += `1`;
4114	} else if (Cond == ISD::CondCode::SETUGE) {
4115	NewCond = ISD::CondCode::SETNE;
4116	} else
4117	return SDValue ();
4118
4119	APInt I01 = C01->getAPIntValue();
4120
4121	auto checkConstants = [&I1, &I01]() -> bool {
4122	// Both of them must be power-of-two, and the constant from setcc is bigger.
4123	return I1.ugt(RHS: I01) && I1.isPowerOf2() && I01.isPowerOf2();
4124	};
4125
4126	if (checkConstants ()) {
4127	// Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4128	} else {
4129	// What if we invert constants? (and the target predicate)
4130	I1.negate();
4131	I01.negate();
4132	assert(XVT.isInteger());
4133	NewCond = getSetCCInverse(Operation: NewCond, Type: XVT);
4134	if (!checkConstants ())
4135	return SDValue ();
4136	// Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4137	}
4138
4139	// They are power-of-two, so which bit is set?
4140	const unsigned KeptBits = I1.logBase2();
4141	const unsigned KeptBitsMinusOne = I01.logBase2();
4142
4143	// Magic!
4144	if (KeptBits != (KeptBitsMinusOne + `1`))
4145	return SDValue ();
4146	assert(KeptBits > `0` && KeptBits < XVT.getSizeInBits() && "unreachable");
4147
4148	// We don't want to do this in every single case.
4149	SelectionDAG &DAG = DCI.DAG;
4150	if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
4151	XVT, KeptBits))
4152	return SDValue ();
4153
4154	// Unfold into: sext_inreg(%x) cond %x
4155	// Where 'cond' will be either 'eq' or 'ne'.
4156	SDValue SExtInReg = DAG.getNode(
4157	Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: XVT, N1: X,
4158	N2: DAG.getValueType(EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: KeptBits)));
4159	return DAG.getSetCC(DL, VT: SCCVT, LHS: SExtInReg, RHS: X, Cond: NewCond);
4160	}
4161
4162	// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
4163	SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4164	EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4165	DAGCombinerInfo &DCI, const SDLoc &DL) const {
4166	assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() &&
4167	"Should be a comparison with 0.");
4168	assert((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
4169	"Valid only for [in]equality comparisons.");
4170
4171	unsigned NewShiftOpcode;
4172	SDValue X, C, Y;
4173
4174	SelectionDAG &DAG = DCI.DAG;
4175	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4176
4177	// Look for '(C l>>/<< Y)'.
4178	auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
4179	// The shift should be one-use.
4180	if (!V.hasOneUse())
4181	return false;
4182	unsigned OldShiftOpcode = V.getOpcode();
4183	switch (OldShiftOpcode) {
4184	case ISD::SHL:
4185	NewShiftOpcode = ISD::SRL;
4186	break;
4187	case ISD::SRL:
4188	NewShiftOpcode = ISD::SHL;
4189	break;
4190	default:
4191	return false; // must be a logical shift.
4192	}
4193	// We should be shifting a constant.
4194	// FIXME: best to use isConstantOrConstantVector().
4195	C = V.getOperand(i: `0`);
4196	ConstantSDNode *CC =
4197	isConstOrConstSplat(N: C, /AllowUndefs=/true, /AllowTruncation=/true);
4198	if (!CC)
4199	return false;
4200	Y = V.getOperand(i: `1`);
4201
4202	ConstantSDNode *XC =
4203	isConstOrConstSplat(N: X, /AllowUndefs=/true, /AllowTruncation=/true);
4204	return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
4205	X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4206	};
4207
4208	// LHS of comparison should be an one-use 'and'.
4209	if (N0.getOpcode() != ISD::AND \|\| !N0.hasOneUse())
4210	return SDValue ();
4211
4212	X = N0.getOperand(i: `0`);
4213	SDValue Mask = N0.getOperand(i: `1`);
4214
4215	// 'and' is commutative!
4216	if (!Match (Mask)) {
4217	std::swap(a&: X, b&: Mask);
4218	if (!Match (Mask))
4219	return SDValue ();
4220	}
4221
4222	EVT VT = X.getValueType();
4223
4224	// Produce:
4225	// ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4226	SDValue T0 = DAG.getNode(Opcode: NewShiftOpcode, DL, VT, N1: X, N2: Y);
4227	SDValue T1 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: T0, N2: C);
4228	SDValue T2 = DAG.getSetCC(DL, VT: SCCVT, LHS: T1, RHS: N1C, Cond);
4229	return T2;
4230	}
4231
4232	/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4233	/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4234	/// handle the commuted versions of these patterns.
4235	SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4236	ISD::CondCode Cond, const SDLoc &DL,
4237	DAGCombinerInfo &DCI) const {
4238	unsigned BOpcode = N0.getOpcode();
4239	assert((BOpcode == ISD::ADD \|\| BOpcode == ISD::SUB \|\| BOpcode == ISD::XOR) &&
4240	"Unexpected binop");
4241	assert((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) && "Unexpected condcode");
4242
4243	// (X + Y) == X --> Y == 0
4244	// (X - Y) == X --> Y == 0
4245	// (X ^ Y) == X --> Y == 0
4246	SelectionDAG &DAG = DCI.DAG;
4247	EVT OpVT = N0.getValueType();
4248	SDValue X = N0.getOperand(i: `0`);
4249	SDValue Y = N0.getOperand(i: `1`);
4250	if (X == N1)
4251	return DAG.getSetCC(DL, VT, LHS: Y, RHS: DAG.getConstant(Val: `0`, DL, VT: OpVT), Cond);
4252
4253	if (Y != N1)
4254	return SDValue ();
4255
4256	// (X + Y) == Y --> X == 0
4257	// (X ^ Y) == Y --> X == 0
4258	if (BOpcode == ISD::ADD \|\| BOpcode == ISD::XOR)
4259	return DAG.getSetCC(DL, VT, LHS: X, RHS: DAG.getConstant(Val: `0`, DL, VT: OpVT), Cond);
4260
4261	// The shift would not be valid if the operands are boolean (i1).
4262	if (!N0.hasOneUse() \|\| OpVT.getScalarSizeInBits() == `1`)
4263	return SDValue ();
4264
4265	// (X - Y) == Y --> X == Y << 1
4266	SDValue One = DAG.getShiftAmountConstant(Val: `1`, VT: OpVT, DL);
4267	SDValue YShl1 = DAG.getNode(Opcode: ISD::SHL, DL, VT: N1.getValueType(), N1: Y, N2: One);
4268	if (!DCI.isCalledByLegalizer())
4269	DCI.AddToWorklist(N: YShl1.getNode());
4270	return DAG.getSetCC(DL, VT, LHS: X, RHS: YShl1, Cond);
4271	}
4272
4273	static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
4274	SDValue N0, const APInt &C1,
4275	ISD::CondCode Cond, const SDLoc &dl,
4276	SelectionDAG &DAG) {
4277	// Look through truncs that don't change the value of a ctpop.
4278	// FIXME: Add vector support? Need to be careful with setcc result type below.
4279	SDValue CTPOP = N0;
4280	if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4281	N0.getScalarValueSizeInBits() > Log2_32(Value: N0.getOperand(i: `0`).getScalarValueSizeInBits()))
4282	CTPOP = N0.getOperand(i: `0`);
4283
4284	if (CTPOP.getOpcode() != ISD::CTPOP \|\| !CTPOP.hasOneUse())
4285	return SDValue ();
4286
4287	EVT CTVT = CTPOP.getValueType();
4288	SDValue CTOp = CTPOP.getOperand(i: `0`);
4289
4290	// Expand a power-of-2-or-zero comparison based on ctpop:
4291	// (ctpop x) u< 2 -> (x & x-1) == 0
4292	// (ctpop x) u> 1 -> (x & x-1) != 0
4293	if (Cond == ISD::SETULT \|\| Cond == ISD::SETUGT) {
4294	// Keep the CTPOP if it is a cheap vector op.
4295	if (CTVT.isVector() && TLI.isCtpopFast(VT: CTVT))
4296	return SDValue ();
4297
4298	unsigned CostLimit = TLI.getCustomCtpopCost(VT: CTVT, Cond);
4299	if (C1.ugt(RHS: CostLimit + (Cond == ISD::SETULT)))
4300	return SDValue ();
4301	if (C1 == `0` && (Cond == ISD::SETULT))
4302	return SDValue (); // This is handled elsewhere.
4303
4304	unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4305
4306	SDValue NegOne = DAG.getAllOnesConstant(DL: dl, VT: CTVT);
4307	SDValue Result = CTOp;
4308	for (unsigned i = `0`; i < Passes; i++) {
4309	SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: CTVT, N1: Result, N2: NegOne);
4310	Result = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: CTVT, N1: Result, N2: Add);
4311	}
4312	ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
4313	return DAG.getSetCC(DL: dl, VT, LHS: Result, RHS: DAG.getConstant(Val: `0`, DL: dl, VT: CTVT), Cond: CC);
4314	}
4315
4316	// Expand a power-of-2 comparison based on ctpop
4317	if ((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) && C1 == `1`) {
4318	// Keep the CTPOP if it is cheap.
4319	if (TLI.isCtpopFast(VT: CTVT))
4320	return SDValue ();
4321
4322	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT: CTVT);
4323	SDValue NegOne = DAG.getAllOnesConstant(DL: dl, VT: CTVT);
4324	assert(CTVT.isInteger());
4325	SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: CTVT, N1: CTOp, N2: NegOne);
4326
4327	// Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4328	// check before emitting a potentially unnecessary op.
4329	if (DAG.isKnownNeverZero(Op: CTOp)) {
4330	// (ctpop x) == 1 --> (x & x-1) == 0
4331	// (ctpop x) != 1 --> (x & x-1) != 0
4332	SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: CTVT, N1: CTOp, N2: Add);
4333	SDValue RHS = DAG.getSetCC(DL: dl, VT, LHS: And, RHS: Zero, Cond);
4334	return RHS;
4335	}
4336
4337	// (ctpop x) == 1 --> (x ^ x-1) > x-1
4338	// (ctpop x) != 1 --> (x ^ x-1) <= x-1
4339	SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: CTVT, N1: CTOp, N2: Add);
4340	ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
4341	return DAG.getSetCC(DL: dl, VT, LHS: Xor, RHS: Add, Cond: CmpCond);
4342	}
4343
4344	return SDValue ();
4345	}
4346
4347	static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4348	ISD::CondCode Cond, const SDLoc &dl,
4349	SelectionDAG &DAG) {
4350	if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4351	return SDValue ();
4352
4353	auto C1 = isConstOrConstSplat(N: N1, /* AllowUndefs / true);
4354	if (!C1 \|\| !(C1->isZero() \|\| C1->isAllOnes()))
4355	return SDValue ();
4356
4357	auto getRotateSource = [](SDValue X) {
4358	if (X.getOpcode() == ISD::ROTL \|\| X.getOpcode() == ISD::ROTR)
4359	return X.getOperand(i: `0`);
4360	return SDValue ();
4361	};
4362
4363	// Peek through a rotated value compared against 0 or -1:
4364	// (rot X, Y) == 0/-1 --> X == 0/-1
4365	// (rot X, Y) != 0/-1 --> X != 0/-1
4366	if (SDValue R = getRotateSource (N0))
4367	return DAG.getSetCC(DL: dl, VT, LHS: R, RHS: N1, Cond);
4368
4369	// Peek through an 'or' of a rotated value compared against 0:
4370	// or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4371	// or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4372	//
4373	// TODO: Add the 'and' with -1 sibling.
4374	// TODO: Recurse through a series of 'or' ops to find the rotate.
4375	EVT OpVT = N0.getValueType();
4376	if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4377	if (SDValue R = getRotateSource (N0.getOperand(i: `0`))) {
4378	SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: R, N2: N0.getOperand(i: `1`));
4379	return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4380	}
4381	if (SDValue R = getRotateSource (N0.getOperand(i: `1`))) {
4382	SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: R, N2: N0.getOperand(i: `0`));
4383	return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4384	}
4385	}
4386
4387	return SDValue ();
4388	}
4389
4390	static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
4391	ISD::CondCode Cond, const SDLoc &dl,
4392	SelectionDAG &DAG) {
4393	// If we are testing for all-bits-clear, we might be able to do that with
4394	// less shifting since bit-order does not matter.
4395	if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4396	return SDValue ();
4397
4398	auto C1 = isConstOrConstSplat(N: N1, /* AllowUndefs / true);
4399	if (!C1 \|\| !C1->isZero())
4400	return SDValue ();
4401
4402	if (!N0.hasOneUse() \|\|
4403	(N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4404	return SDValue ();
4405
4406	unsigned BitWidth = N0.getScalarValueSizeInBits();
4407	auto *ShAmtC = isConstOrConstSplat(N: N0.getOperand(i: `2`));
4408	if (!ShAmtC \|\| ShAmtC->getAPIntValue().uge(RHS: BitWidth))
4409	return SDValue ();
4410
4411	// Canonicalize fshr as fshl to reduce pattern-matching.
4412	unsigned ShAmt = ShAmtC->getZExtValue();
4413	if (N0.getOpcode() == ISD::FSHR)
4414	ShAmt = BitWidth - ShAmt;
4415
4416	// Match an 'or' with a specific operand 'Other' in either commuted variant.
4417	SDValue X, Y;
4418	auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4419	if (Or.getOpcode() != ISD::OR \|\| !Or.hasOneUse())
4420	return false;
4421	if (Or.getOperand(i: `0`) == Other) {
4422	X = Or.getOperand(i: `0`);
4423	Y = Or.getOperand(i: `1`);
4424	return true;
4425	}
4426	if (Or.getOperand(i: `1`) == Other) {
4427	X = Or.getOperand(i: `1`);
4428	Y = Or.getOperand(i: `0`);
4429	return true;
4430	}
4431	return false;
4432	};
4433
4434	EVT OpVT = N0.getValueType();
4435	EVT ShAmtVT = N0.getOperand(i: `2`).getValueType();
4436	SDValue F0 = N0.getOperand(i: `0`);
4437	SDValue F1 = N0.getOperand(i: `1`);
4438	if (matchOr (F0, F1)) {
4439	// fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4440	SDValue NewShAmt = DAG.getConstant(Val: ShAmt, DL: dl, VT: ShAmtVT);
4441	SDValue Shift = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: OpVT, N1: Y, N2: NewShAmt);
4442	SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: Shift, N2: X);
4443	return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4444	}
4445	if (matchOr (F1, F0)) {
4446	// fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4447	SDValue NewShAmt = DAG.getConstant(Val: BitWidth - ShAmt, DL: dl, VT: ShAmtVT);
4448	SDValue Shift = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: OpVT, N1: Y, N2: NewShAmt);
4449	SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: Shift, N2: X);
4450	return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4451	}
4452
4453	return SDValue ();
4454	}
4455
4456	/// Try to simplify a setcc built with the specified operands and cc. If it is
4457	/// unable to simplify it, return a null SDValue.
4458	SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4459	ISD::CondCode Cond, bool foldBooleans,
4460	DAGCombinerInfo &DCI,
4461	const SDLoc &dl) const {
4462	SelectionDAG &DAG = DCI.DAG;
4463	const DataLayout &Layout = DAG.getDataLayout();
4464	EVT OpVT = N0.getValueType();
4465	AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4466
4467	// Constant fold or commute setcc.
4468	if (SDValue Fold = DAG.FoldSetCC(VT, N1: N0, N2: N1, Cond, dl))
4469	return Fold;
4470
4471	bool N0ConstOrSplat =
4472	isConstOrConstSplat(N: N0, /AllowUndefs/ false, /AllowTruncate/ AllowTruncation: true);
4473	bool N1ConstOrSplat =
4474	isConstOrConstSplat(N: N1, /AllowUndefs/ false, /AllowTruncate/ AllowTruncation: true);
4475
4476	// Canonicalize toward having the constant on the RHS.
4477	// TODO: Handle non-splat vector constants. All undef causes trouble.
4478	// FIXME: We can't yet fold constant scalable vector splats, so avoid an
4479	// infinite loop here when we encounter one.
4480	ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Operation: Cond);
4481	if (N0ConstOrSplat && !N1ConstOrSplat &&
4482	(DCI.isBeforeLegalizeOps() \|\|
4483	isCondCodeLegal(CC: SwappedCC, VT: N0.getSimpleValueType())))
4484	return DAG.getSetCC(DL: dl, VT, LHS: N1, RHS: N0, Cond: SwappedCC);
4485
4486	// If we have a subtract with the same 2 non-constant operands as this setcc
4487	// -- but in reverse order -- then try to commute the operands of this setcc
4488	// to match. A matching pair of setcc (cmp) and sub may be combined into 1
4489	// instruction on some targets.
4490	if (!N0ConstOrSplat && !N1ConstOrSplat &&
4491	(DCI.isBeforeLegalizeOps() \|\|
4492	isCondCodeLegal(CC: SwappedCC, VT: N0.getSimpleValueType())) &&
4493	DAG.doesNodeExist(Opcode: ISD::SUB, VTList: DAG.getVTList(VT: OpVT), Ops: {N1, N0}) &&
4494	!DAG.doesNodeExist(Opcode: ISD::SUB, VTList: DAG.getVTList(VT: OpVT), Ops: {N0, N1}))
4495	return DAG.getSetCC(DL: dl, VT, LHS: N1, RHS: N0, Cond: SwappedCC);
4496
4497	if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4498	return V;
4499
4500	if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4501	return V;
4502
4503	if (auto *N1C = isConstOrConstSplat(N: N1)) {
4504	const APInt &C1 = N1C->getAPIntValue();
4505
4506	// Optimize some CTPOP cases.
4507	if (SDValue V = simplifySetCCWithCTPOP(TLI: *this, VT, N0, C1, Cond, dl, DAG))
4508	return V;
4509
4510	// For equality to 0 of a no-wrap multiply, decompose and test each op:
4511	// X Y == 0 --> (X == 0) \|\| (Y == 0)*
4512	// X Y != 0 --> (X != 0) && (Y != 0)*
4513	// TODO: This bails out if minsize is set, but if the target doesn't have a
4514	// single instruction multiply for this type, it would likely be
4515	// smaller to decompose.
4516	if (C1.isZero() && (Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
4517	N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4518	(N0 ->getFlags().hasNoUnsignedWrap() \|\|
4519	N0 ->getFlags().hasNoSignedWrap()) &&
4520	!Attr.hasFnAttr(Kind: Attribute::MinSize)) {
4521	SDValue IsXZero = DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`), RHS: N1, Cond);
4522	SDValue IsYZero = DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `1`), RHS: N1, Cond);
4523	unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4524	return DAG.getNode(Opcode: LogicOp, DL: dl, VT, N1: IsXZero, N2: IsYZero);
4525	}
4526
4527	// If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4528	// equality comparison, then we're just comparing whether X itself is
4529	// zero.
4530	if (N0.getOpcode() == ISD::SRL && (C1.isZero() \|\| C1.isOne()) &&
4531	N0.getOperand(i: `0`).getOpcode() == ISD::CTLZ &&
4532	llvm::has_single_bit<uint32_t>(Value: N0.getScalarValueSizeInBits())) {
4533	if (ConstantSDNode *ShAmt = isConstOrConstSplat(N: N0.getOperand(i: `1`))) {
4534	if ((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
4535	ShAmt->getAPIntValue() == Log2_32(Value: N0.getScalarValueSizeInBits())) {
4536	if ((C1 == `0`) == (Cond == ISD::SETEQ)) {
4537	// (srl (ctlz x), 5) == 0 -> X != 0
4538	// (srl (ctlz x), 5) != 1 -> X != 0
4539	Cond = ISD::SETNE;
4540	} else {
4541	// (srl (ctlz x), 5) != 0 -> X == 0
4542	// (srl (ctlz x), 5) == 1 -> X == 0
4543	Cond = ISD::SETEQ;
4544	}
4545	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT: N0.getValueType());
4546	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`).getOperand(i: `0`), RHS: Zero,
4547	Cond);
4548	}
4549	}
4550	}
4551	}
4552
4553	// FIXME: Support vectors.
4554	if (auto *N1C = dyn_cast<ConstantSDNode>(Val: N1.getNode())) {
4555	const APInt &C1 = N1C->getAPIntValue();
4556
4557	// (zext x) == C --> x == (trunc C)
4558	// (sext x) == C --> x == (trunc C)
4559	if ((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
4560	DCI.isBeforeLegalize() && N0 ->hasOneUse()) {
4561	unsigned MinBits = N0.getValueSizeInBits();
4562	SDValue PreExt;
4563	bool Signed = false;
4564	if (N0 ->getOpcode() == ISD::ZERO_EXTEND) {
4565	// ZExt
4566	MinBits = N0 ->getOperand(Num: `0`).getValueSizeInBits();
4567	PreExt = N0 ->getOperand(Num: `0`);
4568	} else if (N0 ->getOpcode() == ISD::AND) {
4569	// DAGCombine turns costly ZExts into ANDs
4570	if (auto *C = dyn_cast<ConstantSDNode>(Val: N0 ->getOperand(Num: `1`)))
4571	if ((C->getAPIntValue()+`1`).isPowerOf2()) {
4572	MinBits = C->getAPIntValue().countr_one();
4573	PreExt = N0 ->getOperand(Num: `0`);
4574	}
4575	} else if (N0 ->getOpcode() == ISD::SIGN_EXTEND) {
4576	// SExt
4577	MinBits = N0 ->getOperand(Num: `0`).getValueSizeInBits();
4578	PreExt = N0 ->getOperand(Num: `0`);
4579	Signed = true;
4580	} else if (auto *LN0 = dyn_cast<LoadSDNode>(Val&: N0)) {
4581	// ZEXTLOAD / SEXTLOAD
4582	if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4583	MinBits = LN0->getMemoryVT().getSizeInBits();
4584	PreExt = N0;
4585	} else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4586	Signed = true;
4587	MinBits = LN0->getMemoryVT().getSizeInBits();
4588	PreExt = N0;
4589	}
4590	}
4591
4592	// Figure out how many bits we need to preserve this constant.
4593	unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4594
4595	// Make sure we're not losing bits from the constant.
4596	if (MinBits > `0` &&
4597	MinBits < C1.getBitWidth() &&
4598	MinBits >= ReqdBits) {
4599	EVT MinVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: MinBits);
4600	if (isTypeDesirableForOp(ISD::SETCC, VT: MinVT)) {
4601	// Will get folded away.
4602	SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MinVT, Operand: PreExt);
4603	if (MinBits == `1` && C1 == `1`)
4604	// Invert the condition.
4605	return DAG.getSetCC(DL: dl, VT, LHS: Trunc, RHS: DAG.getConstant(Val: `0`, DL: dl, VT: MVT::i1),
4606	Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4607	SDValue C = DAG.getConstant(Val: C1.trunc(width: MinBits), DL: dl, VT: MinVT);
4608	return DAG.getSetCC(DL: dl, VT, LHS: Trunc, RHS: C, Cond);
4609	}
4610
4611	// If truncating the setcc operands is not desirable, we can still
4612	// simplify the expression in some cases:
4613	// setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4614	// setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4615	// setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4616	// setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4617	// setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4618	// setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4619	SDValue TopSetCC = N0 ->getOperand(Num: `0`);
4620	unsigned N0Opc = N0 ->getOpcode();
4621	bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4622	if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4623	TopSetCC.getOpcode() == ISD::SETCC &&
4624	(N0Opc == ISD::ZERO_EXTEND \|\| N0Opc == ISD::SIGN_EXTEND) &&
4625	(isConstFalseVal(N: N1) \|\|
4626	isExtendedTrueVal(N: N1C, VT: N0 ->getValueType(ResNo: `0`), SExt))) {
4627
4628	bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) \|\|
4629	(!N1C->isZero() && Cond == ISD::SETNE);
4630
4631	if (!Inverse)
4632	return TopSetCC;
4633
4634	ISD::CondCode InvCond = ISD::getSetCCInverse(
4635	Operation: cast<CondCodeSDNode>(Val: TopSetCC.getOperand(i: `2`))->get(),
4636	Type: TopSetCC.getOperand(i: `0`).getValueType());
4637	return DAG.getSetCC(DL: dl, VT, LHS: TopSetCC.getOperand(i: `0`),
4638	RHS: TopSetCC.getOperand(i: `1`),
4639	Cond: InvCond);
4640	}
4641	}
4642	}
4643
4644	// If the LHS is '(and load, const)', the RHS is 0, the test is for
4645	// equality or unsigned, and all 1 bits of the const are in the same
4646	// partial word, see if we can shorten the load.
4647	if (DCI.isBeforeLegalize() &&
4648	!ISD::isSignedIntSetCC(Code: Cond) &&
4649	N0.getOpcode() == ISD::AND && C1 == `0` &&
4650	N0.getNode()->hasOneUse() &&
4651	isa<LoadSDNode>(Val: N0.getOperand(i: `0`)) &&
4652	N0.getOperand(i: `0`).getNode()->hasOneUse() &&
4653	isa<ConstantSDNode>(Val: N0.getOperand(i: `1`))) {
4654	auto *Lod = cast<LoadSDNode>(Val: N0.getOperand(i: `0`));
4655	APInt bestMask;
4656	unsigned bestWidth = `0`, bestOffset = `0`;
4657	if (Lod->isSimple() && Lod->isUnindexed() &&
4658	(Lod->getMemoryVT().isByteSized() \|\|
4659	isPaddedAtMostSignificantBitsWhenStored(VT: Lod->getMemoryVT()))) {
4660	unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4661	unsigned origWidth = N0.getValueSizeInBits();
4662	unsigned maskWidth = origWidth;
4663	// We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4664	// 8 bits, but have to be careful...
4665	if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4666	origWidth = Lod->getMemoryVT().getSizeInBits();
4667	const APInt &Mask = N0.getConstantOperandAPInt(i: `1`);
4668	// Only consider power-of-2 widths (and at least one byte) as candidates
4669	// for the narrowed load.
4670	for (unsigned width = `8`; width < origWidth; width *= `2`) {
4671	EVT newVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: width);
4672	if (!shouldReduceLoadWidth(Load: Lod, ExtTy: ISD::NON_EXTLOAD, NewVT: newVT))
4673	continue;
4674	APInt newMask = APInt::getLowBitsSet(numBits: maskWidth, loBitsSet: width);
4675	// Avoid accessing any padding here for now (we could use memWidth
4676	// instead of origWidth here otherwise).
4677	unsigned maxOffset = origWidth - width;
4678	for (unsigned offset = `0`; offset <= maxOffset; offset += `8`) {
4679	if (Mask.isSubsetOf(RHS: newMask)) {
4680	unsigned ptrOffset =
4681	Layout.isLittleEndian() ? offset : memWidth - width - offset;
4682	unsigned IsFast = `0`;
4683	Align NewAlign = commonAlignment(A: Lod->getAlign(), Offset: ptrOffset / `8`);
4684	if (allowsMemoryAccess(
4685	Context&: *DAG.getContext(), DL: Layout, VT: newVT, AddrSpace: Lod->getAddressSpace(),
4686	Alignment: NewAlign, Flags: Lod->getMemOperand()->getFlags(), Fast: &IsFast) &&
4687	IsFast) {
4688	bestOffset = ptrOffset / `8`;
4689	bestMask = Mask.lshr(shiftAmt: offset);
4690	bestWidth = width;
4691	break;
4692	}
4693	}
4694	newMask <<= `8`;
4695	}
4696	if (bestWidth)
4697	break;
4698	}
4699	}
4700	if (bestWidth) {
4701	EVT newVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: bestWidth);
4702	SDValue Ptr = Lod->getBasePtr();
4703	if (bestOffset != `0`)
4704	Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: bestOffset));
4705	SDValue NewLoad =
4706	DAG.getLoad(VT: newVT, dl, Chain: Lod->getChain(), Ptr,
4707	PtrInfo: Lod->getPointerInfo().getWithOffset(O: bestOffset),
4708	Alignment: Lod->getOriginalAlign());
4709	SDValue And =
4710	DAG.getNode(Opcode: ISD::AND, DL: dl, VT: newVT, N1: NewLoad,
4711	N2: DAG.getConstant(Val: bestMask.trunc(width: bestWidth), DL: dl, VT: newVT));
4712	return DAG.getSetCC(DL: dl, VT, LHS: And, RHS: DAG.getConstant(Val: `0LL`, DL: dl, VT: newVT), Cond);
4713	}
4714	}
4715
4716	// If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4717	if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4718	unsigned InSize = N0.getOperand(i: `0`).getValueSizeInBits();
4719
4720	// If the comparison constant has bits in the upper part, the
4721	// zero-extended value could never match.
4722	if (C1.intersects(RHS: APInt::getHighBitsSet(numBits: C1.getBitWidth(),
4723	hiBitsSet: C1.getBitWidth() - InSize))) {
4724	switch (Cond) {
4725	case ISD::SETUGT:
4726	case ISD::SETUGE:
4727	case ISD::SETEQ:
4728	return DAG.getConstant(Val: `0`, DL: dl, VT);
4729	case ISD::SETULT:
4730	case ISD::SETULE:
4731	case ISD::SETNE:
4732	return DAG.getConstant(Val: `1`, DL: dl, VT);
4733	case ISD::SETGT:
4734	case ISD::SETGE:
4735	// True if the sign bit of C1 is set.
4736	return DAG.getConstant(Val: C1.isNegative(), DL: dl, VT);
4737	case ISD::SETLT:
4738	case ISD::SETLE:
4739	// True if the sign bit of C1 isn't set.
4740	return DAG.getConstant(Val: C1.isNonNegative(), DL: dl, VT);
4741	default:
4742	break;
4743	}
4744	}
4745
4746	// Otherwise, we can perform the comparison with the low bits.
4747	switch (Cond) {
4748	case ISD::SETEQ:
4749	case ISD::SETNE:
4750	case ISD::SETUGT:
4751	case ISD::SETUGE:
4752	case ISD::SETULT:
4753	case ISD::SETULE: {
4754	EVT newVT = N0.getOperand(i: `0`).getValueType();
4755	if (DCI.isBeforeLegalizeOps() \|\|
4756	(isOperationLegal(Op: ISD::SETCC, VT: newVT) &&
4757	isCondCodeLegal(CC: Cond, VT: newVT.getSimpleVT()))) {
4758	EVT NewSetCCVT = getSetCCResultType(DL: Layout, Context&: *DAG.getContext(), VT: newVT);
4759	SDValue NewConst = DAG.getConstant(Val: C1.trunc(width: InSize), DL: dl, VT: newVT);
4760
4761	SDValue NewSetCC = DAG.getSetCC(DL: dl, VT: NewSetCCVT, LHS: N0.getOperand(i: `0`),
4762	RHS: NewConst, Cond);
4763	return DAG.getBoolExtOrTrunc(Op: NewSetCC, SL: dl, VT, OpVT: N0.getValueType());
4764	}
4765	break;
4766	}
4767	default:
4768	break; // todo, be more careful with signed comparisons
4769	}
4770	} else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4771	(Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
4772	!isSExtCheaperThanZExt(FromTy: cast<VTSDNode>(Val: N0.getOperand(i: `1`))->getVT(),
4773	ToTy: OpVT)) {
4774	EVT ExtSrcTy = cast<VTSDNode>(Val: N0.getOperand(i: `1`))->getVT();
4775	unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4776	EVT ExtDstTy = N0.getValueType();
4777	unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4778
4779	// If the constant doesn't fit into the number of bits for the source of
4780	// the sign extension, it is impossible for both sides to be equal.
4781	if (C1.getSignificantBits() > ExtSrcTyBits)
4782	return DAG.getBoolConstant(V: Cond == ISD::SETNE, DL: dl, VT, OpVT);
4783
4784	assert(ExtDstTy == N0.getOperand(`0`).getValueType() &&
4785	ExtDstTy != ExtSrcTy && "Unexpected types!");
4786	APInt Imm = APInt::getLowBitsSet(numBits: ExtDstTyBits, loBitsSet: ExtSrcTyBits);
4787	SDValue ZextOp = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ExtDstTy, N1: N0.getOperand(i: `0`),
4788	N2: DAG.getConstant(Val: Imm, DL: dl, VT: ExtDstTy));
4789	if (!DCI.isCalledByLegalizer())
4790	DCI.AddToWorklist(N: ZextOp.getNode());
4791	// Otherwise, make this a use of a zext.
4792	return DAG.getSetCC(DL: dl, VT, LHS: ZextOp,
4793	RHS: DAG.getConstant(Val: C1 & Imm, DL: dl, VT: ExtDstTy), Cond);
4794	} else if ((N1C->isZero() \|\| N1C->isOne()) &&
4795	(Cond == ISD::SETEQ \|\| Cond == ISD::SETNE)) {
4796	// SETCC (X), [0\|1], [EQ\|NE] -> X if X is known 0/1. i1 types are
4797	// excluded as they are handled below whilst checking for foldBooleans.
4798	if ((N0.getOpcode() == ISD::SETCC \|\| VT.getScalarType() != MVT::i1) &&
4799	isTypeLegal(VT) && VT.bitsLE(VT: N0.getValueType()) &&
4800	(N0.getValueType() == MVT::i1 \|\|
4801	getBooleanContents(Type: N0.getValueType()) == ZeroOrOneBooleanContent) &&
4802	DAG.MaskedValueIsZero(
4803	Op: N0, Mask: APInt::getBitsSetFrom(numBits: N0.getValueSizeInBits(), loBit: `1`))) {
4804	bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
4805	if (TrueWhenTrue)
4806	return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: N0);
4807	// Invert the condition.
4808	if (N0.getOpcode() == ISD::SETCC) {
4809	ISD::CondCode CC = cast<CondCodeSDNode>(Val: N0.getOperand(i: `2`))->get();
4810	CC = ISD::getSetCCInverse(Operation: CC, Type: N0.getOperand(i: `0`).getValueType());
4811	if (DCI.isBeforeLegalizeOps() \|\|
4812	isCondCodeLegal(CC, VT: N0.getOperand(i: `0`).getSimpleValueType()))
4813	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`), RHS: N0.getOperand(i: `1`), Cond: CC);
4814	}
4815	}
4816
4817	if ((N0.getOpcode() == ISD::XOR \|\|
4818	(N0.getOpcode() == ISD::AND &&
4819	N0.getOperand(i: `0`).getOpcode() == ISD::XOR &&
4820	N0.getOperand(i: `1`) == N0.getOperand(i: `0`).getOperand(i: `1`))) &&
4821	isOneConstant(V: N0.getOperand(i: `1`))) {
4822	// If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
4823	// can only do this if the top bits are known zero.
4824	unsigned BitWidth = N0.getValueSizeInBits();
4825	if (DAG.MaskedValueIsZero(Op: N0,
4826	Mask: APInt::getHighBitsSet(numBits: BitWidth,
4827	hiBitsSet: BitWidth-`1`))) {
4828	// Okay, get the un-inverted input value.
4829	SDValue Val;
4830	if (N0.getOpcode() == ISD::XOR) {
4831	Val = N0.getOperand(i: `0`);
4832	} else {
4833	assert(N0.getOpcode() == ISD::AND &&
4834	N0.getOperand(`0`).getOpcode() == ISD::XOR);
4835	// ((X^1)&1)^1 -> X & 1
4836	Val = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: N0.getValueType(),
4837	N1: N0.getOperand(i: `0`).getOperand(i: `0`),
4838	N2: N0.getOperand(i: `1`));
4839	}
4840
4841	return DAG.getSetCC(DL: dl, VT, LHS: Val, RHS: N1,
4842	Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4843	}
4844	} else if (N1C->isOne()) {
4845	SDValue Op0 = N0;
4846	if (Op0.getOpcode() == ISD::TRUNCATE)
4847	Op0 = Op0.getOperand(i: `0`);
4848
4849	if ((Op0.getOpcode() == ISD::XOR) &&
4850	Op0.getOperand(i: `0`).getOpcode() == ISD::SETCC &&
4851	Op0.getOperand(i: `1`).getOpcode() == ISD::SETCC) {
4852	SDValue XorLHS = Op0.getOperand(i: `0`);
4853	SDValue XorRHS = Op0.getOperand(i: `1`);
4854	// Ensure that the input setccs return an i1 type or 0/1 value.
4855	if (Op0.getValueType() == MVT::i1 \|\|
4856	(getBooleanContents(Type: XorLHS.getOperand(i: `0`).getValueType()) ==
4857	ZeroOrOneBooleanContent &&
4858	getBooleanContents(Type: XorRHS.getOperand(i: `0`).getValueType()) ==
4859	ZeroOrOneBooleanContent)) {
4860	// (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
4861	Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
4862	return DAG.getSetCC(DL: dl, VT, LHS: XorLHS, RHS: XorRHS, Cond);
4863	}
4864	}
4865	if (Op0.getOpcode() == ISD::AND && isOneConstant(V: Op0.getOperand(i: `1`))) {
4866	// If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
4867	if (Op0.getValueType().bitsGT(VT))
4868	Op0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
4869	N1: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Op0.getOperand(i: `0`)),
4870	N2: DAG.getConstant(Val: `1`, DL: dl, VT));
4871	else if (Op0.getValueType().bitsLT(VT))
4872	Op0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
4873	N1: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: Op0.getOperand(i: `0`)),
4874	N2: DAG.getConstant(Val: `1`, DL: dl, VT));
4875
4876	return DAG.getSetCC(DL: dl, VT, LHS: Op0,
4877	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: Op0.getValueType()),
4878	Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4879	}
4880	if (Op0.getOpcode() == ISD::AssertZext &&
4881	cast<VTSDNode>(Val: Op0.getOperand(i: `1`))->getVT() == MVT::i1)
4882	return DAG.getSetCC(DL: dl, VT, LHS: Op0,
4883	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: Op0.getValueType()),
4884	Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4885	}
4886	}
4887
4888	// Given:
4889	// icmp eq/ne (urem %x, %y), 0
4890	// Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
4891	// icmp eq/ne %x, 0
4892	if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
4893	(Cond == ISD::SETEQ \|\| Cond == ISD::SETNE)) {
4894	KnownBits XKnown = DAG.computeKnownBits(Op: N0.getOperand(i: `0`));
4895	KnownBits YKnown = DAG.computeKnownBits(Op: N0.getOperand(i: `1`));
4896	if (XKnown.countMaxPopulation() == `1` && YKnown.countMinPopulation() >= `2`)
4897	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`), RHS: N1, Cond);
4898	}
4899
4900	// Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
4901	// and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
4902	if ((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
4903	N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(Val: N0.getOperand(i: `1`)) &&
4904	N0.getConstantOperandAPInt(i: `1`) == OpVT.getScalarSizeInBits() - `1` &&
4905	N1C && N1C->isAllOnes()) {
4906	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`),
4907	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: OpVT),
4908	Cond: Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
4909	}
4910
4911	if (SDValue V =
4912	optimizeSetCCOfSignedTruncationCheck(SCCVT: VT, N0, N1, Cond, DCI, DL: dl))
4913	return V;
4914	}
4915
4916	// These simplifications apply to splat vectors as well.
4917	// TODO: Handle more splat vector cases.
4918	if (auto *N1C = isConstOrConstSplat(N: N1)) {
4919	const APInt &C1 = N1C->getAPIntValue();
4920
4921	APInt MinVal, MaxVal;
4922	unsigned OperandBitSize = N1C->getValueType(ResNo: `0`).getScalarSizeInBits();
4923	if (ISD::isSignedIntSetCC(Code: Cond)) {
4924	MinVal = APInt::getSignedMinValue(numBits: OperandBitSize);
4925	MaxVal = APInt::getSignedMaxValue(numBits: OperandBitSize);
4926	} else {
4927	MinVal = APInt::getMinValue(numBits: OperandBitSize);
4928	MaxVal = APInt::getMaxValue(numBits: OperandBitSize);
4929	}
4930
4931	// Canonicalize GE/LE comparisons to use GT/LT comparisons.
4932	if (Cond == ISD::SETGE \|\| Cond == ISD::SETUGE) {
4933	// X >= MIN --> true
4934	if (C1 == MinVal)
4935	return DAG.getBoolConstant(V: true, DL: dl, VT, OpVT);
4936
4937	if (!VT.isVector()) { // TODO: Support this for vectors.
4938	// X >= C0 --> X > (C0 - 1)
4939	APInt C = C1 - `1`;
4940	ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
4941	if ((DCI.isBeforeLegalizeOps() \|\|
4942	isCondCodeLegal(CC: NewCC, VT: VT.getSimpleVT())) &&
4943	(!N1C->isOpaque() \|\| (C.getBitWidth() <= `64` &&
4944	isLegalICmpImmediate(C.getSExtValue())))) {
4945	return DAG.getSetCC(DL: dl, VT, LHS: N0,
4946	RHS: DAG.getConstant(Val: C, DL: dl, VT: N1.getValueType()),
4947	Cond: NewCC);
4948	}
4949	}
4950	}
4951
4952	if (Cond == ISD::SETLE \|\| Cond == ISD::SETULE) {
4953	// X <= MAX --> true
4954	if (C1 == MaxVal)
4955	return DAG.getBoolConstant(V: true, DL: dl, VT, OpVT);
4956
4957	// X <= C0 --> X < (C0 + 1)
4958	if (!VT.isVector()) { // TODO: Support this for vectors.
4959	APInt C = C1 + `1`;
4960	ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
4961	if ((DCI.isBeforeLegalizeOps() \|\|
4962	isCondCodeLegal(CC: NewCC, VT: VT.getSimpleVT())) &&
4963	(!N1C->isOpaque() \|\| (C.getBitWidth() <= `64` &&
4964	isLegalICmpImmediate(C.getSExtValue())))) {
4965	return DAG.getSetCC(DL: dl, VT, LHS: N0,
4966	RHS: DAG.getConstant(Val: C, DL: dl, VT: N1.getValueType()),
4967	Cond: NewCC);
4968	}
4969	}
4970	}
4971
4972	if (Cond == ISD::SETLT \|\| Cond == ISD::SETULT) {
4973	if (C1 == MinVal)
4974	return DAG.getBoolConstant(V: false, DL: dl, VT, OpVT); // X < MIN --> false
4975
4976	// TODO: Support this for vectors after legalize ops.
4977	if (!VT.isVector() \|\| DCI.isBeforeLegalizeOps()) {
4978	// Canonicalize setlt X, Max --> setne X, Max
4979	if (C1 == MaxVal)
4980	return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: ISD::SETNE);
4981
4982	// If we have setult X, 1, turn it into seteq X, 0
4983	if (C1 == MinVal +`1`)
4984	return DAG.getSetCC(DL: dl, VT, LHS: N0,
4985	RHS: DAG.getConstant(Val: MinVal, DL: dl, VT: N0.getValueType()),
4986	Cond: ISD::SETEQ);
4987	}
4988	}
4989
4990	if (Cond == ISD::SETGT \|\| Cond == ISD::SETUGT) {
4991	if (C1 == MaxVal)
4992	return DAG.getBoolConstant(V: false, DL: dl, VT, OpVT); // X > MAX --> false
4993
4994	// TODO: Support this for vectors after legalize ops.
4995	if (!VT.isVector() \|\| DCI.isBeforeLegalizeOps()) {
4996	// Canonicalize setgt X, Min --> setne X, Min
4997	if (C1 == MinVal)
4998	return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: ISD::SETNE);
4999
5000	// If we have setugt X, Max-1, turn it into seteq X, Max
5001	if (C1 == MaxVal -`1`)
5002	return DAG.getSetCC(DL: dl, VT, LHS: N0,
5003	RHS: DAG.getConstant(Val: MaxVal, DL: dl, VT: N0.getValueType()),
5004	Cond: ISD::SETEQ);
5005	}
5006	}
5007
5008	if (Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) {
5009	// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5010	if (C1.isZero())
5011	if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5012	SCCVT: VT, N0, N1C: N1, Cond, DCI, DL: dl))
5013	return CC;
5014
5015	// For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5016	// For example, when high 32-bits of i64 X are known clear:
5017	// all bits clear: (X \| (Y<<32)) == 0 --> (X \| Y) == 0
5018	// all bits set: (X \| (Y<<32)) == -1 --> (X & Y) == -1
5019	bool CmpZero = N1C->isZero();
5020	bool CmpNegOne = N1C->isAllOnes();
5021	if ((CmpZero \|\| CmpNegOne) && N0.hasOneUse()) {
5022	// Match or(lo,shl(hi,bw/2)) pattern.
5023	auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5024	unsigned EltBits = V.getScalarValueSizeInBits();
5025	if (V.getOpcode() != ISD::OR \|\| (EltBits % `2`) != `0`)
5026	return false;
5027	SDValue LHS = V.getOperand(i: `0`);
5028	SDValue RHS = V.getOperand(i: `1`);
5029	APInt HiBits = APInt::getHighBitsSet(numBits: EltBits, hiBitsSet: EltBits / `2`);
5030	// Unshifted element must have zero upperbits.
5031	if (RHS.getOpcode() == ISD::SHL &&
5032	isa<ConstantSDNode>(Val: RHS.getOperand(i: `1`)) &&
5033	RHS.getConstantOperandAPInt(i: `1`) == (EltBits / `2`) &&
5034	DAG.MaskedValueIsZero(Op: LHS, Mask: HiBits)) {
5035	Lo = LHS;
5036	Hi = RHS.getOperand(i: `0`);
5037	return true;
5038	}
5039	if (LHS.getOpcode() == ISD::SHL &&
5040	isa<ConstantSDNode>(Val: LHS.getOperand(i: `1`)) &&
5041	LHS.getConstantOperandAPInt(i: `1`) == (EltBits / `2`) &&
5042	DAG.MaskedValueIsZero(Op: RHS, Mask: HiBits)) {
5043	Lo = RHS;
5044	Hi = LHS.getOperand(i: `0`);
5045	return true;
5046	}
5047	return false;
5048	};
5049
5050	auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5051	unsigned EltBits = N0.getScalarValueSizeInBits();
5052	unsigned HalfBits = EltBits / `2`;
5053	APInt HiBits = APInt::getHighBitsSet(numBits: EltBits, hiBitsSet: HalfBits);
5054	SDValue LoBits = DAG.getConstant(Val: ~HiBits, DL: dl, VT: OpVT);
5055	SDValue HiMask = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1: Hi, N2: LoBits);
5056	SDValue NewN0 =
5057	DAG.getNode(Opcode: CmpZero ? ISD::OR : ISD::AND, DL: dl, VT: OpVT, N1: Lo, N2: HiMask);
5058	SDValue NewN1 = CmpZero ? DAG.getConstant(Val: `0`, DL: dl, VT: OpVT) : LoBits;
5059	return DAG.getSetCC(DL: dl, VT, LHS: NewN0, RHS: NewN1, Cond);
5060	};
5061
5062	SDValue Lo, Hi;
5063	if (IsConcat (N0, Lo, Hi))
5064	return MergeConcat (Lo, Hi);
5065
5066	if (N0.getOpcode() == ISD::AND \|\| N0.getOpcode() == ISD::OR) {
5067	SDValue Lo0, Lo1, Hi0, Hi1;
5068	if (IsConcat (N0.getOperand(i: `0`), Lo0, Hi0) &&
5069	IsConcat (N0.getOperand(i: `1`), Lo1, Hi1)) {
5070	return MergeConcat (DAG.getNode(Opcode: N0.getOpcode(), DL: dl, VT: OpVT, N1: Lo0, N2: Lo1),
5071	DAG.getNode(Opcode: N0.getOpcode(), DL: dl, VT: OpVT, N1: Hi0, N2: Hi1));
5072	}
5073	}
5074	}
5075	}
5076
5077	// If we have "setcc X, C0", check to see if we can shrink the immediate
5078	// by changing cc.
5079	// TODO: Support this for vectors after legalize ops.
5080	if (!VT.isVector() \|\| DCI.isBeforeLegalizeOps()) {
5081	// SETUGT X, SINTMAX -> SETLT X, 0
5082	// SETUGE X, SINTMIN -> SETLT X, 0
5083	if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) \|\|
5084	(Cond == ISD::SETUGE && C1.isMinSignedValue()))
5085	return DAG.getSetCC(DL: dl, VT, LHS: N0,
5086	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: N1.getValueType()),
5087	Cond: ISD::SETLT);
5088
5089	// SETULT X, SINTMIN -> SETGT X, -1
5090	// SETULE X, SINTMAX -> SETGT X, -1
5091	if ((Cond == ISD::SETULT && C1.isMinSignedValue()) \|\|
5092	(Cond == ISD::SETULE && C1.isMaxSignedValue()))
5093	return DAG.getSetCC(DL: dl, VT, LHS: N0,
5094	RHS: DAG.getAllOnesConstant(DL: dl, VT: N1.getValueType()),
5095	Cond: ISD::SETGT);
5096	}
5097	}
5098
5099	// Back to non-vector simplifications.
5100	// TODO: Can we do these for vector splats?
5101	if (auto *N1C = dyn_cast<ConstantSDNode>(Val: N1.getNode())) {
5102	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5103	const APInt &C1 = N1C->getAPIntValue();
5104	EVT ShValTy = N0.getValueType();
5105
5106	// Fold bit comparisons when we can. This will result in an
5107	// incorrect value when boolean false is negative one, unless
5108	// the bitsize is 1 in which case the false value is the same
5109	// in practice regardless of the representation.
5110	if ((VT.getSizeInBits() == `1` \|\|
5111	getBooleanContents(Type: N0.getValueType()) == ZeroOrOneBooleanContent) &&
5112	(Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
5113	(VT == ShValTy \|\| (isTypeLegal(VT) && VT.bitsLE(VT: ShValTy))) &&
5114	N0.getOpcode() == ISD::AND) {
5115	if (auto *AndRHS = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: `1`))) {
5116	if (Cond == ISD::SETNE && C1 == `0`) {// (X & 8) != 0 --> (X & 8) >> 3
5117	// Perform the xform if the AND RHS is a single bit.
5118	unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5119	if (AndRHS->getAPIntValue().isPowerOf2() &&
5120	!TLI.shouldAvoidTransformToShift(VT: ShValTy, Amount: ShCt)) {
5121	return DAG.getNode(
5122	Opcode: ISD::TRUNCATE, DL: dl, VT,
5123	Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5124	N2: DAG.getShiftAmountConstant(Val: ShCt, VT: ShValTy, DL: dl)));
5125	}
5126	} else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5127	// (X & 8) == 8 --> (X & 8) >> 3
5128	// Perform the xform if C1 is a single bit.
5129	unsigned ShCt = C1.logBase2();
5130	if (C1.isPowerOf2() &&
5131	!TLI.shouldAvoidTransformToShift(VT: ShValTy, Amount: ShCt)) {
5132	return DAG.getNode(
5133	Opcode: ISD::TRUNCATE, DL: dl, VT,
5134	Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5135	N2: DAG.getShiftAmountConstant(Val: ShCt, VT: ShValTy, DL: dl)));
5136	}
5137	}
5138	}
5139	}
5140
5141	if (C1.getSignificantBits() <= `64` &&
5142	!isLegalICmpImmediate(C1.getSExtValue())) {
5143	// (X & -256) == 256 -> (X >> 8) == 1
5144	if ((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
5145	N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5146	if (auto *AndRHS = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: `1`))) {
5147	const APInt &AndRHSC = AndRHS->getAPIntValue();
5148	if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) {
5149	unsigned ShiftBits = AndRHSC.countr_zero();
5150	if (!TLI.shouldAvoidTransformToShift(VT: ShValTy, Amount: ShiftBits)) {
5151	SDValue Shift = DAG.getNode(
5152	Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0.getOperand(i: `0`),
5153	N2: DAG.getShiftAmountConstant(Val: ShiftBits, VT: ShValTy, DL: dl));
5154	SDValue CmpRHS = DAG.getConstant(Val: C1.lshr(shiftAmt: ShiftBits), DL: dl, VT: ShValTy);
5155	return DAG.getSetCC(DL: dl, VT, LHS: Shift, RHS: CmpRHS, Cond);
5156	}
5157	}
5158	}
5159	} else if (Cond == ISD::SETULT \|\| Cond == ISD::SETUGE \|\|
5160	Cond == ISD::SETULE \|\| Cond == ISD::SETUGT) {
5161	bool AdjOne = (Cond == ISD::SETULE \|\| Cond == ISD::SETUGT);
5162	// X < 0x100000000 -> (X >> 32) < 1
5163	// X >= 0x100000000 -> (X >> 32) >= 1
5164	// X <= 0x0ffffffff -> (X >> 32) < 1
5165	// X > 0x0ffffffff -> (X >> 32) >= 1
5166	unsigned ShiftBits;
5167	APInt NewC = C1;
5168	ISD::CondCode NewCond = Cond;
5169	if (AdjOne) {
5170	ShiftBits = C1.countr_one();
5171	NewC = NewC + `1`;
5172	NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5173	} else {
5174	ShiftBits = C1.countr_zero();
5175	}
5176	NewC.lshrInPlace(ShiftAmt: ShiftBits);
5177	if (ShiftBits && NewC.getSignificantBits() <= `64` &&
5178	isLegalICmpImmediate(NewC.getSExtValue()) &&
5179	!TLI.shouldAvoidTransformToShift(VT: ShValTy, Amount: ShiftBits)) {
5180	SDValue Shift =
5181	DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5182	N2: DAG.getShiftAmountConstant(Val: ShiftBits, VT: ShValTy, DL: dl));
5183	SDValue CmpRHS = DAG.getConstant(Val: NewC, DL: dl, VT: ShValTy);
5184	return DAG.getSetCC(DL: dl, VT, LHS: Shift, RHS: CmpRHS, Cond: NewCond);
5185	}
5186	}
5187	}
5188	}
5189
5190	if (!isa<ConstantFPSDNode>(Val: N0) && isa<ConstantFPSDNode>(Val: N1)) {
5191	auto *CFP = cast<ConstantFPSDNode>(Val&: N1);
5192	assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5193
5194	// Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5195	// constant if knowing that the operand is non-nan is enough. We prefer to
5196	// have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5197	// materialize 0.0.
5198	if (Cond == ISD::SETO \|\| Cond == ISD::SETUO)
5199	return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N0, Cond);
5200
5201	// setcc (fneg x), C -> setcc swap(pred) x, -C
5202	if (N0.getOpcode() == ISD::FNEG) {
5203	ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Operation: Cond);
5204	if (DCI.isBeforeLegalizeOps() \|\|
5205	isCondCodeLegal(CC: SwapCond, VT: N0.getSimpleValueType())) {
5206	SDValue NegN1 = DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT: N0.getValueType(), Operand: N1);
5207	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`), RHS: NegN1, Cond: SwapCond);
5208	}
5209	}
5210
5211	// setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5212	if (isOperationLegalOrCustom(Op: ISD::IS_FPCLASS, VT: N0.getValueType()) &&
5213	!isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(ResNo: `0`))) {
5214	bool IsFabs = N0.getOpcode() == ISD::FABS;
5215	SDValue Op = IsFabs ? N0.getOperand(i: `0`) : N0;
5216	if ((Cond == ISD::SETOEQ \|\| Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5217	FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5218	: (IsFabs ? fcInf : fcPosInf);
5219	if (Cond == ISD::SETUEQ)
5220	Flag \|= fcNan;
5221	return DAG.getNode(Opcode: ISD::IS_FPCLASS, DL: dl, VT, N1: Op,
5222	N2: DAG.getTargetConstant(Val: Flag, DL: dl, VT: MVT::i32));
5223	}
5224	}
5225
5226	// If the condition is not legal, see if we can find an equivalent one
5227	// which is legal.
5228	if (!isCondCodeLegal(CC: Cond, VT: N0.getSimpleValueType())) {
5229	// If the comparison was an awkward floating-point == or != and one of
5230	// the comparison operands is infinity or negative infinity, convert the
5231	// condition to a less-awkward <= or >=.
5232	if (CFP->getValueAPF().isInfinity()) {
5233	bool IsNegInf = CFP->getValueAPF().isNegative();
5234	ISD::CondCode NewCond = ISD::SETCC_INVALID;
5235	switch (Cond) {
5236	case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5237	case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5238	case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5239	case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5240	default: break;
5241	}
5242	if (NewCond != ISD::SETCC_INVALID &&
5243	isCondCodeLegal(CC: NewCond, VT: N0.getSimpleValueType()))
5244	return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: NewCond);
5245	}
5246	}
5247	}
5248
5249	if (N0 == N1) {
5250	// The sext(setcc()) => setcc() optimization relies on the appropriate
5251	// constant being emitted.
5252	assert(!N0.getValueType().isInteger() &&
5253	"Integer types should be handled by FoldSetCC");
5254
5255	bool EqTrue = ISD::isTrueWhenEqual(Cond);
5256	unsigned UOF = ISD::getUnorderedFlavor(Cond);
5257	if (UOF == `2`) // FP operators that are undefined on NaNs.
5258	return DAG.getBoolConstant(V: EqTrue, DL: dl, VT, OpVT);
5259	if (UOF == unsigned(EqTrue))
5260	return DAG.getBoolConstant(V: EqTrue, DL: dl, VT, OpVT);
5261	// Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5262	// if it is not already.
5263	ISD::CondCode NewCond = UOF == `0` ? ISD::SETO : ISD::SETUO;
5264	if (NewCond != Cond &&
5265	(DCI.isBeforeLegalizeOps() \|\|
5266	isCondCodeLegal(CC: NewCond, VT: N0.getSimpleValueType())))
5267	return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: NewCond);
5268	}
5269
5270	// ~X > ~Y --> Y > X
5271	// ~X < ~Y --> Y < X
5272	// ~X < C --> X > ~C
5273	// ~X > C --> X < ~C
5274	if ((isSignedIntSetCC(Code: Cond) \|\| isUnsignedIntSetCC(Code: Cond)) &&
5275	N0.getValueType().isInteger()) {
5276	if (isBitwiseNot(V: N0)) {
5277	if (isBitwiseNot(V: N1))
5278	return DAG.getSetCC(DL: dl, VT, LHS: N1.getOperand(i: `0`), RHS: N0.getOperand(i: `0`), Cond);
5279
5280	if (DAG.isConstantIntBuildVectorOrConstantInt(N: N1) &&
5281	!DAG.isConstantIntBuildVectorOrConstantInt(N: N0.getOperand(i: `0`))) {
5282	SDValue Not = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5283	return DAG.getSetCC(DL: dl, VT, LHS: Not, RHS: N0.getOperand(i: `0`), Cond);
5284	}
5285	}
5286	}
5287
5288	if ((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
5289	N0.getValueType().isInteger()) {
5290	if (N0.getOpcode() == ISD::ADD \|\| N0.getOpcode() == ISD::SUB \|\|
5291	N0.getOpcode() == ISD::XOR) {
5292	// Simplify (X+Y) == (X+Z) --> Y == Z
5293	if (N0.getOpcode() == N1.getOpcode()) {
5294	if (N0.getOperand(i: `0`) == N1.getOperand(i: `0`))
5295	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `1`), RHS: N1.getOperand(i: `1`), Cond);
5296	if (N0.getOperand(i: `1`) == N1.getOperand(i: `1`))
5297	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`), RHS: N1.getOperand(i: `0`), Cond);
5298	if (isCommutativeBinOp(Opcode: N0.getOpcode())) {
5299	// If X op Y == Y op X, try other combinations.
5300	if (N0.getOperand(i: `0`) == N1.getOperand(i: `1`))
5301	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `1`), RHS: N1.getOperand(i: `0`),
5302	Cond);
5303	if (N0.getOperand(i: `1`) == N1.getOperand(i: `0`))
5304	return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: `0`), RHS: N1.getOperand(i: `1`),
5305	Cond);
5306	}
5307	}
5308
5309	// If RHS is a legal immediate value for a compare instruction, we need
5310	// to be careful about increasing register pressure needlessly.
5311	bool LegalRHSImm = false;
5312
5313	if (auto *RHSC = dyn_cast<ConstantSDNode>(Val&: N1)) {
5314	if (auto *LHSR = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: `1`))) {
5315	// Turn (X+C1) == C2 --> X == C2-C1
5316	if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5317	return DAG.getSetCC(
5318	DL: dl, VT, LHS: N0.getOperand(i: `0`),
5319	RHS: DAG.getConstant(Val: RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5320	DL: dl, VT: N0.getValueType()),
5321	Cond);
5322
5323	// Turn (X^C1) == C2 --> X == C1^C2
5324	if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5325	return DAG.getSetCC(
5326	DL: dl, VT, LHS: N0.getOperand(i: `0`),
5327	RHS: DAG.getConstant(Val: LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5328	DL: dl, VT: N0.getValueType()),
5329	Cond);
5330	}
5331
5332	// Turn (C1-X) == C2 --> X == C1-C2
5333	if (auto *SUBC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: `0`)))
5334	if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5335	return DAG.getSetCC(
5336	DL: dl, VT, LHS: N0.getOperand(i: `1`),
5337	RHS: DAG.getConstant(Val: SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5338	DL: dl, VT: N0.getValueType()),
5339	Cond);
5340
5341	// Could RHSC fold directly into a compare?
5342	if (RHSC->getValueType(ResNo: `0`).getSizeInBits() <= `64`)
5343	LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5344	}
5345
5346	// (X+Y) == X --> Y == 0 and similar folds.
5347	// Don't do this if X is an immediate that can fold into a cmp
5348	// instruction and X+Y has other uses. It could be an induction variable
5349	// chain, and the transform would increase register pressure.
5350	if (!LegalRHSImm \|\| N0.hasOneUse())
5351	if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, DL: dl, DCI))
5352	return V;
5353	}
5354
5355	if (N1.getOpcode() == ISD::ADD \|\| N1.getOpcode() == ISD::SUB \|\|
5356	N1.getOpcode() == ISD::XOR)
5357	if (SDValue V = foldSetCCWithBinOp(VT, N0: N1, N1: N0, Cond, DL: dl, DCI))
5358	return V;
5359
5360	if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, DL: dl, DCI))
5361	return V;
5362	}
5363
5364	// Fold remainder of division by a constant.
5365	if ((N0.getOpcode() == ISD::UREM \|\| N0.getOpcode() == ISD::SREM) &&
5366	N0.hasOneUse() && (Cond == ISD::SETEQ \|\| Cond == ISD::SETNE)) {
5367	// When division is cheap or optimizing for minimum size,
5368	// fall through to DIVREM creation by skipping this fold.
5369	if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Kind: Attribute::MinSize)) {
5370	if (N0.getOpcode() == ISD::UREM) {
5371	if (SDValue Folded = buildUREMEqFold(SETCCVT: VT, REMNode: N0, CompTargetNode: N1, Cond, DCI, DL: dl))
5372	return Folded;
5373	} else if (N0.getOpcode() == ISD::SREM) {
5374	if (SDValue Folded = buildSREMEqFold(SETCCVT: VT, REMNode: N0, CompTargetNode: N1, Cond, DCI, DL: dl))
5375	return Folded;
5376	}
5377	}
5378	}
5379
5380	// Fold away ALL boolean setcc's.
5381	if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5382	SDValue Temp;
5383	switch (Cond) {
5384	default: llvm_unreachable("Unknown integer setcc!");
5385	case ISD::SETEQ: // X == Y -> ~(X^Y)
5386	Temp = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OpVT, N1: N0, N2: N1);
5387	N0 = DAG.getNOT(DL: dl, Val: Temp, VT: OpVT);
5388	if (!DCI.isCalledByLegalizer())
5389	DCI.AddToWorklist(N: Temp.getNode());
5390	break;
5391	case ISD::SETNE: // X != Y --> (X^Y)
5392	N0 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OpVT, N1: N0, N2: N1);
5393	break;
5394	case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5395	case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5396	Temp = DAG.getNOT(DL: dl, Val: N0, VT: OpVT);
5397	N0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1, N2: Temp);
5398	if (!DCI.isCalledByLegalizer())
5399	DCI.AddToWorklist(N: Temp.getNode());
5400	break;
5401	case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5402	case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5403	Temp = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5404	N0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1: N0, N2: Temp);
5405	if (!DCI.isCalledByLegalizer())
5406	DCI.AddToWorklist(N: Temp.getNode());
5407	break;
5408	case ISD::SETULE: // X <=u Y --> X == 0 \| Y == 1 --> ~X \| Y
5409	case ISD::SETGE: // X >=s Y --> X == 0 \| Y == 1 --> ~X \| Y
5410	Temp = DAG.getNOT(DL: dl, Val: N0, VT: OpVT);
5411	N0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1, N2: Temp);
5412	if (!DCI.isCalledByLegalizer())
5413	DCI.AddToWorklist(N: Temp.getNode());
5414	break;
5415	case ISD::SETUGE: // X >=u Y --> X == 1 \| Y == 0 --> ~Y \| X
5416	case ISD::SETLE: // X <=s Y --> X == 1 \| Y == 0 --> ~Y \| X
5417	Temp = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5418	N0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: N0, N2: Temp);
5419	break;
5420	}
5421	if (VT.getScalarType() != MVT::i1) {
5422	if (!DCI.isCalledByLegalizer())
5423	DCI.AddToWorklist(N: N0.getNode());
5424	// FIXME: If running after legalize, we probably can't do this.
5425	ISD::NodeType ExtendCode = getExtendForContent(Content: getBooleanContents(Type: OpVT));
5426	N0 = DAG.getNode(Opcode: ExtendCode, DL: dl, VT, Operand: N0);
5427	}
5428	return N0;
5429	}
5430
5431	// Could not fold it.
5432	return SDValue ();
5433	}
5434
5435	/// Returns true (and the GlobalValue and the offset) if the node is a
5436	/// GlobalAddress + offset.
5437	bool TargetLowering::isGAPlusOffset(SDNode WN, const* GlobalValue *&GA,
5438	int64_t &Offset) const {
5439
5440	SDNode *N = unwrapAddress(N: SDValue (WN, `0`)).getNode();
5441
5442	if (auto *GASD = dyn_cast<GlobalAddressSDNode>(Val: N)) {
5443	GA = GASD->getGlobal();
5444	Offset += GASD->getOffset();
5445	return true;
5446	}
5447
5448	if (N->getOpcode() == ISD::ADD) {
5449	SDValue N1 = N->getOperand(Num: `0`);
5450	SDValue N2 = N->getOperand(Num: `1`);
5451	if (isGAPlusOffset(WN: N1.getNode(), GA, Offset)) {
5452	if (auto *V = dyn_cast<ConstantSDNode>(Val&: N2)) {
5453	Offset += V->getSExtValue();
5454	return true;
5455	}
5456	} else if (isGAPlusOffset(WN: N2.getNode(), GA, Offset)) {
5457	if (auto *V = dyn_cast<ConstantSDNode>(Val&: N1)) {
5458	Offset += V->getSExtValue();
5459	return true;
5460	}
5461	}
5462	}
5463
5464	return false;
5465	}
5466
5467	SDValue TargetLowering::PerformDAGCombine(SDNode *N,
5468	DAGCombinerInfo &DCI) const {
5469	// Default implementation: no optimization.
5470	return SDValue ();
5471	}
5472
5473	//===----------------------------------------------------------------------===//
5474	// Inline Assembler Implementation Methods
5475	//===----------------------------------------------------------------------===//
5476
5477	TargetLowering::ConstraintType
5478	TargetLowering::getConstraintType(StringRef Constraint) const {
5479	unsigned S = Constraint.size();
5480
5481	if (S == `1`) {
5482	switch (Constraint [`0`]) {
5483	default: break;
5484	case `'r'`:
5485	return C_RegisterClass;
5486	case `'m'`: // memory
5487	case `'o'`: // offsetable
5488	case `'V'`: // not offsetable
5489	return C_Memory;
5490	case `'p'`: // Address.
5491	return C_Address;
5492	case `'n'`: // Simple Integer
5493	case `'E'`: // Floating Point Constant
5494	case `'F'`: // Floating Point Constant
5495	return C_Immediate;
5496	case `'i'`: // Simple Integer or Relocatable Constant
5497	case `'s'`: // Relocatable Constant
5498	case `'X'`: // Allow ANY value.
5499	case `'I'`: // Target registers.
5500	case `'J'`:
5501	case `'K'`:
5502	case `'L'`:
5503	case `'M'`:
5504	case `'N'`:
5505	case `'O'`:
5506	case `'P'`:
5507	case `'<'`:
5508	case `'>'`:
5509	return C_Other;
5510	}
5511	}
5512
5513	if (S > `1` && Constraint [`0`] == `'{'` && Constraint [S - `1`] == `'}'`) {
5514	if (S == `8` && Constraint.substr(Start: `1`, N: `6`) == "memory") // "{memory}"
5515	return C_Memory;
5516	return C_Register;
5517	}
5518	return C_Unknown;
5519	}
5520
5521	/// Try to replace an X constraint, which matches anything, with another that
5522	/// has more specific requirements based on the type of the corresponding
5523	/// operand.
5524	const char TargetLowering::LowerXConstraint(EVT ConstraintVT) const* {
5525	if (ConstraintVT.isInteger())
5526	return "r";
5527	if (ConstraintVT.isFloatingPoint())
5528	return "f"; // works for many targets
5529	return nullptr;
5530	}
5531
5532	SDValue TargetLowering::LowerAsmOutputForConstraint(
5533	SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5534	const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5535	return SDValue ();
5536	}
5537
5538	/// Lower the specified operand into the Ops vector.
5539	/// If it is invalid, don't add anything to Ops.
5540	void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
5541	StringRef Constraint,
5542	std::vector<SDValue> &Ops,
5543	SelectionDAG &DAG) const {
5544
5545	if (Constraint.size() > `1`)
5546	return;
5547
5548	char ConstraintLetter = Constraint [`0`];
5549	switch (ConstraintLetter) {
5550	default: break;
5551	case `'X'`: // Allows any operand
5552	case `'i'`: // Simple Integer or Relocatable Constant
5553	case `'n'`: // Simple Integer
5554	case `'s'`: { // Relocatable Constant
5555
5556	ConstantSDNode *C;
5557	uint64_t Offset = `0`;
5558
5559	// Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5560	// etc., since getelementpointer is variadic. We can't use
5561	// SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5562	// while in this case the GA may be furthest from the root node which is
5563	// likely an ISD::ADD.
5564	while (true) {
5565	if ((C = dyn_cast<ConstantSDNode>(Val&: Op)) && ConstraintLetter != `'s'`) {
5566	// gcc prints these as sign extended. Sign extend value to 64 bits
5567	// now; without this it would get ZExt'd later in
5568	// ScheduleDAGSDNodes::EmitNode, which is very generic.
5569	bool IsBool = C->getConstantIntValue()->getBitWidth() == `1`;
5570	BooleanContent BCont = getBooleanContents(Type: MVT::i64);
5571	ISD::NodeType ExtOpc =
5572	IsBool ? getExtendForContent(Content: BCont) : ISD::SIGN_EXTEND;
5573	int64_t ExtVal =
5574	ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5575	Ops.push_back(
5576	x: DAG.getTargetConstant(Val: Offset + ExtVal, DL: SDLoc (C), VT: MVT::i64));
5577	return;
5578	}
5579	if (ConstraintLetter != `'n'`) {
5580	if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Val&: Op)) {
5581	Ops.push_back(x: DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL: SDLoc (Op),
5582	VT: GA->getValueType(ResNo: `0`),
5583	offset: Offset + GA->getOffset()));
5584	return;
5585	}
5586	if (const auto *BA = dyn_cast<BlockAddressSDNode>(Val&: Op)) {
5587	Ops.push_back(x: DAG.getTargetBlockAddress(
5588	BA: BA->getBlockAddress(), VT: BA->getValueType(ResNo: `0`),
5589	Offset: Offset + BA->getOffset(), TargetFlags: BA->getTargetFlags()));
5590	return;
5591	}
5592	if (isa<BasicBlockSDNode>(Val: Op)) {
5593	Ops.push_back(x: Op);
5594	return;
5595	}
5596	}
5597	const unsigned OpCode = Op.getOpcode();
5598	if (OpCode == ISD::ADD \|\| OpCode == ISD::SUB) {
5599	if ((C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `0`))))
5600	Op = Op.getOperand(i: `1`);
5601	// Subtraction is not commutative.
5602	else if (OpCode == ISD::ADD &&
5603	(C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `1`))))
5604	Op = Op.getOperand(i: `0`);
5605	else
5606	return;
5607	Offset += (OpCode == ISD::ADD ? `1` : -`1`) * C->getSExtValue();
5608	continue;
5609	}
5610	return;
5611	}
5612	break;
5613	}
5614	}
5615	}
5616
5617	void TargetLowering::CollectTargetIntrinsicOperands(
5618	const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
5619	}
5620
5621	std::pair<unsigned, const TargetRegisterClass *>
5622	TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
5623	StringRef Constraint,
5624	MVT VT) const {
5625	if (!Constraint.starts_with(Prefix: "{"))
5626	return std::make_pair(x: `0u`, y: static_cast<TargetRegisterClass >(nullptr*));
5627	assert(*(Constraint.end() - `1`) == `'}'` && "Not a brace enclosed constraint?");
5628
5629	// Remove the braces from around the name.
5630	StringRef RegName(Constraint.data() + `1`, Constraint.size() - `2`);
5631
5632	std::pair<unsigned, const TargetRegisterClass *> R =
5633	std::make_pair(x: `0u`, y: static_cast<const TargetRegisterClass >(nullptr*));
5634
5635	// Figure out which register class contains this reg.
5636	for (const TargetRegisterClass *RC : RI->regclasses()) {
5637	// If none of the value types for this register class are valid, we
5638	// can't use it. For example, 64-bit reg classes on 32-bit targets.
5639	if (!isLegalRC(TRI: RI, RC: RC))
5640	continue;
5641
5642	for (const MCPhysReg &PR : *RC) {
5643	if (RegName.equals_insensitive(RHS: RI->getRegAsmName(Reg: PR))) {
5644	std::pair<unsigned, const TargetRegisterClass *> S =
5645	std::make_pair(x: PR, y&: RC);
5646
5647	// If this register class has the requested value type, return it,
5648	// otherwise keep searching and return the first class found
5649	// if no other is found which explicitly has the requested type.
5650	if (RI->isTypeLegalForClass(RC: *RC, T: VT))
5651	return S;
5652	if (!R.second)
5653	R = S;
5654	}
5655	}
5656	}
5657
5658	return R;
5659	}
5660
5661	//===----------------------------------------------------------------------===//
5662	// Constraint Selection.
5663
5664	/// Return true of this is an input operand that is a matching constraint like
5665	/// "4".
5666	bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
5667	assert(!ConstraintCode.empty() && "No known constraint!");
5668	return isdigit(static_cast<unsigned char>(ConstraintCode [`0`]));
5669	}
5670
5671	/// If this is an input matching constraint, this method returns the output
5672	/// operand it matches.
5673	unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
5674	assert(!ConstraintCode.empty() && "No known constraint!");
5675	return atoi(nptr: ConstraintCode.c_str());
5676	}
5677
5678	/// Split up the constraint string from the inline assembly value into the
5679	/// specific constraints and their prefixes, and also tie in the associated
5680	/// operand values.
5681	/// If this returns an empty vector, and if the constraint string itself
5682	/// isn't empty, there was an error parsing.
5683	TargetLowering::AsmOperandInfoVector
5684	TargetLowering::ParseConstraints(const DataLayout &DL,
5685	const TargetRegisterInfo *TRI,
5686	const CallBase &Call) const {
5687	/// Information about all of the constraints.
5688	AsmOperandInfoVector ConstraintOperands;
5689	const InlineAsm *IA = cast<InlineAsm>(Val: Call.getCalledOperand());
5690	unsigned maCount = `0`; // Largest number of multiple alternative constraints.
5691
5692	// Do a prepass over the constraints, canonicalizing them, and building up the
5693	// ConstraintOperands list.
5694	unsigned ArgNo = `0`; // ArgNo - The argument of the CallInst.
5695	unsigned ResNo = `0`; // ResNo - The result number of the next output.
5696	unsigned LabelNo = `0`; // LabelNo - CallBr indirect dest number.
5697
5698	for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
5699	ConstraintOperands.emplace_back(args: std::move(CI));
5700	AsmOperandInfo &OpInfo = ConstraintOperands.back();
5701
5702	// Update multiple alternative constraint count.
5703	if (OpInfo.multipleAlternatives.size() > maCount)
5704	maCount = OpInfo.multipleAlternatives.size();
5705
5706	OpInfo.ConstraintVT = MVT::Other;
5707
5708	// Compute the value type for each operand.
5709	switch (OpInfo.Type) {
5710	case InlineAsm::isOutput:
5711	// Indirect outputs just consume an argument.
5712	if (OpInfo.isIndirect) {
5713	OpInfo.CallOperandVal = Call.getArgOperand(i: ArgNo);
5714	break;
5715	}
5716
5717	// The return value of the call is this value. As such, there is no
5718	// corresponding argument.
5719	assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
5720	if (auto *STy = dyn_cast<StructType>(Val: Call.getType())) {
5721	OpInfo.ConstraintVT =
5722	getSimpleValueType(DL, Ty: STy->getElementType(N: ResNo));
5723	} else {
5724	assert(ResNo == `0` && "Asm only has one result!");
5725	OpInfo.ConstraintVT =
5726	getAsmOperandValueType(DL, Ty: Call.getType()).getSimpleVT();
5727	}
5728	++ResNo;
5729	break;
5730	case InlineAsm::isInput:
5731	OpInfo.CallOperandVal = Call.getArgOperand(i: ArgNo);
5732	break;
5733	case InlineAsm::isLabel:
5734	OpInfo.CallOperandVal = cast<CallBrInst>(Val: &Call)->getIndirectDest(i: LabelNo);
5735	++LabelNo;
5736	continue;
5737	case InlineAsm::isClobber:
5738	// Nothing to do.
5739	break;
5740	}
5741
5742	if (OpInfo.CallOperandVal) {
5743	llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
5744	if (OpInfo.isIndirect) {
5745	OpTy = Call.getParamElementType(ArgNo);
5746	assert(OpTy && "Indirect operand must have elementtype attribute");
5747	}
5748
5749	// Look for vector wrapped in a struct. e.g. { <16 x i8> }.
5750	if (StructType *STy = dyn_cast<StructType>(Val: OpTy))
5751	if (STy->getNumElements() == `1`)
5752	OpTy = STy->getElementType(N: `0`);
5753
5754	// If OpTy is not a single value, it may be a struct/union that we
5755	// can tile with integers.
5756	if (!OpTy->isSingleValueType() && OpTy->isSized()) {
5757	unsigned BitSize = DL.getTypeSizeInBits(Ty: OpTy);
5758	switch (BitSize) {
5759	default: break;
5760	case `1`:
5761	case `8`:
5762	case `16`:
5763	case `32`:
5764	case `64`:
5765	case `128`:
5766	OpTy = IntegerType::get(C&: OpTy->getContext(), NumBits: BitSize);
5767	break;
5768	}
5769	}
5770
5771	EVT VT = getAsmOperandValueType(DL, Ty: OpTy, AllowUnknown: true);
5772	OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
5773	ArgNo++;
5774	}
5775	}
5776
5777	// If we have multiple alternative constraints, select the best alternative.
5778	if (!ConstraintOperands.empty()) {
5779	if (maCount) {
5780	unsigned bestMAIndex = `0`;
5781	int bestWeight = -`1`;
5782	// weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
5783	int weight = -`1`;
5784	unsigned maIndex;
5785	// Compute the sums of the weights for each alternative, keeping track
5786	// of the best (highest weight) one so far.
5787	for (maIndex = `0`; maIndex < maCount; ++maIndex) {
5788	int weightSum = `0`;
5789	for (unsigned cIndex = `0`, eIndex = ConstraintOperands.size();
5790	cIndex != eIndex; ++cIndex) {
5791	AsmOperandInfo &OpInfo = ConstraintOperands [cIndex];
5792	if (OpInfo.Type == InlineAsm::isClobber)
5793	continue;
5794
5795	// If this is an output operand with a matching input operand,
5796	// look up the matching input. If their types mismatch, e.g. one
5797	// is an integer, the other is floating point, or their sizes are
5798	// different, flag it as an maCantMatch.
5799	if (OpInfo.hasMatchingInput()) {
5800	AsmOperandInfo &Input = ConstraintOperands [OpInfo.MatchingInput];
5801	if (OpInfo.ConstraintVT != Input.ConstraintVT) {
5802	if ((OpInfo.ConstraintVT.isInteger() !=
5803	Input.ConstraintVT.isInteger()) \|\|
5804	(OpInfo.ConstraintVT.getSizeInBits() !=
5805	Input.ConstraintVT.getSizeInBits())) {
5806	weightSum = -`1`; // Can't match.
5807	break;
5808	}
5809	}
5810	}
5811	weight = getMultipleConstraintMatchWeight(info&: OpInfo, maIndex);
5812	if (weight == -`1`) {
5813	weightSum = -`1`;
5814	break;
5815	}
5816	weightSum += weight;
5817	}
5818	// Update best.
5819	if (weightSum > bestWeight) {
5820	bestWeight = weightSum;
5821	bestMAIndex = maIndex;
5822	}
5823	}
5824
5825	// Now select chosen alternative in each constraint.
5826	for (AsmOperandInfo &cInfo : ConstraintOperands)
5827	if (cInfo.Type != InlineAsm::isClobber)
5828	cInfo.selectAlternative(index: bestMAIndex);
5829	}
5830	}
5831
5832	// Check and hook up tied operands, choose constraint code to use.
5833	for (unsigned cIndex = `0`, eIndex = ConstraintOperands.size();
5834	cIndex != eIndex; ++cIndex) {
5835	AsmOperandInfo &OpInfo = ConstraintOperands [cIndex];
5836
5837	// If this is an output operand with a matching input operand, look up the
5838	// matching input. If their types mismatch, e.g. one is an integer, the
5839	// other is floating point, or their sizes are different, flag it as an
5840	// error.
5841	if (OpInfo.hasMatchingInput()) {
5842	AsmOperandInfo &Input = ConstraintOperands [OpInfo.MatchingInput];
5843
5844	if (OpInfo.ConstraintVT != Input.ConstraintVT) {
5845	std::pair<unsigned, const TargetRegisterClass *> MatchRC =
5846	getRegForInlineAsmConstraint(RI: TRI, Constraint: OpInfo.ConstraintCode,
5847	VT: OpInfo.ConstraintVT);
5848	std::pair<unsigned, const TargetRegisterClass *> InputRC =
5849	getRegForInlineAsmConstraint(RI: TRI, Constraint: Input.ConstraintCode,
5850	VT: Input.ConstraintVT);
5851	if ((OpInfo.ConstraintVT.isInteger() !=
5852	Input.ConstraintVT.isInteger()) \|\|
5853	(MatchRC.second != InputRC.second)) {
5854	report_fatal_error(reason: "Unsupported asm: input constraint"
5855	" with a matching output constraint of"
5856	" incompatible type!");
5857	}
5858	}
5859	}
5860	}
5861
5862	return ConstraintOperands;
5863	}
5864
5865	/// Return a number indicating our preference for chosing a type of constraint
5866	/// over another, for the purpose of sorting them. Immediates are almost always
5867	/// preferrable (when they can be emitted). A higher return value means a
5868	/// stronger preference for one constraint type relative to another.
5869	/// FIXME: We should prefer registers over memory but doing so may lead to
5870	/// unrecoverable register exhaustion later.
5871	/// https://github.com/llvm/llvm-project/issues/20571
5872	static unsigned getConstraintPiority(TargetLowering::ConstraintType CT) {
5873	switch (CT) {
5874	case TargetLowering::C_Immediate:
5875	case TargetLowering::C_Other:
5876	return `4`;
5877	case TargetLowering::C_Memory:
5878	case TargetLowering::C_Address:
5879	return `3`;
5880	case TargetLowering::C_RegisterClass:
5881	return `2`;
5882	case TargetLowering::C_Register:
5883	return `1`;
5884	case TargetLowering::C_Unknown:
5885	return `0`;
5886	}
5887	llvm_unreachable("Invalid constraint type");
5888	}
5889
5890	/// Examine constraint type and operand type and determine a weight value.
5891	/// This object must already have been set up with the operand type
5892	/// and the current alternative constraint selected.
5893	TargetLowering::ConstraintWeight
5894	TargetLowering::getMultipleConstraintMatchWeight(
5895	AsmOperandInfo &info, int maIndex) const {
5896	InlineAsm::ConstraintCodeVector *rCodes;
5897	if (maIndex >= (int)info.multipleAlternatives.size())
5898	rCodes = &info.Codes;
5899	else
5900	rCodes = &info.multipleAlternatives [maIndex].Codes;
5901	ConstraintWeight BestWeight = CW_Invalid;
5902
5903	// Loop over the options, keeping track of the most general one.
5904	for (const std::string &rCode : *rCodes) {
5905	ConstraintWeight weight =
5906	getSingleConstraintMatchWeight(info, constraint: rCode.c_str());
5907	if (weight > BestWeight)
5908	BestWeight = weight;
5909	}
5910
5911	return BestWeight;
5912	}
5913
5914	/// Examine constraint type and operand type and determine a weight value.
5915	/// This object must already have been set up with the operand type
5916	/// and the current alternative constraint selected.
5917	TargetLowering::ConstraintWeight
5918	TargetLowering::getSingleConstraintMatchWeight(
5919	AsmOperandInfo &info, const char constraint) const* {
5920	ConstraintWeight weight = CW_Invalid;
5921	Value *CallOperandVal = info.CallOperandVal;
5922	// If we don't have a value, we can't do a match,
5923	// but allow it at the lowest weight.
5924	if (!CallOperandVal)
5925	return CW_Default;
5926	// Look at the constraint type.
5927	switch (*constraint) {
5928	case `'i'`: // immediate integer.
5929	case `'n'`: // immediate integer with a known value.
5930	if (isa<ConstantInt>(Val: CallOperandVal))
5931	weight = CW_Constant;
5932	break;
5933	case `'s'`: // non-explicit intregal immediate.
5934	if (isa<GlobalValue>(Val: CallOperandVal))
5935	weight = CW_Constant;
5936	break;
5937	case `'E'`: // immediate float if host format.
5938	case `'F'`: // immediate float.
5939	if (isa<ConstantFP>(Val: CallOperandVal))
5940	weight = CW_Constant;
5941	break;
5942	case `'<'`: // memory operand with autodecrement.
5943	case `'>'`: // memory operand with autoincrement.
5944	case `'m'`: // memory operand.
5945	case `'o'`: // offsettable memory operand
5946	case `'V'`: // non-offsettable memory operand
5947	weight = CW_Memory;
5948	break;
5949	case `'r'`: // general register.
5950	case `'g'`: // general register, memory operand or immediate integer.
5951	// note: Clang converts "g" to "imr".
5952	if (CallOperandVal->getType()->isIntegerTy())
5953	weight = CW_Register;
5954	break;
5955	case `'X'`: // any operand.
5956	default:
5957	weight = CW_Default;
5958	break;
5959	}
5960	return weight;
5961	}
5962
5963	/// If there are multiple different constraints that we could pick for this
5964	/// operand (e.g. "imr") try to pick the 'best' one.
5965	/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
5966	/// into seven classes:
5967	/// Register -> one specific register
5968	/// RegisterClass -> a group of regs
5969	/// Memory -> memory
5970	/// Address -> a symbolic memory reference
5971	/// Immediate -> immediate values
5972	/// Other -> magic values (such as "Flag Output Operands")
5973	/// Unknown -> something we don't recognize yet and can't handle
5974	/// Ideally, we would pick the most specific constraint possible: if we have
5975	/// something that fits into a register, we would pick it. The problem here
5976	/// is that if we have something that could either be in a register or in
5977	/// memory that use of the register could cause selection of other
5978	/// operands to fail: they might only succeed if we pick memory. Because of
5979	/// this the heuristic we use is:
5980	///
5981	/// 1) If there is an 'other' constraint, and if the operand is valid for
5982	/// that constraint, use it. This makes us take advantage of 'i'
5983	/// constraints when available.
5984	/// 2) Otherwise, pick the most general constraint present. This prefers
5985	/// 'm' over 'r', for example.
5986	///
5987	TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
5988	TargetLowering::AsmOperandInfo &OpInfo) const {
5989	ConstraintGroup Ret;
5990
5991	Ret.reserve(N: OpInfo.Codes.size());
5992	for (StringRef Code : OpInfo.Codes) {
5993	TargetLowering::ConstraintType CType = getConstraintType(Constraint: Code);
5994
5995	// Indirect 'other' or 'immediate' constraints are not allowed.
5996	if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory \|\|
5997	CType == TargetLowering::C_Register \|\|
5998	CType == TargetLowering::C_RegisterClass))
5999	continue;
6000
6001	// Things with matching constraints can only be registers, per gcc
6002	// documentation. This mainly affects "g" constraints.
6003	if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6004	continue;
6005
6006	Ret.emplace_back(Args&: Code, Args&: CType);
6007	}
6008
6009	std::stable_sort(
6010	first: Ret.begin(), last: Ret.end(), comp: [](ConstraintPair a, ConstraintPair b) {
6011	return getConstraintPiority(CT: a.second) > getConstraintPiority(CT: b.second);
6012	});
6013
6014	return Ret;
6015	}
6016
6017	/// If we have an immediate, see if we can lower it. Return true if we can,
6018	/// false otherwise.
6019	static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
6020	SDValue Op, SelectionDAG *DAG,
6021	const TargetLowering &TLI) {
6022
6023	assert((P.second == TargetLowering::C_Other \|\|
6024	P.second == TargetLowering::C_Immediate) &&
6025	"need immediate or other");
6026
6027	if (!Op.getNode())
6028	return false;
6029
6030	std::vector<SDValue> ResultOps;
6031	TLI.LowerAsmOperandForConstraint(Op, Constraint: P.first, Ops&: ResultOps, DAG&: *DAG);
6032	return !ResultOps.empty();
6033	}
6034
6035	/// Determines the constraint code and constraint type to use for the specific
6036	/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
6037	void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
6038	SDValue Op,
6039	SelectionDAG DAG) const* {
6040	assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
6041
6042	// Single-letter constraints ('r') are very common.
6043	if (OpInfo.Codes.size() == `1`) {
6044	OpInfo.ConstraintCode = OpInfo.Codes [`0`];
6045	OpInfo.ConstraintType = getConstraintType(Constraint: OpInfo.ConstraintCode);
6046	} else {
6047	ConstraintGroup G = getConstraintPreferences(OpInfo);
6048	if (G.empty())
6049	return;
6050
6051	unsigned BestIdx = `0`;
6052	for (const unsigned E = G.size();
6053	BestIdx < E && (G [BestIdx].second == TargetLowering::C_Other \|\|
6054	G [BestIdx].second == TargetLowering::C_Immediate);
6055	++BestIdx) {
6056	if (lowerImmediateIfPossible(P&: G [BestIdx], Op, DAG, TLI: *this))
6057	break;
6058	// If we're out of constraints, just pick the first one.
6059	if (BestIdx + `1` == E) {
6060	BestIdx = `0`;
6061	break;
6062	}
6063	}
6064
6065	OpInfo.ConstraintCode = G [BestIdx].first;
6066	OpInfo.ConstraintType = G [BestIdx].second;
6067	}
6068
6069	// 'X' matches anything.
6070	if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6071	// Constants are handled elsewhere. For Functions, the type here is the
6072	// type of the result, which is not what we want to look at; leave them
6073	// alone.
6074	Value *v = OpInfo.CallOperandVal;
6075	if (isa<ConstantInt>(Val: v) \|\| isa<Function>(Val: v)) {
6076	return;
6077	}
6078
6079	if (isa<BasicBlock>(Val: v) \|\| isa<BlockAddress>(Val: v)) {
6080	OpInfo.ConstraintCode = "i";
6081	return;
6082	}
6083
6084	// Otherwise, try to resolve it to something we know about by looking at
6085	// the actual operand type.
6086	if (const char *Repl = LowerXConstraint(ConstraintVT: OpInfo.ConstraintVT)) {
6087	OpInfo.ConstraintCode = Repl;
6088	OpInfo.ConstraintType = getConstraintType(Constraint: OpInfo.ConstraintCode);
6089	}
6090	}
6091	}
6092
6093	/// Given an exact SDIV by a constant, create a multiplication
6094	/// with the multiplicative inverse of the constant.
6095	/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6096	static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
6097	const SDLoc &dl, SelectionDAG &DAG,
6098	SmallVectorImpl<SDNode *> &Created) {
6099	SDValue Op0 = N->getOperand(Num: `0`);
6100	SDValue Op1 = N->getOperand(Num: `1`);
6101	EVT VT = N->getValueType(ResNo: `0`);
6102	EVT SVT = VT.getScalarType();
6103	EVT ShVT = TLI.getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6104	EVT ShSVT = ShVT.getScalarType();
6105
6106	bool UseSRA = false;
6107	SmallVector<SDValue, `16`> Shifts, Factors;
6108
6109	auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6110	if (C->isZero())
6111	return false;
6112	APInt Divisor = C->getAPIntValue();
6113	unsigned Shift = Divisor.countr_zero();
6114	if (Shift) {
6115	Divisor.ashrInPlace(ShiftAmt: Shift);
6116	UseSRA = true;
6117	}
6118	APInt Factor = Divisor.multiplicativeInverse();
6119	Shifts.push_back(Elt: DAG.getConstant(Val: Shift, DL: dl, VT: ShSVT));
6120	Factors.push_back(Elt: DAG.getConstant(Val: Factor, DL: dl, VT: SVT));
6121	return true;
6122	};
6123
6124	// Collect all magic values from the build vector.
6125	if (!ISD::matchUnaryPredicate(Op: Op1, Match: BuildSDIVPattern))
6126	return SDValue ();
6127
6128	SDValue Shift, Factor;
6129	if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6130	Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
6131	Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
6132	} else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6133	assert(Shifts.size() == `1` && Factors.size() == `1` &&
6134	"Expected matchUnaryPredicate to return one element for scalable "
6135	"vectors");
6136	Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts [`0`]);
6137	Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors [`0`]);
6138	} else {
6139	assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6140	Shift = Shifts [`0`];
6141	Factor = Factors [`0`];
6142	}
6143
6144	SDValue Res = Op0;
6145	if (UseSRA) {
6146	SDNodeFlags Flags;
6147	Flags.setExact(true);
6148	Res = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Res, N2: Shift, Flags);
6149	Created.push_back(Elt: Res.getNode());
6150	}
6151
6152	return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Res, N2: Factor);
6153	}
6154
6155	/// Given an exact UDIV by a constant, create a multiplication
6156	/// with the multiplicative inverse of the constant.
6157	/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6158	static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
6159	const SDLoc &dl, SelectionDAG &DAG,
6160	SmallVectorImpl<SDNode *> &Created) {
6161	EVT VT = N->getValueType(ResNo: `0`);
6162	EVT SVT = VT.getScalarType();
6163	EVT ShVT = TLI.getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6164	EVT ShSVT = ShVT.getScalarType();
6165
6166	bool UseSRL = false;
6167	SmallVector<SDValue, `16`> Shifts, Factors;
6168
6169	auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6170	if (C->isZero())
6171	return false;
6172	APInt Divisor = C->getAPIntValue();
6173	unsigned Shift = Divisor.countr_zero();
6174	if (Shift) {
6175	Divisor.lshrInPlace(ShiftAmt: Shift);
6176	UseSRL = true;
6177	}
6178	// Calculate the multiplicative inverse modulo BW.
6179	APInt Factor = Divisor.multiplicativeInverse();
6180	Shifts.push_back(Elt: DAG.getConstant(Val: Shift, DL: dl, VT: ShSVT));
6181	Factors.push_back(Elt: DAG.getConstant(Val: Factor, DL: dl, VT: SVT));
6182	return true;
6183	};
6184
6185	SDValue Op1 = N->getOperand(Num: `1`);
6186
6187	// Collect all magic values from the build vector.
6188	if (!ISD::matchUnaryPredicate(Op: Op1, Match: BuildUDIVPattern))
6189	return SDValue ();
6190
6191	SDValue Shift, Factor;
6192	if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6193	Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
6194	Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
6195	} else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6196	assert(Shifts.size() == `1` && Factors.size() == `1` &&
6197	"Expected matchUnaryPredicate to return one element for scalable "
6198	"vectors");
6199	Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts [`0`]);
6200	Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors [`0`]);
6201	} else {
6202	assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6203	Shift = Shifts [`0`];
6204	Factor = Factors [`0`];
6205	}
6206
6207	SDValue Res = N->getOperand(Num: `0`);
6208	if (UseSRL) {
6209	SDNodeFlags Flags;
6210	Flags.setExact(true);
6211	Res = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Res, N2: Shift, Flags);
6212	Created.push_back(Elt: Res.getNode());
6213	}
6214
6215	return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Res, N2: Factor);
6216	}
6217
6218	SDValue TargetLowering::BuildSDIVPow2(SDNode N, const* APInt &Divisor,
6219	SelectionDAG &DAG,
6220	SmallVectorImpl<SDNode > &Created) const* {
6221	AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6222	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6223	if (TLI.isIntDivCheap(VT: N->getValueType(ResNo: `0`), Attr))
6224	return SDValue (N, `0`); // Lower SDIV as SDIV
6225	return SDValue ();
6226	}
6227
6228	SDValue
6229	TargetLowering::BuildSREMPow2(SDNode N, const* APInt &Divisor,
6230	SelectionDAG &DAG,
6231	SmallVectorImpl<SDNode > &Created) const* {
6232	AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6233	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6234	if (TLI.isIntDivCheap(VT: N->getValueType(ResNo: `0`), Attr))
6235	return SDValue (N, `0`); // Lower SREM as SREM
6236	return SDValue ();
6237	}
6238
6239	/// Build sdiv by power-of-2 with conditional move instructions
6240	/// Ref: "Hacker's Delight" by Henry Warren 10-1
6241	/// If conditional move/branch is preferred, we lower sdiv x, +/-2k into:
6242	/// bgez x, label
6243	/// add x, x, 2k-1
6244	/// label:
6245	/// sra res, x, k
6246	/// neg res, res (when the divisor is negative)
6247	SDValue TargetLowering::buildSDIVPow2WithCMov(
6248	SDNode N, const* APInt &Divisor, SelectionDAG &DAG,
6249	SmallVectorImpl<SDNode > &Created) const* {
6250	unsigned Lg2 = Divisor.countr_zero();
6251	EVT VT = N->getValueType(ResNo: `0`);
6252
6253	SDLoc DL(N);
6254	SDValue N0 = N->getOperand(Num: `0`);
6255	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT);
6256	APInt Lg2Mask = APInt::getLowBitsSet(numBits: VT.getSizeInBits(), loBitsSet: Lg2);
6257	SDValue Pow2MinusOne = DAG.getConstant(Val: Lg2Mask, DL, VT);
6258
6259	// If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6260	EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
6261	SDValue Cmp = DAG.getSetCC(DL, VT: CCVT, LHS: N0, RHS: Zero, Cond: ISD::SETLT);
6262	SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0, N2: Pow2MinusOne);
6263	SDValue CMov = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: Cmp, N2: Add, N3: N0);
6264
6265	Created.push_back(Elt: Cmp.getNode());
6266	Created.push_back(Elt: Add.getNode());
6267	Created.push_back(Elt: CMov.getNode());
6268
6269	// Divide by pow2.
6270	SDValue SRA =
6271	DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: CMov, N2: DAG.getConstant(Val: Lg2, DL, VT));
6272
6273	// If we're dividing by a positive value, we're done. Otherwise, we must
6274	// negate the result.
6275	if (Divisor.isNonNegative())
6276	return SRA;
6277
6278	Created.push_back(Elt: SRA.getNode());
6279	return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Zero, N2: SRA);
6280	}
6281
6282	/// Given an ISD::SDIV node expressing a divide by constant,
6283	/// return a DAG expression to select that will generate the same value by
6284	/// multiplying by a magic number.
6285	/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6286	SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
6287	bool IsAfterLegalization,
6288	SmallVectorImpl<SDNode > &Created) const* {
6289	SDLoc dl(N);
6290	EVT VT = N->getValueType(ResNo: `0`);
6291	EVT SVT = VT.getScalarType();
6292	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6293	EVT ShSVT = ShVT.getScalarType();
6294	unsigned EltBits = VT.getScalarSizeInBits();
6295	EVT MulVT;
6296
6297	// Check to see if we can do this.
6298	// FIXME: We should be more aggressive here.
6299	if (!isTypeLegal(VT)) {
6300	// Limit this to simple scalars for now.
6301	if (VT.isVector() \|\| !VT.isSimple())
6302	return SDValue ();
6303
6304	// If this type will be promoted to a large enough type with a legal
6305	// multiply operation, we can go ahead and do this transform.
6306	if (getTypeAction(VT: VT.getSimpleVT()) != TypePromoteInteger)
6307	return SDValue ();
6308
6309	MulVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT);
6310	if (MulVT.getSizeInBits() < (`2` * EltBits) \|\|
6311	!isOperationLegal(Op: ISD::MUL, VT: MulVT))
6312	return SDValue ();
6313	}
6314
6315	// If the sdiv has an 'exact' bit we can use a simpler lowering.
6316	if (N->getFlags().hasExact())
6317	return BuildExactSDIV(TLI: *this, N, dl, DAG, Created);
6318
6319	SmallVector<SDValue, `16`> MagicFactors, Factors, Shifts, ShiftMasks;
6320
6321	auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6322	if (C->isZero())
6323	return false;
6324
6325	const APInt &Divisor = C->getAPIntValue();
6326	SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(D: Divisor);
6327	int NumeratorFactor = `0`;
6328	int ShiftMask = -`1`;
6329
6330	if (Divisor.isOne() \|\| Divisor.isAllOnes()) {
6331	// If d is +1/-1, we just multiply the numerator by +1/-1.
6332	NumeratorFactor = Divisor.getSExtValue();
6333	magics.Magic = `0`;
6334	magics.ShiftAmount = `0`;
6335	ShiftMask = `0`;
6336	} else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6337	// If d > 0 and m < 0, add the numerator.
6338	NumeratorFactor = `1`;
6339	} else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6340	// If d < 0 and m > 0, subtract the numerator.
6341	NumeratorFactor = -`1`;
6342	}
6343
6344	MagicFactors.push_back(Elt: DAG.getConstant(Val: magics.Magic, DL: dl, VT: SVT));
6345	Factors.push_back(Elt: DAG.getConstant(Val: NumeratorFactor, DL: dl, VT: SVT));
6346	Shifts.push_back(Elt: DAG.getConstant(Val: magics.ShiftAmount, DL: dl, VT: ShSVT));
6347	ShiftMasks.push_back(Elt: DAG.getConstant(Val: ShiftMask, DL: dl, VT: SVT));
6348	return true;
6349	};
6350
6351	SDValue N0 = N->getOperand(Num: `0`);
6352	SDValue N1 = N->getOperand(Num: `1`);
6353
6354	// Collect the shifts / magic values from each element.
6355	if (!ISD::matchUnaryPredicate(Op: N1, Match: BuildSDIVPattern))
6356	return SDValue ();
6357
6358	SDValue MagicFactor, Factor, Shift, ShiftMask;
6359	if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6360	MagicFactor = DAG.getBuildVector(VT, DL: dl, Ops: MagicFactors);
6361	Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
6362	Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
6363	ShiftMask = DAG.getBuildVector(VT, DL: dl, Ops: ShiftMasks);
6364	} else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6365	assert(MagicFactors.size() == `1` && Factors.size() == `1` &&
6366	Shifts.size() == `1` && ShiftMasks.size() == `1` &&
6367	"Expected matchUnaryPredicate to return one element for scalable "
6368	"vectors");
6369	MagicFactor = DAG.getSplatVector(VT, DL: dl, Op: MagicFactors [`0`]);
6370	Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors [`0`]);
6371	Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts [`0`]);
6372	ShiftMask = DAG.getSplatVector(VT, DL: dl, Op: ShiftMasks [`0`]);
6373	} else {
6374	assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6375	MagicFactor = MagicFactors [`0`];
6376	Factor = Factors [`0`];
6377	Shift = Shifts [`0`];
6378	ShiftMask = ShiftMasks [`0`];
6379	}
6380
6381	// Multiply the numerator (operand 0) by the magic value.
6382	// FIXME: We should support doing a MUL in a wider type.
6383	auto GetMULHS = [&](SDValue X, SDValue Y) {
6384	// If the type isn't legal, use a wider mul of the type calculated
6385	// earlier.
6386	if (!isTypeLegal(VT)) {
6387	X = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MulVT, Operand: X);
6388	Y = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MulVT, Operand: Y);
6389	Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MulVT, N1: X, N2: Y);
6390	Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MulVT, N1: Y,
6391	N2: DAG.getShiftAmountConstant(Val: EltBits, VT: MulVT, DL: dl));
6392	return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6393	}
6394
6395	if (isOperationLegalOrCustom(Op: ISD::MULHS, VT, LegalOnly: IsAfterLegalization))
6396	return DAG.getNode(Opcode: ISD::MULHS, DL: dl, VT, N1: X, N2: Y);
6397	if (isOperationLegalOrCustom(Op: ISD::SMUL_LOHI, VT, LegalOnly: IsAfterLegalization)) {
6398	SDValue LoHi =
6399	DAG.getNode(Opcode: ISD::SMUL_LOHI, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: X, N2: Y);
6400	return SDValue (LoHi.getNode(), `1`);
6401	}
6402	// If type twice as wide legal, widen and use a mul plus a shift.
6403	unsigned Size = VT.getScalarSizeInBits();
6404	EVT WideVT = EVT::getIntegerVT(Context&: DAG.getContext(), BitWidth: Size `2`);
6405	if (VT.isVector())
6406	WideVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT,
6407	EC: VT.getVectorElementCount());
6408	if (isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT)) {
6409	X = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: WideVT, Operand: X);
6410	Y = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: WideVT, Operand: Y);
6411	Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: X, N2: Y);
6412	Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideVT, N1: Y,
6413	N2: DAG.getShiftAmountConstant(Val: EltBits, VT: WideVT, DL: dl));
6414	return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6415	}
6416	return SDValue ();
6417	};
6418
6419	SDValue Q = GetMULHS (N0, MagicFactor);
6420	if (!Q)
6421	return SDValue ();
6422
6423	Created.push_back(Elt: Q.getNode());
6424
6425	// (Optionally) Add/subtract the numerator using Factor.
6426	Factor = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: N0, N2: Factor);
6427	Created.push_back(Elt: Factor.getNode());
6428	Q = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Q, N2: Factor);
6429	Created.push_back(Elt: Q.getNode());
6430
6431	// Shift right algebraic by shift value.
6432	Q = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Q, N2: Shift);
6433	Created.push_back(Elt: Q.getNode());
6434
6435	// Extract the sign bit, mask it and add it to the quotient.
6436	SDValue SignShift = DAG.getConstant(Val: EltBits - `1`, DL: dl, VT: ShVT);
6437	SDValue T = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: SignShift);
6438	Created.push_back(Elt: T.getNode());
6439	T = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: T, N2: ShiftMask);
6440	Created.push_back(Elt: T.getNode());
6441	return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Q, N2: T);
6442	}
6443
6444	/// Given an ISD::UDIV node expressing a divide by constant,
6445	/// return a DAG expression to select that will generate the same value by
6446	/// multiplying by a magic number.
6447	/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6448	SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
6449	bool IsAfterLegalization,
6450	SmallVectorImpl<SDNode > &Created) const* {
6451	SDLoc dl(N);
6452	EVT VT = N->getValueType(ResNo: `0`);
6453	EVT SVT = VT.getScalarType();
6454	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6455	EVT ShSVT = ShVT.getScalarType();
6456	unsigned EltBits = VT.getScalarSizeInBits();
6457	EVT MulVT;
6458
6459	// Check to see if we can do this.
6460	// FIXME: We should be more aggressive here.
6461	if (!isTypeLegal(VT)) {
6462	// Limit this to simple scalars for now.
6463	if (VT.isVector() \|\| !VT.isSimple())
6464	return SDValue ();
6465
6466	// If this type will be promoted to a large enough type with a legal
6467	// multiply operation, we can go ahead and do this transform.
6468	if (getTypeAction(VT: VT.getSimpleVT()) != TypePromoteInteger)
6469	return SDValue ();
6470
6471	MulVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT);
6472	if (MulVT.getSizeInBits() < (`2` * EltBits) \|\|
6473	!isOperationLegal(Op: ISD::MUL, VT: MulVT))
6474	return SDValue ();
6475	}
6476
6477	// If the udiv has an 'exact' bit we can use a simpler lowering.
6478	if (N->getFlags().hasExact())
6479	return BuildExactUDIV(TLI: *this, N, dl, DAG, Created);
6480
6481	SDValue N0 = N->getOperand(Num: `0`);
6482	SDValue N1 = N->getOperand(Num: `1`);
6483
6484	// Try to use leading zeros of the dividend to reduce the multiplier and
6485	// avoid expensive fixups.
6486	unsigned KnownLeadingZeros = DAG.computeKnownBits(Op: N0).countMinLeadingZeros();
6487
6488	bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6489	SmallVector<SDValue, `16`> PreShifts, PostShifts, MagicFactors, NPQFactors;
6490
6491	auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6492	if (C->isZero())
6493	return false;
6494	const APInt& Divisor = C->getAPIntValue();
6495
6496	SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6497
6498	// Magic algorithm doesn't work for division by 1. We need to emit a select
6499	// at the end.
6500	if (Divisor.isOne()) {
6501	PreShift = PostShift = DAG.getUNDEF(VT: ShSVT);
6502	MagicFactor = NPQFactor = DAG.getUNDEF(VT: SVT);
6503	} else {
6504	UnsignedDivisionByConstantInfo magics =
6505	UnsignedDivisionByConstantInfo::get(
6506	D: Divisor, LeadingZeros: std::min(a: KnownLeadingZeros, b: Divisor.countl_zero()));
6507
6508	MagicFactor = DAG.getConstant(Val: magics.Magic, DL: dl, VT: SVT);
6509
6510	assert(magics.PreShift < Divisor.getBitWidth() &&
6511	"We shouldn't generate an undefined shift!");
6512	assert(magics.PostShift < Divisor.getBitWidth() &&
6513	"We shouldn't generate an undefined shift!");
6514	assert((!magics.IsAdd \|\| magics.PreShift == `0`) &&
6515	"Unexpected pre-shift");
6516	PreShift = DAG.getConstant(Val: magics.PreShift, DL: dl, VT: ShSVT);
6517	PostShift = DAG.getConstant(Val: magics.PostShift, DL: dl, VT: ShSVT);
6518	NPQFactor = DAG.getConstant(
6519	Val: magics.IsAdd ? APInt::getOneBitSet(numBits: EltBits, BitNo: EltBits - `1`)
6520	: APInt::getZero(numBits: EltBits),
6521	DL: dl, VT: SVT);
6522	UseNPQ \|= magics.IsAdd;
6523	UsePreShift \|= magics.PreShift != `0`;
6524	UsePostShift \|= magics.PostShift != `0`;
6525	}
6526
6527	PreShifts.push_back(Elt: PreShift);
6528	MagicFactors.push_back(Elt: MagicFactor);
6529	NPQFactors.push_back(Elt: NPQFactor);
6530	PostShifts.push_back(Elt: PostShift);
6531	return true;
6532	};
6533
6534	// Collect the shifts/magic values from each element.
6535	if (!ISD::matchUnaryPredicate(Op: N1, Match: BuildUDIVPattern))
6536	return SDValue ();
6537
6538	SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6539	if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6540	PreShift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: PreShifts);
6541	MagicFactor = DAG.getBuildVector(VT, DL: dl, Ops: MagicFactors);
6542	NPQFactor = DAG.getBuildVector(VT, DL: dl, Ops: NPQFactors);
6543	PostShift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: PostShifts);
6544	} else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6545	assert(PreShifts.size() == `1` && MagicFactors.size() == `1` &&
6546	NPQFactors.size() == `1` && PostShifts.size() == `1` &&
6547	"Expected matchUnaryPredicate to return one for scalable vectors");
6548	PreShift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: PreShifts [`0`]);
6549	MagicFactor = DAG.getSplatVector(VT, DL: dl, Op: MagicFactors [`0`]);
6550	NPQFactor = DAG.getSplatVector(VT, DL: dl, Op: NPQFactors [`0`]);
6551	PostShift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: PostShifts [`0`]);
6552	} else {
6553	assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6554	PreShift = PreShifts [`0`];
6555	MagicFactor = MagicFactors [`0`];
6556	PostShift = PostShifts [`0`];
6557	}
6558
6559	SDValue Q = N0;
6560	if (UsePreShift) {
6561	Q = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: PreShift);
6562	Created.push_back(Elt: Q.getNode());
6563	}
6564
6565	// FIXME: We should support doing a MUL in a wider type.
6566	auto GetMULHU = [&](SDValue X, SDValue Y) {
6567	// If the type isn't legal, use a wider mul of the type calculated
6568	// earlier.
6569	if (!isTypeLegal(VT)) {
6570	X = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MulVT, Operand: X);
6571	Y = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MulVT, Operand: Y);
6572	Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MulVT, N1: X, N2: Y);
6573	Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MulVT, N1: Y,
6574	N2: DAG.getShiftAmountConstant(Val: EltBits, VT: MulVT, DL: dl));
6575	return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6576	}
6577
6578	if (isOperationLegalOrCustom(Op: ISD::MULHU, VT, LegalOnly: IsAfterLegalization))
6579	return DAG.getNode(Opcode: ISD::MULHU, DL: dl, VT, N1: X, N2: Y);
6580	if (isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT, LegalOnly: IsAfterLegalization)) {
6581	SDValue LoHi =
6582	DAG.getNode(Opcode: ISD::UMUL_LOHI, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: X, N2: Y);
6583	return SDValue (LoHi.getNode(), `1`);
6584	}
6585	// If type twice as wide legal, widen and use a mul plus a shift.
6586	unsigned Size = VT.getScalarSizeInBits();
6587	EVT WideVT = EVT::getIntegerVT(Context&: DAG.getContext(), BitWidth: Size `2`);
6588	if (VT.isVector())
6589	WideVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT,
6590	EC: VT.getVectorElementCount());
6591	if (isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT)) {
6592	X = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: WideVT, Operand: X);
6593	Y = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: WideVT, Operand: Y);
6594	Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: X, N2: Y);
6595	Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideVT, N1: Y,
6596	N2: DAG.getShiftAmountConstant(Val: EltBits, VT: WideVT, DL: dl));
6597	return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6598	}
6599	return SDValue (); // No mulhu or equivalent
6600	};
6601
6602	// Multiply the numerator (operand 0) by the magic value.
6603	Q = GetMULHU (Q, MagicFactor);
6604	if (!Q)
6605	return SDValue ();
6606
6607	Created.push_back(Elt: Q.getNode());
6608
6609	if (UseNPQ) {
6610	SDValue NPQ = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: N0, N2: Q);
6611	Created.push_back(Elt: NPQ.getNode());
6612
6613	// For vectors we might have a mix of non-NPQ/NPQ paths, so use
6614	// MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
6615	if (VT.isVector())
6616	NPQ = GetMULHU (NPQ, NPQFactor);
6617	else
6618	NPQ = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: NPQ, N2: DAG.getConstant(Val: `1`, DL: dl, VT: ShVT));
6619
6620	Created.push_back(Elt: NPQ.getNode());
6621
6622	Q = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: NPQ, N2: Q);
6623	Created.push_back(Elt: Q.getNode());
6624	}
6625
6626	if (UsePostShift) {
6627	Q = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: PostShift);
6628	Created.push_back(Elt: Q.getNode());
6629	}
6630
6631	EVT SetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
6632
6633	SDValue One = DAG.getConstant(Val: `1`, DL: dl, VT);
6634	SDValue IsOne = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: N1, RHS: One, Cond: ISD::SETEQ);
6635	return DAG.getSelect(DL: dl, VT, Cond: IsOne, LHS: N0, RHS: Q);
6636	}
6637
6638	/// If all values in Values that don't* match the predicate are same 'splat'*
6639	/// value, then replace all values with that splat value.
6640	/// Else, if AlternativeReplacement was provided, then replace all values that
6641	/// do match predicate with AlternativeReplacement value.
6642	static void
6643	turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
6644	std::function<bool(SDValue)> Predicate,
6645	SDValue AlternativeReplacement = SDValue ()) {
6646	SDValue Replacement;
6647	// Is there a value for which the Predicate does NOT* match? What is it?*
6648	auto SplatValue = llvm::find_if_not(Range&: Values, P: Predicate);
6649	if (SplatValue != Values.end()) {
6650	// Does Values consist only of SplatValue's and values matching Predicate?
6651	if (llvm::all_of(Range&: Values, P: [Predicate, SplatValue](SDValue Value) {
6652	return Value == *SplatValue \|\| Predicate (Value);
6653	})) // Then we shall replace values matching predicate with SplatValue.
6654	Replacement = *SplatValue;
6655	}
6656	if (!Replacement) {
6657	// Oops, we did not find the "baseline" splat value.
6658	if (!AlternativeReplacement)
6659	return; // Nothing to do.
6660	// Let's replace with provided value then.
6661	Replacement = AlternativeReplacement;
6662	}
6663	std::replace_if(first: Values.begin(), last: Values.end(), pred: Predicate, new_value: Replacement);
6664	}
6665
6666	/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6667	/// where the divisor is constant and the comparison target is zero,
6668	/// return a DAG expression that will generate the same comparison result
6669	/// using only multiplications, additions and shifts/rotations.
6670	/// Ref: "Hacker's Delight" 10-17.
6671	SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6672	SDValue CompTargetNode,
6673	ISD::CondCode Cond,
6674	DAGCombinerInfo &DCI,
6675	const SDLoc &DL) const {
6676	SmallVector<SDNode *, `5`> Built;
6677	if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6678	DCI, DL, Created&: Built)) {
6679	for (SDNode *N : Built)
6680	DCI.AddToWorklist(N);
6681	return Folded;
6682	}
6683
6684	return SDValue ();
6685	}
6686
6687	SDValue
6688	TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
6689	SDValue CompTargetNode, ISD::CondCode Cond,
6690	DAGCombinerInfo &DCI, const SDLoc &DL,
6691	SmallVectorImpl<SDNode > &Created) const* {
6692	// fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
6693	// - D must be constant, with D = D0 2^K where D0 is odd*
6694	// - P is the multiplicative inverse of D0 modulo 2^W
6695	// - Q = floor(((2^W) - 1) / D)
6696	// where W is the width of the common type of N and D.
6697	assert((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
6698	"Only applicable for (in)equality comparisons.");
6699
6700	SelectionDAG &DAG = DCI.DAG;
6701
6702	EVT VT = REMNode.getValueType();
6703	EVT SVT = VT.getScalarType();
6704	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6705	EVT ShSVT = ShVT.getScalarType();
6706
6707	// If MUL is unavailable, we cannot proceed in any case.
6708	if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::MUL, VT))
6709	return SDValue ();
6710
6711	bool ComparingWithAllZeros = true;
6712	bool AllComparisonsWithNonZerosAreTautological = true;
6713	bool HadTautologicalLanes = false;
6714	bool AllLanesAreTautological = true;
6715	bool HadEvenDivisor = false;
6716	bool AllDivisorsArePowerOfTwo = true;
6717	bool HadTautologicalInvertedLanes = false;
6718	SmallVector<SDValue, `16`> PAmts, KAmts, QAmts, IAmts;
6719
6720	auto BuildUREMPattern = [&](ConstantSDNode CDiv, ConstantSDNode CCmp) {
6721	// Division by 0 is UB. Leave it to be constant-folded elsewhere.
6722	if (CDiv->isZero())
6723	return false;
6724
6725	const APInt &D = CDiv->getAPIntValue();
6726	const APInt &Cmp = CCmp->getAPIntValue();
6727
6728	ComparingWithAllZeros &= Cmp.isZero();
6729
6730	// x u% C1` is always* less than C1. So given `x u% C1 == C2`,*
6731	// if C2 is not less than C1, the comparison is always false.
6732	// But we will only be able to produce the comparison that will give the
6733	// opposive tautological answer. So this lane would need to be fixed up.
6734	bool TautologicalInvertedLane = D.ule(RHS: Cmp);
6735	HadTautologicalInvertedLanes \|= TautologicalInvertedLane;
6736
6737	// If all lanes are tautological (either all divisors are ones, or divisor
6738	// is not greater than the constant we are comparing with),
6739	// we will prefer to avoid the fold.
6740	bool TautologicalLane = D.isOne() \|\| TautologicalInvertedLane;
6741	HadTautologicalLanes \|= TautologicalLane;
6742	AllLanesAreTautological &= TautologicalLane;
6743
6744	// If we are comparing with non-zero, we need'll need to subtract said
6745	// comparison value from the LHS. But there is no point in doing that if
6746	// every lane where we are comparing with non-zero is tautological..
6747	if (!Cmp.isZero())
6748	AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
6749
6750	// Decompose D into D0 2^K*
6751	unsigned K = D.countr_zero();
6752	assert((!D.isOne() \|\| (K == `0`)) && "For divisor '1' we won't rotate.");
6753	APInt D0 = D.lshr(shiftAmt: K);
6754
6755	// D is even if it has trailing zeros.
6756	HadEvenDivisor \|= (K != `0`);
6757	// D is a power-of-two if D0 is one.
6758	// If all divisors are power-of-two, we will prefer to avoid the fold.
6759	AllDivisorsArePowerOfTwo &= D0.isOne();
6760
6761	// P = inv(D0, 2^W)
6762	// 2^W requires W + 1 bits, so we have to extend and then truncate.
6763	unsigned W = D.getBitWidth();
6764	APInt P = D0.multiplicativeInverse();
6765	assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6766
6767	// Q = floor((2^W - 1) u/ D)
6768	// R = ((2^W - 1) u% D)
6769	APInt Q, R;
6770	APInt::udivrem(LHS: APInt::getAllOnes(numBits: W), RHS: D, Quotient&: Q, Remainder&: R);
6771
6772	// If we are comparing with zero, then that comparison constant is okay,
6773	// else it may need to be one less than that.
6774	if (Cmp.ugt(RHS: R))
6775	Q -= `1`;
6776
6777	assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
6778	"We are expecting that K is always less than all-ones for ShSVT");
6779
6780	// If the lane is tautological the result can be constant-folded.
6781	if (TautologicalLane) {
6782	// Set P and K amount to a bogus values so we can try to splat them.
6783	P = `0`;
6784	K = -`1`;
6785	// And ensure that comparison constant is tautological,
6786	// it will always compare true/false.
6787	Q = -`1`;
6788	}
6789
6790	PAmts.push_back(Elt: DAG.getConstant(Val: P, DL, VT: SVT));
6791	KAmts.push_back(
6792	Elt: DAG.getConstant(Val: APInt (ShSVT.getSizeInBits(), K), DL, VT: ShSVT));
6793	QAmts.push_back(Elt: DAG.getConstant(Val: Q, DL, VT: SVT));
6794	return true;
6795	};
6796
6797	SDValue N = REMNode.getOperand(i: `0`);
6798	SDValue D = REMNode.getOperand(i: `1`);
6799
6800	// Collect the values from each element.
6801	if (!ISD::matchBinaryPredicate(LHS: D, RHS: CompTargetNode, Match: BuildUREMPattern))
6802	return SDValue ();
6803
6804	// If all lanes are tautological, the result can be constant-folded.
6805	if (AllLanesAreTautological)
6806	return SDValue ();
6807
6808	// If this is a urem by a powers-of-two, avoid the fold since it can be
6809	// best implemented as a bit test.
6810	if (AllDivisorsArePowerOfTwo)
6811	return SDValue ();
6812
6813	SDValue PVal, KVal, QVal;
6814	if (D.getOpcode() == ISD::BUILD_VECTOR) {
6815	if (HadTautologicalLanes) {
6816	// Try to turn PAmts into a splat, since we don't care about the values
6817	// that are currently '0'. If we can't, just keep '0'`s.
6818	turnVectorIntoSplatVector(Values: PAmts, Predicate: isNullConstant);
6819	// Try to turn KAmts into a splat, since we don't care about the values
6820	// that are currently '-1'. If we can't, change them to '0'`s.
6821	turnVectorIntoSplatVector(Values: KAmts, Predicate: isAllOnesConstant,
6822	AlternativeReplacement: DAG.getConstant(Val: `0`, DL, VT: ShSVT));
6823	}
6824
6825	PVal = DAG.getBuildVector(VT, DL, Ops: PAmts);
6826	KVal = DAG.getBuildVector(VT: ShVT, DL, Ops: KAmts);
6827	QVal = DAG.getBuildVector(VT, DL, Ops: QAmts);
6828	} else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
6829	assert(PAmts.size() == `1` && KAmts.size() == `1` && QAmts.size() == `1` &&
6830	"Expected matchBinaryPredicate to return one element for "
6831	"SPLAT_VECTORs");
6832	PVal = DAG.getSplatVector(VT, DL, Op: PAmts [`0`]);
6833	KVal = DAG.getSplatVector(VT: ShVT, DL, Op: KAmts [`0`]);
6834	QVal = DAG.getSplatVector(VT, DL, Op: QAmts [`0`]);
6835	} else {
6836	PVal = PAmts [`0`];
6837	KVal = KAmts [`0`];
6838	QVal = QAmts [`0`];
6839	}
6840
6841	if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
6842	if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::SUB, VT))
6843	return SDValue (); // FIXME: Could/should use `ISD::ADD`?
6844	assert(CompTargetNode.getValueType() == N.getValueType() &&
6845	"Expecting that the types on LHS and RHS of comparisons match.");
6846	N = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: N, N2: CompTargetNode);
6847	}
6848
6849	// (mul N, P)
6850	SDValue Op0 = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N, N2: PVal);
6851	Created.push_back(Elt: Op0.getNode());
6852
6853	// Rotate right only if any divisor was even. We avoid rotates for all-odd
6854	// divisors as a performance improvement, since rotating by 0 is a no-op.
6855	if (HadEvenDivisor) {
6856	// We need ROTR to do this.
6857	if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ROTR, VT))
6858	return SDValue ();
6859	// UREM: (rotr (mul N, P), K)
6860	Op0 = DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: Op0, N2: KVal);
6861	Created.push_back(Elt: Op0.getNode());
6862	}
6863
6864	// UREM: (setule/setugt (rotr (mul N, P), K), Q)
6865	SDValue NewCC =
6866	DAG.getSetCC(DL, VT: SETCCVT, LHS: Op0, RHS: QVal,
6867	Cond: ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
6868	if (!HadTautologicalInvertedLanes)
6869	return NewCC;
6870
6871	// If any lanes previously compared always-false, the NewCC will give
6872	// always-true result for them, so we need to fixup those lanes.
6873	// Or the other way around for inequality predicate.
6874	assert(VT.isVector() && "Can/should only get here for vectors.");
6875	Created.push_back(Elt: NewCC.getNode());
6876
6877	// x u% C1` is always* less than C1. So given `x u% C1 == C2`,*
6878	// if C2 is not less than C1, the comparison is always false.
6879	// But we have produced the comparison that will give the
6880	// opposive tautological answer. So these lanes would need to be fixed up.
6881	SDValue TautologicalInvertedChannels =
6882	DAG.getSetCC(DL, VT: SETCCVT, LHS: D, RHS: CompTargetNode, Cond: ISD::SETULE);
6883	Created.push_back(Elt: TautologicalInvertedChannels.getNode());
6884
6885	// NOTE: we avoid letting illegal types through even if we're before legalize
6886	// ops – legalization has a hard time producing good code for this.
6887	if (isOperationLegalOrCustom(Op: ISD::VSELECT, VT: SETCCVT)) {
6888	// If we have a vector select, let's replace the comparison results in the
6889	// affected lanes with the correct tautological result.
6890	SDValue Replacement = DAG.getBoolConstant(V: Cond == ISD::SETEQ ? false : true,
6891	DL, VT: SETCCVT, OpVT: SETCCVT);
6892	return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: SETCCVT, N1: TautologicalInvertedChannels,
6893	N2: Replacement, N3: NewCC);
6894	}
6895
6896	// Else, we can just invert the comparison result in the appropriate lanes.
6897	//
6898	// NOTE: see the note above VSELECT above.
6899	if (isOperationLegalOrCustom(Op: ISD::XOR, VT: SETCCVT))
6900	return DAG.getNode(Opcode: ISD::XOR, DL, VT: SETCCVT, N1: NewCC,
6901	N2: TautologicalInvertedChannels);
6902
6903	return SDValue (); // Don't know how to lower.
6904	}
6905
6906	/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
6907	/// where the divisor is constant and the comparison target is zero,
6908	/// return a DAG expression that will generate the same comparison result
6909	/// using only multiplications, additions and shifts/rotations.
6910	/// Ref: "Hacker's Delight" 10-17.
6911	SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
6912	SDValue CompTargetNode,
6913	ISD::CondCode Cond,
6914	DAGCombinerInfo &DCI,
6915	const SDLoc &DL) const {
6916	SmallVector<SDNode *, `7`> Built;
6917	if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6918	DCI, DL, Created&: Built)) {
6919	assert(Built.size() <= `7` && "Max size prediction failed.");
6920	for (SDNode *N : Built)
6921	DCI.AddToWorklist(N);
6922	return Folded;
6923	}
6924
6925	return SDValue ();
6926	}
6927
6928	SDValue
6929	TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
6930	SDValue CompTargetNode, ISD::CondCode Cond,
6931	DAGCombinerInfo &DCI, const SDLoc &DL,
6932	SmallVectorImpl<SDNode > &Created) const* {
6933	// Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
6934	// Fold:
6935	// (seteq/ne (srem N, D), 0)
6936	// To:
6937	// (setule/ugt (rotr (add (mul N, P), A), K), Q)
6938	//
6939	// - D must be constant, with D = D0 2^K where D0 is odd*
6940	// - P is the multiplicative inverse of D0 modulo 2^W
6941	// - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
6942	// - Q = floor((2 A) / (2^K))*
6943	// where W is the width of the common type of N and D.
6944	//
6945	// When D is a power of two (and thus D0 is 1), the normal
6946	// formula for A and Q don't apply, because the derivation
6947	// depends on D not dividing 2^(W-1), and thus theorem ZRS
6948	// does not apply. This specifically fails when N = INT_MIN.
6949	//
6950	// Instead, for power-of-two D, we use:
6951	// - A = 2^(W-1)
6952	// \|-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
6953	// - Q = 2^(W-K) - 1
6954	// \|-> Test that the top K bits are zero after rotation
6955	assert((Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
6956	"Only applicable for (in)equality comparisons.");
6957
6958	SelectionDAG &DAG = DCI.DAG;
6959
6960	EVT VT = REMNode.getValueType();
6961	EVT SVT = VT.getScalarType();
6962	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6963	EVT ShSVT = ShVT.getScalarType();
6964
6965	// If we are after ops legalization, and MUL is unavailable, we can not
6966	// proceed.
6967	if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::MUL, VT))
6968	return SDValue ();
6969
6970	// TODO: Could support comparing with non-zero too.
6971	ConstantSDNode *CompTarget = isConstOrConstSplat(N: CompTargetNode);
6972	if (!CompTarget \|\| !CompTarget->isZero())
6973	return SDValue ();
6974
6975	bool HadIntMinDivisor = false;
6976	bool HadOneDivisor = false;
6977	bool AllDivisorsAreOnes = true;
6978	bool HadEvenDivisor = false;
6979	bool NeedToApplyOffset = false;
6980	bool AllDivisorsArePowerOfTwo = true;
6981	SmallVector<SDValue, `16`> PAmts, AAmts, KAmts, QAmts;
6982
6983	auto BuildSREMPattern = [&](ConstantSDNode *C) {
6984	// Division by 0 is UB. Leave it to be constant-folded elsewhere.
6985	if (C->isZero())
6986	return false;
6987
6988	// FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
6989
6990	// WARNING: this fold is only valid for positive divisors!
6991	APInt D = C->getAPIntValue();
6992	if (D.isNegative())
6993	D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
6994
6995	HadIntMinDivisor \|= D.isMinSignedValue();
6996
6997	// If all divisors are ones, we will prefer to avoid the fold.
6998	HadOneDivisor \|= D.isOne();
6999	AllDivisorsAreOnes &= D.isOne();
7000
7001	// Decompose D into D0 2^K*
7002	unsigned K = D.countr_zero();
7003	assert((!D.isOne() \|\| (K == `0`)) && "For divisor '1' we won't rotate.");
7004	APInt D0 = D.lshr(shiftAmt: K);
7005
7006	if (!D.isMinSignedValue()) {
7007	// D is even if it has trailing zeros; unless it's INT_MIN, in which case
7008	// we don't care about this lane in this fold, we'll special-handle it.
7009	HadEvenDivisor \|= (K != `0`);
7010	}
7011
7012	// D is a power-of-two if D0 is one. This includes INT_MIN.
7013	// If all divisors are power-of-two, we will prefer to avoid the fold.
7014	AllDivisorsArePowerOfTwo &= D0.isOne();
7015
7016	// P = inv(D0, 2^W)
7017	// 2^W requires W + 1 bits, so we have to extend and then truncate.
7018	unsigned W = D.getBitWidth();
7019	APInt P = D0.multiplicativeInverse();
7020	assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7021
7022	// A = floor((2^(W - 1) - 1) / D0) & -2^K
7023	APInt A = APInt::getSignedMaxValue(numBits: W).udiv(RHS: D0);
7024	A.clearLowBits(loBits: K);
7025
7026	if (!D.isMinSignedValue()) {
7027	// If divisor INT_MIN, then we don't care about this lane in this fold,
7028	// we'll special-handle it.
7029	NeedToApplyOffset \|= A != `0`;
7030	}
7031
7032	// Q = floor((2 A) / (2^K))*
7033	APInt Q = (`2` * A).udiv(RHS: APInt::getOneBitSet(numBits: W, BitNo: K));
7034
7035	assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
7036	"We are expecting that A is always less than all-ones for SVT");
7037	assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
7038	"We are expecting that K is always less than all-ones for ShSVT");
7039
7040	// If D was a power of two, apply the alternate constant derivation.
7041	if (D0.isOne()) {
7042	// A = 2^(W-1)
7043	A = APInt::getSignedMinValue(numBits: W);
7044	// - Q = 2^(W-K) - 1
7045	Q = APInt::getAllOnes(numBits: W - K).zext(width: W);
7046	}
7047
7048	// If the divisor is 1 the result can be constant-folded. Likewise, we
7049	// don't care about INT_MIN lanes, those can be set to undef if appropriate.
7050	if (D.isOne()) {
7051	// Set P, A and K to a bogus values so we can try to splat them.
7052	P = `0`;
7053	A = -`1`;
7054	K = -`1`;
7055
7056	// x ?% 1 == 0 <--> true <--> x u<= -1
7057	Q = -`1`;
7058	}
7059
7060	PAmts.push_back(Elt: DAG.getConstant(Val: P, DL, VT: SVT));
7061	AAmts.push_back(Elt: DAG.getConstant(Val: A, DL, VT: SVT));
7062	KAmts.push_back(
7063	Elt: DAG.getConstant(Val: APInt (ShSVT.getSizeInBits(), K), DL, VT: ShSVT));
7064	QAmts.push_back(Elt: DAG.getConstant(Val: Q, DL, VT: SVT));
7065	return true;
7066	};
7067
7068	SDValue N = REMNode.getOperand(i: `0`);
7069	SDValue D = REMNode.getOperand(i: `1`);
7070
7071	// Collect the values from each element.
7072	if (!ISD::matchUnaryPredicate(Op: D, Match: BuildSREMPattern))
7073	return SDValue ();
7074
7075	// If this is a srem by a one, avoid the fold since it can be constant-folded.
7076	if (AllDivisorsAreOnes)
7077	return SDValue ();
7078
7079	// If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
7080	// since it can be best implemented as a bit test.
7081	if (AllDivisorsArePowerOfTwo)
7082	return SDValue ();
7083
7084	SDValue PVal, AVal, KVal, QVal;
7085	if (D.getOpcode() == ISD::BUILD_VECTOR) {
7086	if (HadOneDivisor) {
7087	// Try to turn PAmts into a splat, since we don't care about the values
7088	// that are currently '0'. If we can't, just keep '0'`s.
7089	turnVectorIntoSplatVector(Values: PAmts, Predicate: isNullConstant);
7090	// Try to turn AAmts into a splat, since we don't care about the
7091	// values that are currently '-1'. If we can't, change them to '0'`s.
7092	turnVectorIntoSplatVector(Values: AAmts, Predicate: isAllOnesConstant,
7093	AlternativeReplacement: DAG.getConstant(Val: `0`, DL, VT: SVT));
7094	// Try to turn KAmts into a splat, since we don't care about the values
7095	// that are currently '-1'. If we can't, change them to '0'`s.
7096	turnVectorIntoSplatVector(Values: KAmts, Predicate: isAllOnesConstant,
7097	AlternativeReplacement: DAG.getConstant(Val: `0`, DL, VT: ShSVT));
7098	}
7099
7100	PVal = DAG.getBuildVector(VT, DL, Ops: PAmts);
7101	AVal = DAG.getBuildVector(VT, DL, Ops: AAmts);
7102	KVal = DAG.getBuildVector(VT: ShVT, DL, Ops: KAmts);
7103	QVal = DAG.getBuildVector(VT, DL, Ops: QAmts);
7104	} else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7105	assert(PAmts.size() == `1` && AAmts.size() == `1` && KAmts.size() == `1` &&
7106	QAmts.size() == `1` &&
7107	"Expected matchUnaryPredicate to return one element for scalable "
7108	"vectors");
7109	PVal = DAG.getSplatVector(VT, DL, Op: PAmts [`0`]);
7110	AVal = DAG.getSplatVector(VT, DL, Op: AAmts [`0`]);
7111	KVal = DAG.getSplatVector(VT: ShVT, DL, Op: KAmts [`0`]);
7112	QVal = DAG.getSplatVector(VT, DL, Op: QAmts [`0`]);
7113	} else {
7114	assert(isa<ConstantSDNode>(D) && "Expected a constant");
7115	PVal = PAmts [`0`];
7116	AVal = AAmts [`0`];
7117	KVal = KAmts [`0`];
7118	QVal = QAmts [`0`];
7119	}
7120
7121	// (mul N, P)
7122	SDValue Op0 = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N, N2: PVal);
7123	Created.push_back(Elt: Op0.getNode());
7124
7125	if (NeedToApplyOffset) {
7126	// We need ADD to do this.
7127	if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ADD, VT))
7128	return SDValue ();
7129
7130	// (add (mul N, P), A)
7131	Op0 = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0, N2: AVal);
7132	Created.push_back(Elt: Op0.getNode());
7133	}
7134
7135	// Rotate right only if any divisor was even. We avoid rotates for all-odd
7136	// divisors as a performance improvement, since rotating by 0 is a no-op.
7137	if (HadEvenDivisor) {
7138	// We need ROTR to do this.
7139	if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ROTR, VT))
7140	return SDValue ();
7141	// SREM: (rotr (add (mul N, P), A), K)
7142	Op0 = DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: Op0, N2: KVal);
7143	Created.push_back(Elt: Op0.getNode());
7144	}
7145
7146	// SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7147	SDValue Fold =
7148	DAG.getSetCC(DL, VT: SETCCVT, LHS: Op0, RHS: QVal,
7149	Cond: ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7150
7151	// If we didn't have lanes with INT_MIN divisor, then we're done.
7152	if (!HadIntMinDivisor)
7153	return Fold;
7154
7155	// That fold is only valid for positive divisors. Which effectively means,
7156	// it is invalid for INT_MIN divisors. So if we have such a lane,
7157	// we must fix-up results for said lanes.
7158	assert(VT.isVector() && "Can/should only get here for vectors.");
7159
7160	// NOTE: we avoid letting illegal types through even if we're before legalize
7161	// ops – legalization has a hard time producing good code for the code that
7162	// follows.
7163	if (!isOperationLegalOrCustom(Op: ISD::SETCC, VT: SETCCVT) \|\|
7164	!isOperationLegalOrCustom(Op: ISD::AND, VT) \|\|
7165	!isCondCodeLegalOrCustom(CC: Cond, VT: VT.getSimpleVT()) \|\|
7166	!isOperationLegalOrCustom(Op: ISD::VSELECT, VT: SETCCVT))
7167	return SDValue ();
7168
7169	Created.push_back(Elt: Fold.getNode());
7170
7171	SDValue IntMin = DAG.getConstant(
7172	Val: APInt::getSignedMinValue(numBits: SVT.getScalarSizeInBits()), DL, VT);
7173	SDValue IntMax = DAG.getConstant(
7174	Val: APInt::getSignedMaxValue(numBits: SVT.getScalarSizeInBits()), DL, VT);
7175	SDValue Zero =
7176	DAG.getConstant(Val: APInt::getZero(numBits: SVT.getScalarSizeInBits()), DL, VT);
7177
7178	// Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7179	SDValue DivisorIsIntMin = DAG.getSetCC(DL, VT: SETCCVT, LHS: D, RHS: IntMin, Cond: ISD::SETEQ);
7180	Created.push_back(Elt: DivisorIsIntMin.getNode());
7181
7182	// (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
7183	SDValue Masked = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: N, N2: IntMax);
7184	Created.push_back(Elt: Masked.getNode());
7185	SDValue MaskedIsZero = DAG.getSetCC(DL, VT: SETCCVT, LHS: Masked, RHS: Zero, Cond);
7186	Created.push_back(Elt: MaskedIsZero.getNode());
7187
7188	// To produce final result we need to blend 2 vectors: 'SetCC' and
7189	// 'MaskedIsZero'. If the divisor for channel was NOT* INT_MIN, we pick*
7190	// from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7191	// constant-folded, select can get lowered to a shuffle with constant mask.
7192	SDValue Blended = DAG.getNode(Opcode: ISD::VSELECT, DL, VT: SETCCVT, N1: DivisorIsIntMin,
7193	N2: MaskedIsZero, N3: Fold);
7194
7195	return Blended;
7196	}
7197
7198	bool TargetLowering::
7199	verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
7200	if (!isa<ConstantSDNode>(Val: Op.getOperand(i: `0`))) {
7201	DAG.getContext()->emitError(ErrorStr: "argument to '__builtin_return_address' must "
7202	"be a constant integer");
7203	return true;
7204	}
7205
7206	return false;
7207	}
7208
7209	SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
7210	const DenormalMode &Mode) const {
7211	SDLoc DL(Op);
7212	EVT VT = Op.getValueType();
7213	EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
7214	SDValue FPZero = DAG.getConstantFP(Val: `0.0`, DL, VT);
7215
7216	// This is specifically a check for the handling of denormal inputs, not the
7217	// result.
7218	if (Mode.Input == DenormalMode::PreserveSign \|\|
7219	Mode.Input == DenormalMode::PositiveZero) {
7220	// Test = X == 0.0
7221	return DAG.getSetCC(DL, VT: CCVT, LHS: Op, RHS: FPZero, Cond: ISD::SETEQ);
7222	}
7223
7224	// Testing it with denormal inputs to avoid wrong estimate.
7225	//
7226	// Test = fabs(X) < SmallestNormal
7227	const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
7228	APFloat SmallestNorm = APFloat::getSmallestNormalized(Sem: FltSem);
7229	SDValue NormC = DAG.getConstantFP(Val: SmallestNorm, DL, VT);
7230	SDValue Fabs = DAG.getNode(Opcode: ISD::FABS, DL, VT, Operand: Op);
7231	return DAG.getSetCC(DL, VT: CCVT, LHS: Fabs, RHS: NormC, Cond: ISD::SETLT);
7232	}
7233
7234	SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
7235	bool LegalOps, bool OptForSize,
7236	NegatibleCost &Cost,
7237	unsigned Depth) const {
7238	// fneg is removable even if it has multiple uses.
7239	if (Op.getOpcode() == ISD::FNEG \|\| Op.getOpcode() == ISD::VP_FNEG) {
7240	Cost = NegatibleCost::Cheaper;
7241	return Op.getOperand(i: `0`);
7242	}
7243
7244	// Don't recurse exponentially.
7245	if (Depth > SelectionDAG::MaxRecursionDepth)
7246	return SDValue ();
7247
7248	// Pre-increment recursion depth for use in recursive calls.
7249	++Depth;
7250	const SDNodeFlags Flags = Op ->getFlags();
7251	const TargetOptions &Options = DAG.getTarget().Options;
7252	EVT VT = Op.getValueType();
7253	unsigned Opcode = Op.getOpcode();
7254
7255	// Don't allow anything with multiple uses unless we know it is free.
7256	if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7257	bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7258	isFPExtFree(DestVT: VT, SrcVT: Op.getOperand(i: `0`).getValueType());
7259	if (!IsFreeExtend)
7260	return SDValue ();
7261	}
7262
7263	auto RemoveDeadNode = [&](SDValue N) {
7264	if (N && N.getNode()->use_empty())
7265	DAG.RemoveDeadNode(N: N.getNode());
7266	};
7267
7268	SDLoc DL(Op);
7269
7270	// Because getNegatedExpression can delete nodes we need a handle to keep
7271	// temporary nodes alive in case the recursion manages to create an identical
7272	// node.
7273	std::list<HandleSDNode> Handles;
7274
7275	switch (Opcode) {
7276	case ISD::ConstantFP: {
7277	// Don't invert constant FP values after legalization unless the target says
7278	// the negated constant is legal.
7279	bool IsOpLegal =
7280	isOperationLegal(Op: ISD::ConstantFP, VT) \|\|
7281	isFPImmLegal(neg(X: cast<ConstantFPSDNode>(Val&: Op)->getValueAPF()), VT,
7282	ForCodeSize: OptForSize);
7283
7284	if (LegalOps && !IsOpLegal)
7285	break;
7286
7287	APFloat V = cast<ConstantFPSDNode>(Val&: Op)->getValueAPF();
7288	V.changeSign();
7289	SDValue CFP = DAG.getConstantFP(Val: V, DL, VT);
7290
7291	// If we already have the use of the negated floating constant, it is free
7292	// to negate it even it has multiple uses.
7293	if (!Op.hasOneUse() && CFP.use_empty())
7294	break;
7295	Cost = NegatibleCost::Neutral;
7296	return CFP;
7297	}
7298	case ISD::BUILD_VECTOR: {
7299	// Only permit BUILD_VECTOR of constants.
7300	if (llvm::any_of(Range: Op ->op_values(), P: [&](SDValue N) {
7301	return !N.isUndef() && !isa<ConstantFPSDNode>(Val: N);
7302	}))
7303	break;
7304
7305	bool IsOpLegal =
7306	(isOperationLegal(Op: ISD::ConstantFP, VT) &&
7307	isOperationLegal(Op: ISD::BUILD_VECTOR, VT)) \|\|
7308	llvm::all_of(Range: Op ->op_values(), P: [&](SDValue N) {
7309	return N.isUndef() \|\|
7310	isFPImmLegal(neg(X: cast<ConstantFPSDNode>(Val&: N)->getValueAPF()), VT,
7311	ForCodeSize: OptForSize);
7312	});
7313
7314	if (LegalOps && !IsOpLegal)
7315	break;
7316
7317	SmallVector<SDValue, `4`> Ops;
7318	for (SDValue C : Op ->op_values()) {
7319	if (C.isUndef()) {
7320	Ops.push_back(Elt: C);
7321	continue;
7322	}
7323	APFloat V = cast<ConstantFPSDNode>(Val&: C)->getValueAPF();
7324	V.changeSign();
7325	Ops.push_back(Elt: DAG.getConstantFP(Val: V, DL, VT: C.getValueType()));
7326	}
7327	Cost = NegatibleCost::Neutral;
7328	return DAG.getBuildVector(VT, DL, Ops);
7329	}
7330	case ISD::FADD: {
7331	if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7332	break;
7333
7334	// After operation legalization, it might not be legal to create new FSUBs.
7335	if (LegalOps && !isOperationLegalOrCustom(Op: ISD::FSUB, VT))
7336	break;
7337	SDValue X = Op.getOperand(i: `0`), Y = Op.getOperand(i: `1`);
7338
7339	// fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7340	NegatibleCost CostX = NegatibleCost::Expensive;
7341	SDValue NegX =
7342	getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7343	// Prevent this node from being deleted by the next call.
7344	if (NegX)
7345	Handles.emplace_back(args&: NegX);
7346
7347	// fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7348	NegatibleCost CostY = NegatibleCost::Expensive;
7349	SDValue NegY =
7350	getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7351
7352	// We're done with the handles.
7353	Handles.clear();
7354
7355	// Negate the X if its cost is less or equal than Y.
7356	if (NegX && (CostX <= CostY)) {
7357	Cost = CostX;
7358	SDValue N = DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: NegX, N2: Y, Flags);
7359	if (NegY != N)
7360	RemoveDeadNode (NegY);
7361	return N;
7362	}
7363
7364	// Negate the Y if it is not expensive.
7365	if (NegY) {
7366	Cost = CostY;
7367	SDValue N = DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: NegY, N2: X, Flags);
7368	if (NegX != N)
7369	RemoveDeadNode (NegX);
7370	return N;
7371	}
7372	break;
7373	}
7374	case ISD::FSUB: {
7375	// We can't turn -(A-B) into B-A when we honor signed zeros.
7376	if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7377	break;
7378
7379	SDValue X = Op.getOperand(i: `0`), Y = Op.getOperand(i: `1`);
7380	// fold (fneg (fsub 0, Y)) -> Y
7381	if (ConstantFPSDNode C = isConstOrConstSplatFP(N: X, /AllowUndefs/* true))
7382	if (C->isZero()) {
7383	Cost = NegatibleCost::Cheaper;
7384	return Y;
7385	}
7386
7387	// fold (fneg (fsub X, Y)) -> (fsub Y, X)
7388	Cost = NegatibleCost::Neutral;
7389	return DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: Y, N2: X, Flags);
7390	}
7391	case ISD::FMUL:
7392	case ISD::FDIV: {
7393	SDValue X = Op.getOperand(i: `0`), Y = Op.getOperand(i: `1`);
7394
7395	// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7396	NegatibleCost CostX = NegatibleCost::Expensive;
7397	SDValue NegX =
7398	getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7399	// Prevent this node from being deleted by the next call.
7400	if (NegX)
7401	Handles.emplace_back(args&: NegX);
7402
7403	// fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7404	NegatibleCost CostY = NegatibleCost::Expensive;
7405	SDValue NegY =
7406	getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7407
7408	// We're done with the handles.
7409	Handles.clear();
7410
7411	// Negate the X if its cost is less or equal than Y.
7412	if (NegX && (CostX <= CostY)) {
7413	Cost = CostX;
7414	SDValue N = DAG.getNode(Opcode, DL, VT, N1: NegX, N2: Y, Flags);
7415	if (NegY != N)
7416	RemoveDeadNode (NegY);
7417	return N;
7418	}
7419
7420	// Ignore X 2.0 because that is expected to be canonicalized to X + X.*
7421	if (auto *C = isConstOrConstSplatFP(N: Op.getOperand(i: `1`)))
7422	if (C->isExactlyValue(V: `2.0`) && Op.getOpcode() == ISD::FMUL)
7423	break;
7424
7425	// Negate the Y if it is not expensive.
7426	if (NegY) {
7427	Cost = CostY;
7428	SDValue N = DAG.getNode(Opcode, DL, VT, N1: X, N2: NegY, Flags);
7429	if (NegX != N)
7430	RemoveDeadNode (NegX);
7431	return N;
7432	}
7433	break;
7434	}
7435	case ISD::FMA:
7436	case ISD::FMAD: {
7437	if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7438	break;
7439
7440	SDValue X = Op.getOperand(i: `0`), Y = Op.getOperand(i: `1`), Z = Op.getOperand(i: `2`);
7441	NegatibleCost CostZ = NegatibleCost::Expensive;
7442	SDValue NegZ =
7443	getNegatedExpression(Op: Z, DAG, LegalOps, OptForSize, Cost&: CostZ, Depth);
7444	// Give up if fail to negate the Z.
7445	if (!NegZ)
7446	break;
7447
7448	// Prevent this node from being deleted by the next two calls.
7449	Handles.emplace_back(args&: NegZ);
7450
7451	// fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7452	NegatibleCost CostX = NegatibleCost::Expensive;
7453	SDValue NegX =
7454	getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7455	// Prevent this node from being deleted by the next call.
7456	if (NegX)
7457	Handles.emplace_back(args&: NegX);
7458
7459	// fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7460	NegatibleCost CostY = NegatibleCost::Expensive;
7461	SDValue NegY =
7462	getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7463
7464	// We're done with the handles.
7465	Handles.clear();
7466
7467	// Negate the X if its cost is less or equal than Y.
7468	if (NegX && (CostX <= CostY)) {
7469	Cost = std::min(a: CostX, b: CostZ);
7470	SDValue N = DAG.getNode(Opcode, DL, VT, N1: NegX, N2: Y, N3: NegZ, Flags);
7471	if (NegY != N)
7472	RemoveDeadNode (NegY);
7473	return N;
7474	}
7475
7476	// Negate the Y if it is not expensive.
7477	if (NegY) {
7478	Cost = std::min(a: CostY, b: CostZ);
7479	SDValue N = DAG.getNode(Opcode, DL, VT, N1: X, N2: NegY, N3: NegZ, Flags);
7480	if (NegX != N)
7481	RemoveDeadNode (NegX);
7482	return N;
7483	}
7484	break;
7485	}
7486
7487	case ISD::FP_EXTEND:
7488	case ISD::FSIN:
7489	if (SDValue NegV = getNegatedExpression(Op: Op.getOperand(i: `0`), DAG, LegalOps,
7490	OptForSize, Cost, Depth))
7491	return DAG.getNode(Opcode, DL, VT, Operand: NegV);
7492	break;
7493	case ISD::FP_ROUND:
7494	if (SDValue NegV = getNegatedExpression(Op: Op.getOperand(i: `0`), DAG, LegalOps,
7495	OptForSize, Cost, Depth))
7496	return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT, N1: NegV, N2: Op.getOperand(i: `1`));
7497	break;
7498	case ISD::SELECT:
7499	case ISD::VSELECT: {
7500	// fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7501	// iff at least one cost is cheaper and the other is neutral/cheaper
7502	SDValue LHS = Op.getOperand(i: `1`);
7503	NegatibleCost CostLHS = NegatibleCost::Expensive;
7504	SDValue NegLHS =
7505	getNegatedExpression(Op: LHS, DAG, LegalOps, OptForSize, Cost&: CostLHS, Depth);
7506	if (!NegLHS \|\| CostLHS > NegatibleCost::Neutral) {
7507	RemoveDeadNode (NegLHS);
7508	break;
7509	}
7510
7511	// Prevent this node from being deleted by the next call.
7512	Handles.emplace_back(args&: NegLHS);
7513
7514	SDValue RHS = Op.getOperand(i: `2`);
7515	NegatibleCost CostRHS = NegatibleCost::Expensive;
7516	SDValue NegRHS =
7517	getNegatedExpression(Op: RHS, DAG, LegalOps, OptForSize, Cost&: CostRHS, Depth);
7518
7519	// We're done with the handles.
7520	Handles.clear();
7521
7522	if (!NegRHS \|\| CostRHS > NegatibleCost::Neutral \|\|
7523	(CostLHS != NegatibleCost::Cheaper &&
7524	CostRHS != NegatibleCost::Cheaper)) {
7525	RemoveDeadNode (NegLHS);
7526	RemoveDeadNode (NegRHS);
7527	break;
7528	}
7529
7530	Cost = std::min(a: CostLHS, b: CostRHS);
7531	return DAG.getSelect(DL, VT, Cond: Op.getOperand(i: `0`), LHS: NegLHS, RHS: NegRHS);
7532	}
7533	}
7534
7535	return SDValue ();
7536	}
7537
7538	//===----------------------------------------------------------------------===//
7539	// Legalization Utilities
7540	//===----------------------------------------------------------------------===//
7541
7542	bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7543	SDValue LHS, SDValue RHS,
7544	SmallVectorImpl<SDValue> &Result,
7545	EVT HiLoVT, SelectionDAG &DAG,
7546	MulExpansionKind Kind, SDValue LL,
7547	SDValue LH, SDValue RL, SDValue RH) const {
7548	assert(Opcode == ISD::MUL \|\| Opcode == ISD::UMUL_LOHI \|\|
7549	Opcode == ISD::SMUL_LOHI);
7550
7551	bool HasMULHS = (Kind == MulExpansionKind::Always) \|\|
7552	isOperationLegalOrCustom(Op: ISD::MULHS, VT: HiLoVT);
7553	bool HasMULHU = (Kind == MulExpansionKind::Always) \|\|
7554	isOperationLegalOrCustom(Op: ISD::MULHU, VT: HiLoVT);
7555	bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) \|\|
7556	isOperationLegalOrCustom(Op: ISD::SMUL_LOHI, VT: HiLoVT);
7557	bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) \|\|
7558	isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT: HiLoVT);
7559
7560	if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7561	return false;
7562
7563	unsigned OuterBitSize = VT.getScalarSizeInBits();
7564	unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7565
7566	// LL, LH, RL, and RH must be either all NULL or all set to a value.
7567	assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) \|\|
7568	(!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7569
7570	SDVTList VTs = DAG.getVTList(VT1: HiLoVT, VT2: HiLoVT);
7571	auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7572	bool Signed) -> bool {
7573	if ((Signed && HasSMUL_LOHI) \|\| (!Signed && HasUMUL_LOHI)) {
7574	Lo = DAG.getNode(Opcode: Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, DL: dl, VTList: VTs, N1: L, N2: R);
7575	Hi = SDValue (Lo.getNode(), `1`);
7576	return true;
7577	}
7578	if ((Signed && HasMULHS) \|\| (!Signed && HasMULHU)) {
7579	Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: L, N2: R);
7580	Hi = DAG.getNode(Opcode: Signed ? ISD::MULHS : ISD::MULHU, DL: dl, VT: HiLoVT, N1: L, N2: R);
7581	return true;
7582	}
7583	return false;
7584	};
7585
7586	SDValue Lo, Hi;
7587
7588	if (!LL.getNode() && !RL.getNode() &&
7589	isOperationLegalOrCustom(Op: ISD::TRUNCATE, VT: HiLoVT)) {
7590	LL = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: LHS);
7591	RL = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: RHS);
7592	}
7593
7594	if (!LL.getNode())
7595	return false;
7596
7597	APInt HighMask = APInt::getHighBitsSet(numBits: OuterBitSize, hiBitsSet: InnerBitSize);
7598	if (DAG.MaskedValueIsZero(Op: LHS, Mask: HighMask) &&
7599	DAG.MaskedValueIsZero(Op: RHS, Mask: HighMask)) {
7600	// The inputs are both zero-extended.
7601	if (MakeMUL_LOHI (LL, RL, Lo, Hi, false)) {
7602	Result.push_back(Elt: Lo);
7603	Result.push_back(Elt: Hi);
7604	if (Opcode != ISD::MUL) {
7605	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT: HiLoVT);
7606	Result.push_back(Elt: Zero);
7607	Result.push_back(Elt: Zero);
7608	}
7609	return true;
7610	}
7611	}
7612
7613	if (!VT.isVector() && Opcode == ISD::MUL &&
7614	DAG.ComputeMaxSignificantBits(Op: LHS) <= InnerBitSize &&
7615	DAG.ComputeMaxSignificantBits(Op: RHS) <= InnerBitSize) {
7616	// The input values are both sign-extended.
7617	// TODO non-MUL case?
7618	if (MakeMUL_LOHI (LL, RL, Lo, Hi, true)) {
7619	Result.push_back(Elt: Lo);
7620	Result.push_back(Elt: Hi);
7621	return true;
7622	}
7623	}
7624
7625	unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7626	SDValue Shift = DAG.getShiftAmountConstant(Val: ShiftAmount, VT, DL: dl);
7627
7628	if (!LH.getNode() && !RH.getNode() &&
7629	isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
7630	isOperationLegalOrCustom(Op: ISD::TRUNCATE, VT: HiLoVT)) {
7631	LH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: LHS, N2: Shift);
7632	LH = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: LH);
7633	RH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: RHS, N2: Shift);
7634	RH = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: RH);
7635	}
7636
7637	if (!LH.getNode())
7638	return false;
7639
7640	if (!MakeMUL_LOHI (LL, RL, Lo, Hi, false))
7641	return false;
7642
7643	Result.push_back(Elt: Lo);
7644
7645	if (Opcode == ISD::MUL) {
7646	RH = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: LL, N2: RH);
7647	LH = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: LH, N2: RL);
7648	Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Hi, N2: RH);
7649	Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Hi, N2: LH);
7650	Result.push_back(Elt: Hi);
7651	return true;
7652	}
7653
7654	// Compute the full width result.
7655	auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
7656	Lo = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Lo);
7657	Hi = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Hi);
7658	Hi = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Hi, N2: Shift);
7659	return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Lo, N2: Hi);
7660	};
7661
7662	SDValue Next = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Hi);
7663	if (!MakeMUL_LOHI (LL, RH, Lo, Hi, false))
7664	return false;
7665
7666	// This is effectively the add part of a multiply-add of half-sized operands,
7667	// so it cannot overflow.
7668	Next = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Next, N2: Merge (Lo, Hi));
7669
7670	if (!MakeMUL_LOHI (LH, RL, Lo, Hi, false))
7671	return false;
7672
7673	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT: HiLoVT);
7674	EVT BoolType = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
7675
7676	bool UseGlue = (isOperationLegalOrCustom(Op: ISD::ADDC, VT) &&
7677	isOperationLegalOrCustom(Op: ISD::ADDE, VT));
7678	if (UseGlue)
7679	Next = DAG.getNode(Opcode: ISD::ADDC, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::Glue), N1: Next,
7680	N2: Merge (Lo, Hi));
7681	else
7682	Next = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolType), N1: Next,
7683	N2: Merge (Lo, Hi), N3: DAG.getConstant(Val: `0`, DL: dl, VT: BoolType));
7684
7685	SDValue Carry = Next.getValue(R: `1`);
7686	Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
7687	Next = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Next, N2: Shift);
7688
7689	if (!MakeMUL_LOHI (LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
7690	return false;
7691
7692	if (UseGlue)
7693	Hi = DAG.getNode(Opcode: ISD::ADDE, DL: dl, VTList: DAG.getVTList(VT1: HiLoVT, VT2: MVT::Glue), N1: Hi, N2: Zero,
7694	N3: Carry);
7695	else
7696	Hi = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList: DAG.getVTList(VT1: HiLoVT, VT2: BoolType), N1: Hi,
7697	N2: Zero, N3: Carry);
7698
7699	Next = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Next, N2: Merge (Lo, Hi));
7700
7701	if (Opcode == ISD::SMUL_LOHI) {
7702	SDValue NextSub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Next,
7703	N2: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: RL));
7704	Next = DAG.getSelectCC(DL: dl, LHS: LH, RHS: Zero, True: NextSub, False: Next, Cond: ISD::SETLT);
7705
7706	NextSub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Next,
7707	N2: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: LL));
7708	Next = DAG.getSelectCC(DL: dl, LHS: RH, RHS: Zero, True: NextSub, False: Next, Cond: ISD::SETLT);
7709	}
7710
7711	Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
7712	Next = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Next, N2: Shift);
7713	Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
7714	return true;
7715	}
7716
7717	bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
7718	SelectionDAG &DAG, MulExpansionKind Kind,
7719	SDValue LL, SDValue LH, SDValue RL,
7720	SDValue RH) const {
7721	SmallVector<SDValue, `2`> Result;
7722	bool Ok = expandMUL_LOHI(Opcode: N->getOpcode(), VT: N->getValueType(ResNo: `0`), dl: SDLoc (N),
7723	LHS: N->getOperand(Num: `0`), RHS: N->getOperand(Num: `1`), Result, HiLoVT,
7724	DAG, Kind, LL, LH, RL, RH);
7725	if (Ok) {
7726	assert(Result.size() == `2`);
7727	Lo = Result [`0`];
7728	Hi = Result [`1`];
7729	}
7730	return Ok;
7731	}
7732
7733	// Optimize unsigned division or remainder by constants for types twice as large
7734	// as a legal VT.
7735	//
7736	// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
7737	// can be computed
7738	// as:
7739	// Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7740	// Remainder = Sum % Constant
7741	// This is based on "Remainder by Summing Digits" from Hacker's Delight.
7742	//
7743	// For division, we can compute the remainder using the algorithm described
7744	// above, subtract it from the dividend to get an exact multiple of Constant.
7745	// Then multiply that exact multiply by the multiplicative inverse modulo
7746	// (1 << (BitWidth / 2)) to get the quotient.
7747
7748	// If Constant is even, we can shift right the dividend and the divisor by the
7749	// number of trailing zeros in Constant before applying the remainder algorithm.
7750	// If we're after the quotient, we can subtract this value from the shifted
7751	// dividend and multiply by the multiplicative inverse of the shifted divisor.
7752	// If we want the remainder, we shift the value left by the number of trailing
7753	// zeros and add the bits that were shifted out of the dividend.
7754	bool TargetLowering::expandDIVREMByConstant(SDNode *N,
7755	SmallVectorImpl<SDValue> &Result,
7756	EVT HiLoVT, SelectionDAG &DAG,
7757	SDValue LL, SDValue LH) const {
7758	unsigned Opcode = N->getOpcode();
7759	EVT VT = N->getValueType(ResNo: `0`);
7760
7761	// TODO: Support signed division/remainder.
7762	if (Opcode == ISD::SREM \|\| Opcode == ISD::SDIV \|\| Opcode == ISD::SDIVREM)
7763	return false;
7764	assert(
7765	(Opcode == ISD::UREM \|\| Opcode == ISD::UDIV \|\| Opcode == ISD::UDIVREM) &&
7766	"Unexpected opcode");
7767
7768	auto *CN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: `1`));
7769	if (!CN)
7770	return false;
7771
7772	APInt Divisor = CN->getAPIntValue();
7773	unsigned BitWidth = Divisor.getBitWidth();
7774	unsigned HBitWidth = BitWidth / `2`;
7775	assert(VT.getScalarSizeInBits() == BitWidth &&
7776	HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
7777
7778	// Divisor needs to less than (1 << HBitWidth).
7779	APInt HalfMaxPlus1 = APInt::getOneBitSet(numBits: BitWidth, BitNo: HBitWidth);
7780	if (Divisor.uge(RHS: HalfMaxPlus1))
7781	return false;
7782
7783	// We depend on the UREM by constant optimization in DAGCombiner that requires
7784	// high multiply.
7785	if (!isOperationLegalOrCustom(Op: ISD::MULHU, VT: HiLoVT) &&
7786	!isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT: HiLoVT))
7787	return false;
7788
7789	// Don't expand if optimizing for size.
7790	if (DAG.shouldOptForSize())
7791	return false;
7792
7793	// Early out for 0 or 1 divisors.
7794	if (Divisor.ule(RHS: `1`))
7795	return false;
7796
7797	// If the divisor is even, shift it until it becomes odd.
7798	unsigned TrailingZeros = `0`;
7799	if (!Divisor [`0`]) {
7800	TrailingZeros = Divisor.countr_zero();
7801	Divisor.lshrInPlace(ShiftAmt: TrailingZeros);
7802	}
7803
7804	SDLoc dl(N);
7805	SDValue Sum;
7806	SDValue PartialRem;
7807
7808	// If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
7809	// then add in the carry.
7810	// TODO: If we can't split it in half, we might be able to split into 3 or
7811	// more pieces using a smaller bit width.
7812	if (HalfMaxPlus1.urem(RHS: Divisor).isOne()) {
7813	assert(!LL == !LH && "Expected both input halves or no input halves!");
7814	if (!LL)
7815	std::tie(args&: LL, args&: LH) = DAG.SplitScalar(N: N->getOperand(Num: `0`), DL: dl, LoVT: HiLoVT, HiVT: HiLoVT);
7816
7817	// Shift the input by the number of TrailingZeros in the divisor. The
7818	// shifted out bits will be added to the remainder later.
7819	if (TrailingZeros) {
7820	// Save the shifted off bits if we need the remainder.
7821	if (Opcode != ISD::UDIV) {
7822	APInt Mask = APInt::getLowBitsSet(numBits: HBitWidth, loBitsSet: TrailingZeros);
7823	PartialRem = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: HiLoVT, N1: LL,
7824	N2: DAG.getConstant(Val: Mask, DL: dl, VT: HiLoVT));
7825	}
7826
7827	LL = DAG.getNode(
7828	Opcode: ISD::OR, DL: dl, VT: HiLoVT,
7829	N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HiLoVT, N1: LL,
7830	N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl)),
7831	N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HiLoVT, N1: LH,
7832	N2: DAG.getShiftAmountConstant(Val: HBitWidth - TrailingZeros,
7833	VT: HiLoVT, DL: dl)));
7834	LH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HiLoVT, N1: LH,
7835	N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl));
7836	}
7837
7838	// Use uaddo_carry if we can, otherwise use a compare to detect overflow.
7839	EVT SetCCType =
7840	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: HiLoVT);
7841	if (isOperationLegalOrCustom(Op: ISD::UADDO_CARRY, VT: HiLoVT)) {
7842	SDVTList VTList = DAG.getVTList(VT1: HiLoVT, VT2: SetCCType);
7843	Sum = DAG.getNode(Opcode: ISD::UADDO, DL: dl, VTList, N1: LL, N2: LH);
7844	Sum = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList, N1: Sum,
7845	N2: DAG.getConstant(Val: `0`, DL: dl, VT: HiLoVT), N3: Sum.getValue(R: `1`));
7846	} else {
7847	Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: LL, N2: LH);
7848	SDValue Carry = DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Sum, RHS: LL, Cond: ISD::SETULT);
7849	// If the boolean for the target is 0 or 1, we can add the setcc result
7850	// directly.
7851	if (getBooleanContents(Type: HiLoVT) ==
7852	TargetLoweringBase::ZeroOrOneBooleanContent)
7853	Carry = DAG.getZExtOrTrunc(Op: Carry, DL: dl, VT: HiLoVT);
7854	else
7855	Carry = DAG.getSelect(DL: dl, VT: HiLoVT, Cond: Carry, LHS: DAG.getConstant(Val: `1`, DL: dl, VT: HiLoVT),
7856	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: HiLoVT));
7857	Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Sum, N2: Carry);
7858	}
7859	}
7860
7861	// If we didn't find a sum, we can't do the expansion.
7862	if (!Sum)
7863	return false;
7864
7865	// Perform a HiLoVT urem on the Sum using truncated divisor.
7866	SDValue RemL =
7867	DAG.getNode(Opcode: ISD::UREM, DL: dl, VT: HiLoVT, N1: Sum,
7868	N2: DAG.getConstant(Val: Divisor.trunc(width: HBitWidth), DL: dl, VT: HiLoVT));
7869	SDValue RemH = DAG.getConstant(Val: `0`, DL: dl, VT: HiLoVT);
7870
7871	if (Opcode != ISD::UREM) {
7872	// Subtract the remainder from the shifted dividend.
7873	SDValue Dividend = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT, N1: LL, N2: LH);
7874	SDValue Rem = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT, N1: RemL, N2: RemH);
7875
7876	Dividend = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Dividend, N2: Rem);
7877
7878	// Multiply by the multiplicative inverse of the divisor modulo
7879	// (1 << BitWidth).
7880	APInt MulFactor = Divisor.multiplicativeInverse();
7881
7882	SDValue Quotient = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Dividend,
7883	N2: DAG.getConstant(Val: MulFactor, DL: dl, VT));
7884
7885	// Split the quotient into low and high parts.
7886	SDValue QuotL, QuotH;
7887	std::tie(args&: QuotL, args&: QuotH) = DAG.SplitScalar(N: Quotient, DL: dl, LoVT: HiLoVT, HiVT: HiLoVT);
7888	Result.push_back(Elt: QuotL);
7889	Result.push_back(Elt: QuotH);
7890	}
7891
7892	if (Opcode != ISD::UDIV) {
7893	// If we shifted the input, shift the remainder left and add the bits we
7894	// shifted off the input.
7895	if (TrailingZeros) {
7896	APInt Mask = APInt::getLowBitsSet(numBits: HBitWidth, loBitsSet: TrailingZeros);
7897	RemL = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HiLoVT, N1: RemL,
7898	N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl));
7899	RemL = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: RemL, N2: PartialRem);
7900	}
7901	Result.push_back(Elt: RemL);
7902	Result.push_back(Elt: DAG.getConstant(Val: `0`, DL: dl, VT: HiLoVT));
7903	}
7904
7905	return true;
7906	}
7907
7908	// Check that (every element of) Z is undef or not an exact multiple of BW.
7909	static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
7910	return ISD::matchUnaryPredicate(
7911	Op: Z,
7912	Match: [=](ConstantSDNode C) { return* !C \|\| C->getAPIntValue().urem(RHS: BW) != `0`; },
7913	AllowUndefs: true);
7914	}
7915
7916	static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
7917	EVT VT = Node->getValueType(ResNo: `0`);
7918	SDValue ShX, ShY;
7919	SDValue ShAmt, InvShAmt;
7920	SDValue X = Node->getOperand(Num: `0`);
7921	SDValue Y = Node->getOperand(Num: `1`);
7922	SDValue Z = Node->getOperand(Num: `2`);
7923	SDValue Mask = Node->getOperand(Num: `3`);
7924	SDValue VL = Node->getOperand(Num: `4`);
7925
7926	unsigned BW = VT.getScalarSizeInBits();
7927	bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
7928	SDLoc DL(SDValue (Node, `0`));
7929
7930	EVT ShVT = Z.getValueType();
7931	if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7932	// fshl: X << C \| Y >> (BW - C)
7933	// fshr: X << (BW - C) \| Y >> C
7934	// where C = Z % BW is not zero
7935	SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
7936	ShAmt = DAG.getNode(Opcode: ISD::VP_UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC, N3: Mask, N4: VL);
7937	InvShAmt = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT: ShVT, N1: BitWidthC, N2: ShAmt, N3: Mask, N4: VL);
7938	ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: IsFSHL ? ShAmt : InvShAmt, N3: Mask,
7939	N4: VL);
7940	ShY = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT, N1: Y, N2: IsFSHL ? InvShAmt : ShAmt, N3: Mask,
7941	N4: VL);
7942	} else {
7943	// fshl: X << (Z % BW) \| Y >> 1 >> (BW - 1 - (Z % BW))
7944	// fshr: X << 1 << (BW - 1 - (Z % BW)) \| Y >> (Z % BW)
7945	SDValue BitMask = DAG.getConstant(Val: BW - `1`, DL, VT: ShVT);
7946	if (isPowerOf2_32(Value: BW)) {
7947	// Z % BW -> Z & (BW - 1)
7948	ShAmt = DAG.getNode(Opcode: ISD::VP_AND, DL, VT: ShVT, N1: Z, N2: BitMask, N3: Mask, N4: VL);
7949	// (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7950	SDValue NotZ = DAG.getNode(Opcode: ISD::VP_XOR, DL, VT: ShVT, N1: Z,
7951	N2: DAG.getAllOnesConstant(DL, VT: ShVT), N3: Mask, N4: VL);
7952	InvShAmt = DAG.getNode(Opcode: ISD::VP_AND, DL, VT: ShVT, N1: NotZ, N2: BitMask, N3: Mask, N4: VL);
7953	} else {
7954	SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
7955	ShAmt = DAG.getNode(Opcode: ISD::VP_UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC, N3: Mask, N4: VL);
7956	InvShAmt = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT: ShVT, N1: BitMask, N2: ShAmt, N3: Mask, N4: VL);
7957	}
7958
7959	SDValue One = DAG.getConstant(Val: `1`, DL, VT: ShVT);
7960	if (IsFSHL) {
7961	ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: ShAmt, N3: Mask, N4: VL);
7962	SDValue ShY1 = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT, N1: Y, N2: One, N3: Mask, N4: VL);
7963	ShY = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT, N1: ShY1, N2: InvShAmt, N3: Mask, N4: VL);
7964	} else {
7965	SDValue ShX1 = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: One, N3: Mask, N4: VL);
7966	ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: ShX1, N2: InvShAmt, N3: Mask, N4: VL);
7967	ShY = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT, N1: Y, N2: ShAmt, N3: Mask, N4: VL);
7968	}
7969	}
7970	return DAG.getNode(Opcode: ISD::VP_OR, DL, VT, N1: ShX, N2: ShY, N3: Mask, N4: VL);
7971	}
7972
7973	SDValue TargetLowering::expandFunnelShift(SDNode *Node,
7974	SelectionDAG &DAG) const {
7975	if (Node->isVPOpcode())
7976	return expandVPFunnelShift(Node, DAG);
7977
7978	EVT VT = Node->getValueType(ResNo: `0`);
7979
7980	if (VT.isVector() && (!isOperationLegalOrCustom(Op: ISD::SHL, VT) \|\|
7981	!isOperationLegalOrCustom(Op: ISD::SRL, VT) \|\|
7982	!isOperationLegalOrCustom(Op: ISD::SUB, VT) \|\|
7983	!isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)))
7984	return SDValue ();
7985
7986	SDValue X = Node->getOperand(Num: `0`);
7987	SDValue Y = Node->getOperand(Num: `1`);
7988	SDValue Z = Node->getOperand(Num: `2`);
7989
7990	unsigned BW = VT.getScalarSizeInBits();
7991	bool IsFSHL = Node->getOpcode() == ISD::FSHL;
7992	SDLoc DL(SDValue (Node, `0`));
7993
7994	EVT ShVT = Z.getValueType();
7995
7996	// If a funnel shift in the other direction is more supported, use it.
7997	unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
7998	if (!isOperationLegalOrCustom(Op: Node->getOpcode(), VT) &&
7999	isOperationLegalOrCustom(Op: RevOpcode, VT) && isPowerOf2_32(Value: BW)) {
8000	if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8001	// fshl X, Y, Z -> fshr X, Y, -Z
8002	// fshr X, Y, Z -> fshl X, Y, -Z
8003	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT: ShVT);
8004	Z = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Zero, N2: Z);
8005	} else {
8006	// fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8007	// fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
8008	SDValue One = DAG.getConstant(Val: `1`, DL, VT: ShVT);
8009	if (IsFSHL) {
8010	Y = DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: One);
8011	X = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: X, N2: One);
8012	} else {
8013	X = DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: One);
8014	Y = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Y, N2: One);
8015	}
8016	Z = DAG.getNOT(DL, Val: Z, VT: ShVT);
8017	}
8018	return DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: Z);
8019	}
8020
8021	SDValue ShX, ShY;
8022	SDValue ShAmt, InvShAmt;
8023	if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8024	// fshl: X << C \| Y >> (BW - C)
8025	// fshr: X << (BW - C) \| Y >> C
8026	// where C = Z % BW is not zero
8027	SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
8028	ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC);
8029	InvShAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: BitWidthC, N2: ShAmt);
8030	ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: IsFSHL ? ShAmt : InvShAmt);
8031	ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: IsFSHL ? InvShAmt : ShAmt);
8032	} else {
8033	// fshl: X << (Z % BW) \| Y >> 1 >> (BW - 1 - (Z % BW))
8034	// fshr: X << 1 << (BW - 1 - (Z % BW)) \| Y >> (Z % BW)
8035	SDValue Mask = DAG.getConstant(Val: BW - `1`, DL, VT: ShVT);
8036	if (isPowerOf2_32(Value: BW)) {
8037	// Z % BW -> Z & (BW - 1)
8038	ShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: Z, N2: Mask);
8039	// (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8040	InvShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: DAG.getNOT(DL, Val: Z, VT: ShVT), N2: Mask);
8041	} else {
8042	SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
8043	ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC);
8044	InvShAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Mask, N2: ShAmt);
8045	}
8046
8047	SDValue One = DAG.getConstant(Val: `1`, DL, VT: ShVT);
8048	if (IsFSHL) {
8049	ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: ShAmt);
8050	SDValue ShY1 = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: One);
8051	ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: ShY1, N2: InvShAmt);
8052	} else {
8053	SDValue ShX1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: One);
8054	ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ShX1, N2: InvShAmt);
8055	ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: ShAmt);
8056	}
8057	}
8058	return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShX, N2: ShY);
8059	}
8060
8061	// TODO: Merge with expandFunnelShift.
8062	SDValue TargetLowering::expandROT(SDNode Node, bool* AllowVectorOps,
8063	SelectionDAG &DAG) const {
8064	EVT VT = Node->getValueType(ResNo: `0`);
8065	unsigned EltSizeInBits = VT.getScalarSizeInBits();
8066	bool IsLeft = Node->getOpcode() == ISD::ROTL;
8067	SDValue Op0 = Node->getOperand(Num: `0`);
8068	SDValue Op1 = Node->getOperand(Num: `1`);
8069	SDLoc DL(SDValue (Node, `0`));
8070
8071	EVT ShVT = Op1.getValueType();
8072	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT: ShVT);
8073
8074	// If a rotate in the other direction is more supported, use it.
8075	unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8076	if (!isOperationLegalOrCustom(Op: Node->getOpcode(), VT) &&
8077	isOperationLegalOrCustom(Op: RevRot, VT) && isPowerOf2_32(Value: EltSizeInBits)) {
8078	SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Zero, N2: Op1);
8079	return DAG.getNode(Opcode: RevRot, DL, VT, N1: Op0, N2: Sub);
8080	}
8081
8082	if (!AllowVectorOps && VT.isVector() &&
8083	(!isOperationLegalOrCustom(Op: ISD::SHL, VT) \|\|
8084	!isOperationLegalOrCustom(Op: ISD::SRL, VT) \|\|
8085	!isOperationLegalOrCustom(Op: ISD::SUB, VT) \|\|
8086	!isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT) \|\|
8087	!isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT)))
8088	return SDValue ();
8089
8090	unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8091	unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8092	SDValue BitWidthMinusOneC = DAG.getConstant(Val: EltSizeInBits - `1`, DL, VT: ShVT);
8093	SDValue ShVal;
8094	SDValue HsVal;
8095	if (isPowerOf2_32(Value: EltSizeInBits)) {
8096	// (rotl x, c) -> x << (c & (w - 1)) \| x >> (-c & (w - 1))
8097	// (rotr x, c) -> x >> (c & (w - 1)) \| x << (-c & (w - 1))
8098	SDValue NegOp1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Zero, N2: Op1);
8099	SDValue ShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: Op1, N2: BitWidthMinusOneC);
8100	ShVal = DAG.getNode(Opcode: ShOpc, DL, VT, N1: Op0, N2: ShAmt);
8101	SDValue HsAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: NegOp1, N2: BitWidthMinusOneC);
8102	HsVal = DAG.getNode(Opcode: HsOpc, DL, VT, N1: Op0, N2: HsAmt);
8103	} else {
8104	// (rotl x, c) -> x << (c % w) \| x >> 1 >> (w - 1 - (c % w))
8105	// (rotr x, c) -> x >> (c % w) \| x << 1 << (w - 1 - (c % w))
8106	SDValue BitWidthC = DAG.getConstant(Val: EltSizeInBits, DL, VT: ShVT);
8107	SDValue ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Op1, N2: BitWidthC);
8108	ShVal = DAG.getNode(Opcode: ShOpc, DL, VT, N1: Op0, N2: ShAmt);
8109	SDValue HsAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: BitWidthMinusOneC, N2: ShAmt);
8110	SDValue One = DAG.getConstant(Val: `1`, DL, VT: ShVT);
8111	HsVal =
8112	DAG.getNode(Opcode: HsOpc, DL, VT, N1: DAG.getNode(Opcode: HsOpc, DL, VT, N1: Op0, N2: One), N2: HsAmt);
8113	}
8114	return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShVal, N2: HsVal);
8115	}
8116
8117	void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
8118	SelectionDAG &DAG) const {
8119	assert(Node->getNumOperands() == `3` && "Not a double-shift!");
8120	EVT VT = Node->getValueType(ResNo: `0`);
8121	unsigned VTBits = VT.getScalarSizeInBits();
8122	assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
8123
8124	bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
8125	bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
8126	SDValue ShOpLo = Node->getOperand(Num: `0`);
8127	SDValue ShOpHi = Node->getOperand(Num: `1`);
8128	SDValue ShAmt = Node->getOperand(Num: `2`);
8129	EVT ShAmtVT = ShAmt.getValueType();
8130	EVT ShAmtCCVT =
8131	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: ShAmtVT);
8132	SDLoc dl(Node);
8133
8134	// ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
8135	// ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
8136	// away during isel.
8137	SDValue SafeShAmt = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ShAmtVT, N1: ShAmt,
8138	N2: DAG.getConstant(Val: VTBits - `1`, DL: dl, VT: ShAmtVT));
8139	SDValue Tmp1 = IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: ShOpHi,
8140	N2: DAG.getConstant(Val: VTBits - `1`, DL: dl, VT: ShAmtVT))
8141	: DAG.getConstant(Val: `0`, DL: dl, VT);
8142
8143	SDValue Tmp2, Tmp3;
8144	if (IsSHL) {
8145	Tmp2 = DAG.getNode(Opcode: ISD::FSHL, DL: dl, VT, N1: ShOpHi, N2: ShOpLo, N3: ShAmt);
8146	Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: ShOpLo, N2: SafeShAmt);
8147	} else {
8148	Tmp2 = DAG.getNode(Opcode: ISD::FSHR, DL: dl, VT, N1: ShOpHi, N2: ShOpLo, N3: ShAmt);
8149	Tmp3 = DAG.getNode(Opcode: IsSRA ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: ShOpHi, N2: SafeShAmt);
8150	}
8151
8152	// If the shift amount is larger or equal than the width of a part we don't
8153	// use the result from the FSHL/FSHR. Insert a test and select the appropriate
8154	// values for large shift amounts.
8155	SDValue AndNode = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ShAmtVT, N1: ShAmt,
8156	N2: DAG.getConstant(Val: VTBits, DL: dl, VT: ShAmtVT));
8157	SDValue Cond = DAG.getSetCC(DL: dl, VT: ShAmtCCVT, LHS: AndNode,
8158	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: ShAmtVT), Cond: ISD::SETNE);
8159
8160	if (IsSHL) {
8161	Hi = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp3, N3: Tmp2);
8162	Lo = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp1, N3: Tmp3);
8163	} else {
8164	Lo = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp3, N3: Tmp2);
8165	Hi = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp1, N3: Tmp3);
8166	}
8167	}
8168
8169	bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
8170	SelectionDAG &DAG) const {
8171	unsigned OpNo = Node->isStrictFPOpcode() ? `1` : `0`;
8172	SDValue Src = Node->getOperand(Num: OpNo);
8173	EVT SrcVT = Src.getValueType();
8174	EVT DstVT = Node->getValueType(ResNo: `0`);
8175	SDLoc dl(SDValue (Node, `0`));
8176
8177	// FIXME: Only f32 to i64 conversions are supported.
8178	if (SrcVT != MVT::f32 \|\| DstVT != MVT::i64)
8179	return false;
8180
8181	if (Node->isStrictFPOpcode())
8182	// When a NaN is converted to an integer a trap is allowed. We can't
8183	// use this expansion here because it would eliminate that trap. Other
8184	// traps are also allowed and cannot be eliminated. See
8185	// IEEE 754-2008 sec 5.8.
8186	return false;
8187
8188	// Expand f32 -> i64 conversion
8189	// This algorithm comes from compiler-rt's implementation of fixsfdi:
8190	// https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8191	unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8192	EVT IntVT = SrcVT.changeTypeToInteger();
8193	EVT IntShVT = getShiftAmountTy(LHSTy: IntVT, DL: DAG.getDataLayout());
8194
8195	SDValue ExponentMask = DAG.getConstant(Val: `0x7F800000`, DL: dl, VT: IntVT);
8196	SDValue ExponentLoBit = DAG.getConstant(Val: `23`, DL: dl, VT: IntVT);
8197	SDValue Bias = DAG.getConstant(Val: `127`, DL: dl, VT: IntVT);
8198	SDValue SignMask = DAG.getConstant(Val: APInt::getSignMask(BitWidth: SrcEltBits), DL: dl, VT: IntVT);
8199	SDValue SignLowBit = DAG.getConstant(Val: SrcEltBits - `1`, DL: dl, VT: IntVT);
8200	SDValue MantissaMask = DAG.getConstant(Val: `0x007FFFFF`, DL: dl, VT: IntVT);
8201
8202	SDValue Bits = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: IntVT, Operand: Src);
8203
8204	SDValue ExponentBits = DAG.getNode(
8205	Opcode: ISD::SRL, DL: dl, VT: IntVT, N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: ExponentMask),
8206	N2: DAG.getZExtOrTrunc(Op: ExponentLoBit, DL: dl, VT: IntShVT));
8207	SDValue Exponent = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: ExponentBits, N2: Bias);
8208
8209	SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: IntVT,
8210	N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: SignMask),
8211	N2: DAG.getZExtOrTrunc(Op: SignLowBit, DL: dl, VT: IntShVT));
8212	Sign = DAG.getSExtOrTrunc(Op: Sign, DL: dl, VT: DstVT);
8213
8214	SDValue R = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: IntVT,
8215	N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: MantissaMask),
8216	N2: DAG.getConstant(Val: `0x00800000`, DL: dl, VT: IntVT));
8217
8218	R = DAG.getZExtOrTrunc(Op: R, DL: dl, VT: DstVT);
8219
8220	R = DAG.getSelectCC(
8221	DL: dl, LHS: Exponent, RHS: ExponentLoBit,
8222	True: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: DstVT, N1: R,
8223	N2: DAG.getZExtOrTrunc(
8224	Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: Exponent, N2: ExponentLoBit),
8225	DL: dl, VT: IntShVT)),
8226	False: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: DstVT, N1: R,
8227	N2: DAG.getZExtOrTrunc(
8228	Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: ExponentLoBit, N2: Exponent),
8229	DL: dl, VT: IntShVT)),
8230	Cond: ISD::SETGT);
8231
8232	SDValue Ret = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: DstVT,
8233	N1: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: R, N2: Sign), N2: Sign);
8234
8235	Result = DAG.getSelectCC(DL: dl, LHS: Exponent, RHS: DAG.getConstant(Val: `0`, DL: dl, VT: IntVT),
8236	True: DAG.getConstant(Val: `0`, DL: dl, VT: DstVT), False: Ret, Cond: ISD::SETLT);
8237	return true;
8238	}
8239
8240	bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
8241	SDValue &Chain,
8242	SelectionDAG &DAG) const {
8243	SDLoc dl(SDValue (Node, `0`));
8244	unsigned OpNo = Node->isStrictFPOpcode() ? `1` : `0`;
8245	SDValue Src = Node->getOperand(Num: OpNo);
8246
8247	EVT SrcVT = Src.getValueType();
8248	EVT DstVT = Node->getValueType(ResNo: `0`);
8249	EVT SetCCVT =
8250	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: SrcVT);
8251	EVT DstSetCCVT =
8252	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: DstVT);
8253
8254	// Only expand vector types if we have the appropriate vector bit operations.
8255	unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
8256	ISD::FP_TO_SINT;
8257	if (DstVT.isVector() && (!isOperationLegalOrCustom(Op: SIntOpcode, VT: DstVT) \|\|
8258	!isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT: SrcVT)))
8259	return false;
8260
8261	// If the maximum float value is smaller then the signed integer range,
8262	// the destination signmask can't be represented by the float, so we can
8263	// just use FP_TO_SINT directly.
8264	const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(VT: SrcVT);
8265	APFloat APF(APFSem, APInt::getZero(numBits: SrcVT.getScalarSizeInBits()));
8266	APInt SignMask = APInt::getSignMask(BitWidth: DstVT.getScalarSizeInBits());
8267	if (APFloat::opOverflow &
8268	APF.convertFromAPInt(Input: SignMask, IsSigned: false, RM: APFloat::rmNearestTiesToEven)) {
8269	if (Node->isStrictFPOpcode()) {
8270	Result = DAG.getNode(Opcode: ISD::STRICT_FP_TO_SINT, DL: dl, ResultTys: { DstVT, MVT::Other },
8271	Ops: { Node->getOperand(Num: `0`), Src });
8272	Chain = Result.getValue(R: `1`);
8273	} else
8274	Result = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Src);
8275	return true;
8276	}
8277
8278	// Don't expand it if there isn't cheap fsub instruction.
8279	if (!isOperationLegalOrCustom(
8280	Op: Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, VT: SrcVT))
8281	return false;
8282
8283	SDValue Cst = DAG.getConstantFP(Val: APF, DL: dl, VT: SrcVT);
8284	SDValue Sel;
8285
8286	if (Node->isStrictFPOpcode()) {
8287	Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT,
8288	Chain: Node->getOperand(Num: `0`), /IsSignaling/ true);
8289	Chain = Sel.getValue(R: `1`);
8290	} else {
8291	Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT);
8292	}
8293
8294	bool Strict = Node->isStrictFPOpcode() \|\|
8295	shouldUseStrictFP_TO_INT(FpVT: SrcVT, IntVT: DstVT, /IsSigned/ false);
8296
8297	if (Strict) {
8298	// Expand based on maximum range of FP_TO_SINT, if the value exceeds the
8299	// signmask then offset (the result of which should be fully representable).
8300	// Sel = Src < 0x8000000000000000
8301	// FltOfs = select Sel, 0, 0x8000000000000000
8302	// IntOfs = select Sel, 0, 0x8000000000000000
8303	// Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8304
8305	// TODO: Should any fast-math-flags be set for the FSUB?
8306	SDValue FltOfs = DAG.getSelect(DL: dl, VT: SrcVT, Cond: Sel,
8307	LHS: DAG.getConstantFP(Val: `0.0`, DL: dl, VT: SrcVT), RHS: Cst);
8308	Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT);
8309	SDValue IntOfs = DAG.getSelect(DL: dl, VT: DstVT, Cond: Sel,
8310	LHS: DAG.getConstant(Val: `0`, DL: dl, VT: DstVT),
8311	RHS: DAG.getConstant(Val: SignMask, DL: dl, VT: DstVT));
8312	SDValue SInt;
8313	if (Node->isStrictFPOpcode()) {
8314	SDValue Val = DAG.getNode(Opcode: ISD::STRICT_FSUB, DL: dl, ResultTys: { SrcVT, MVT::Other },
8315	Ops: { Chain, Src, FltOfs });
8316	SInt = DAG.getNode(Opcode: ISD::STRICT_FP_TO_SINT, DL: dl, ResultTys: { DstVT, MVT::Other },
8317	Ops: { Val.getValue(R: `1`), Val });
8318	Chain = SInt.getValue(R: `1`);
8319	} else {
8320	SDValue Val = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: SrcVT, N1: Src, N2: FltOfs);
8321	SInt = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Val);
8322	}
8323	Result = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: SInt, N2: IntOfs);
8324	} else {
8325	// Expand based on maximum range of FP_TO_SINT:
8326	// True = fp_to_sint(Src)
8327	// False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
8328	// Result = select (Src < 0x8000000000000000), True, False
8329
8330	SDValue True = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Src);
8331	// TODO: Should any fast-math-flags be set for the FSUB?
8332	SDValue False = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT,
8333	Operand: DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: SrcVT, N1: Src, N2: Cst));
8334	False = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: False,
8335	N2: DAG.getConstant(Val: SignMask, DL: dl, VT: DstVT));
8336	Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT);
8337	Result = DAG.getSelect(DL: dl, VT: DstVT, Cond: Sel, LHS: True, RHS: False);
8338	}
8339	return true;
8340	}
8341
8342	bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
8343	SDValue &Chain,
8344	SelectionDAG &DAG) const {
8345	// This transform is not correct for converting 0 when rounding mode is set
8346	// to round toward negative infinity which will produce -0.0. So disable under
8347	// strictfp.
8348	if (Node->isStrictFPOpcode())
8349	return false;
8350
8351	SDValue Src = Node->getOperand(Num: `0`);
8352	EVT SrcVT = Src.getValueType();
8353	EVT DstVT = Node->getValueType(ResNo: `0`);
8354
8355	if (SrcVT.getScalarType() != MVT::i64 \|\| DstVT.getScalarType() != MVT::f64)
8356	return false;
8357
8358	// Only expand vector types if we have the appropriate vector bit operations.
8359	if (SrcVT.isVector() && (!isOperationLegalOrCustom(Op: ISD::SRL, VT: SrcVT) \|\|
8360	!isOperationLegalOrCustom(Op: ISD::FADD, VT: DstVT) \|\|
8361	!isOperationLegalOrCustom(Op: ISD::FSUB, VT: DstVT) \|\|
8362	!isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT: SrcVT) \|\|
8363	!isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT: SrcVT)))
8364	return false;
8365
8366	SDLoc dl(SDValue (Node, `0`));
8367	EVT ShiftVT = getShiftAmountTy(LHSTy: SrcVT, DL: DAG.getDataLayout());
8368
8369	// Implementation of unsigned i64 to f64 following the algorithm in
8370	// __floatundidf in compiler_rt. This implementation performs rounding
8371	// correctly in all rounding modes with the exception of converting 0
8372	// when rounding toward negative infinity. In that case the fsub will produce
8373	// -0.0. This will be added to +0.0 and produce -0.0 which is incorrect.
8374	SDValue TwoP52 = DAG.getConstant(UINT64_C(`0x4330000000000000`), DL: dl, VT: SrcVT);
8375	SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
8376	Val: llvm::bit_cast<double>(UINT64_C(`0x4530000000100000`)), DL: dl, VT: DstVT);
8377	SDValue TwoP84 = DAG.getConstant(UINT64_C(`0x4530000000000000`), DL: dl, VT: SrcVT);
8378	SDValue LoMask = DAG.getConstant(UINT64_C(`0x00000000FFFFFFFF`), DL: dl, VT: SrcVT);
8379	SDValue HiShift = DAG.getConstant(Val: `32`, DL: dl, VT: ShiftVT);
8380
8381	SDValue Lo = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: SrcVT, N1: Src, N2: LoMask);
8382	SDValue Hi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: SrcVT, N1: Src, N2: HiShift);
8383	SDValue LoOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: SrcVT, N1: Lo, N2: TwoP52);
8384	SDValue HiOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: SrcVT, N1: Hi, N2: TwoP84);
8385	SDValue LoFlt = DAG.getBitcast(VT: DstVT, V: LoOr);
8386	SDValue HiFlt = DAG.getBitcast(VT: DstVT, V: HiOr);
8387	SDValue HiSub =
8388	DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: DstVT, N1: HiFlt, N2: TwoP84PlusTwoP52);
8389	Result = DAG.getNode(Opcode: ISD::FADD, DL: dl, VT: DstVT, N1: LoFlt, N2: HiSub);
8390	return true;
8391	}
8392
8393	SDValue
8394	TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
8395	SelectionDAG &DAG) const {
8396	unsigned Opcode = Node->getOpcode();
8397	assert((Opcode == ISD::FMINNUM \|\| Opcode == ISD::FMAXNUM \|\|
8398	Opcode == ISD::STRICT_FMINNUM \|\| Opcode == ISD::STRICT_FMAXNUM) &&
8399	"Wrong opcode");
8400
8401	if (Node->getFlags().hasNoNaNs()) {
8402	ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8403	SDValue Op1 = Node->getOperand(Num: `0`);
8404	SDValue Op2 = Node->getOperand(Num: `1`);
8405	SDValue SelCC = DAG.getSelectCC(DL: SDLoc (Node), LHS: Op1, RHS: Op2, True: Op1, False: Op2, Cond: Pred);
8406	// Copy FMF flags, but always set the no-signed-zeros flag
8407	// as this is implied by the FMINNUM/FMAXNUM semantics.
8408	SDNodeFlags Flags = Node->getFlags();
8409	Flags.setNoSignedZeros(true);
8410	SelCC ->setFlags(Flags);
8411	return SelCC;
8412	}
8413
8414	return SDValue ();
8415	}
8416
8417	SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
8418	SelectionDAG &DAG) const {
8419	SDLoc dl(Node);
8420	unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
8421	ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8422	EVT VT = Node->getValueType(ResNo: `0`);
8423
8424	if (VT.isScalableVector())
8425	report_fatal_error(
8426	reason: "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
8427
8428	if (isOperationLegalOrCustom(Op: NewOp, VT)) {
8429	SDValue Quiet0 = Node->getOperand(Num: `0`);
8430	SDValue Quiet1 = Node->getOperand(Num: `1`);
8431
8432	if (!Node->getFlags().hasNoNaNs()) {
8433	// Insert canonicalizes if it's possible we need to quiet to get correct
8434	// sNaN behavior.
8435	if (!DAG.isKnownNeverSNaN(Op: Quiet0)) {
8436	Quiet0 = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL: dl, VT, Operand: Quiet0,
8437	Flags: Node->getFlags());
8438	}
8439	if (!DAG.isKnownNeverSNaN(Op: Quiet1)) {
8440	Quiet1 = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL: dl, VT, Operand: Quiet1,
8441	Flags: Node->getFlags());
8442	}
8443	}
8444
8445	return DAG.getNode(Opcode: NewOp, DL: dl, VT, N1: Quiet0, N2: Quiet1, Flags: Node->getFlags());
8446	}
8447
8448	// If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
8449	// instead if there are no NaNs and there can't be an incompatible zero
8450	// compare: at least one operand isn't +/-0, or there are no signed-zeros.
8451	if ((Node->getFlags().hasNoNaNs() \|\|
8452	(DAG.isKnownNeverNaN(Op: Node->getOperand(Num: `0`)) &&
8453	DAG.isKnownNeverNaN(Op: Node->getOperand(Num: `1`)))) &&
8454	(Node->getFlags().hasNoSignedZeros() \|\|
8455	DAG.isKnownNeverZeroFloat(Op: Node->getOperand(Num: `0`)) \|\|
8456	DAG.isKnownNeverZeroFloat(Op: Node->getOperand(Num: `1`)))) {
8457	unsigned IEEE2018Op =
8458	Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8459	if (isOperationLegalOrCustom(Op: IEEE2018Op, VT))
8460	return DAG.getNode(Opcode: IEEE2018Op, DL: dl, VT, N1: Node->getOperand(Num: `0`),
8461	N2: Node->getOperand(Num: `1`), Flags: Node->getFlags());
8462	}
8463
8464	if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
8465	return SelCC;
8466
8467	return SDValue ();
8468	}
8469
8470	SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
8471	SelectionDAG &DAG) const {
8472	SDLoc DL(N);
8473	SDValue LHS = N->getOperand(Num: `0`);
8474	SDValue RHS = N->getOperand(Num: `1`);
8475	unsigned Opc = N->getOpcode();
8476	EVT VT = N->getValueType(ResNo: `0`);
8477	EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
8478	bool IsMax = Opc == ISD::FMAXIMUM;
8479	SDNodeFlags Flags = N->getFlags();
8480
8481	// First, implement comparison not propagating NaN. If no native fmin or fmax
8482	// available, use plain select with setcc instead.
8483	SDValue MinMax;
8484	unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
8485	unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
8486
8487	// FIXME: We should probably define fminnum/fmaxnum variants with correct
8488	// signed zero behavior.
8489	bool MinMaxMustRespectOrderedZero = false;
8490
8491	if (isOperationLegalOrCustom(Op: CompOpcIeee, VT)) {
8492	MinMax = DAG.getNode(Opcode: CompOpcIeee, DL, VT, N1: LHS, N2: RHS, Flags);
8493	MinMaxMustRespectOrderedZero = true;
8494	} else if (isOperationLegalOrCustom(Op: CompOpc, VT)) {
8495	MinMax = DAG.getNode(Opcode: CompOpc, DL, VT, N1: LHS, N2: RHS, Flags);
8496	} else {
8497	if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
8498	return DAG.UnrollVectorOp(N);
8499
8500	// NaN (if exists) will be propagated later, so orderness doesn't matter.
8501	SDValue Compare =
8502	DAG.getSetCC(DL, VT: CCVT, LHS, RHS, Cond: IsMax ? ISD::SETGT : ISD::SETLT);
8503	MinMax = DAG.getSelect(DL, VT, Cond: Compare, LHS, RHS, Flags);
8504	}
8505
8506	// Propagate any NaN of both operands
8507	if (!N->getFlags().hasNoNaNs() &&
8508	(!DAG.isKnownNeverNaN(Op: RHS) \|\| !DAG.isKnownNeverNaN(Op: LHS))) {
8509	ConstantFP *FPNaN = ConstantFP::get(
8510	Context&: *DAG.getContext(), V: APFloat::getNaN(Sem: DAG.EVTToAPFloatSemantics(VT)));
8511	MinMax = DAG.getSelect(DL, VT, Cond: DAG.getSetCC(DL, VT: CCVT, LHS, RHS, Cond: ISD::SETUO),
8512	LHS: DAG.getConstantFP(V: *FPNaN, DL, VT), RHS: MinMax, Flags);
8513	}
8514
8515	// fminimum/fmaximum requires -0.0 less than +0.0
8516	if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
8517	!DAG.isKnownNeverZeroFloat(Op: RHS) && !DAG.isKnownNeverZeroFloat(Op: LHS)) {
8518	SDValue IsZero = DAG.getSetCC(DL, VT: CCVT, LHS: MinMax,
8519	RHS: DAG.getConstantFP(Val: `0.0`, DL, VT), Cond: ISD::SETEQ);
8520	SDValue TestZero =
8521	DAG.getTargetConstant(Val: IsMax ? fcPosZero : fcNegZero, DL, VT: MVT::i32);
8522	SDValue LCmp = DAG.getSelect(
8523	DL, VT, Cond: DAG.getNode(Opcode: ISD::IS_FPCLASS, DL, VT: CCVT, N1: LHS, N2: TestZero), LHS,
8524	RHS: MinMax, Flags);
8525	SDValue RCmp = DAG.getSelect(
8526	DL, VT, Cond: DAG.getNode(Opcode: ISD::IS_FPCLASS, DL, VT: CCVT, N1: RHS, N2: TestZero), LHS: RHS,
8527	RHS: LCmp, Flags);
8528	MinMax = DAG.getSelect(DL, VT, Cond: IsZero, LHS: RCmp, RHS: MinMax, Flags);
8529	}
8530
8531	return MinMax;
8532	}
8533
8534	/// Returns a true value if if this FPClassTest can be performed with an ordered
8535	/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8536	/// std::nullopt if it cannot be performed as a compare with 0.
8537	static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8538	const fltSemantics &Semantics,
8539	const MachineFunction &MF) {
8540	FPClassTest OrderedMask = Test & ~fcNan;
8541	FPClassTest NanTest = Test & fcNan;
8542	bool IsOrdered = NanTest == fcNone;
8543	bool IsUnordered = NanTest == fcNan;
8544
8545	// Skip cases that are testing for only a qnan or snan.
8546	if (!IsOrdered && !IsUnordered)
8547	return std::nullopt;
8548
8549	if (OrderedMask == fcZero &&
8550	MF.getDenormalMode(FPType: Semantics).Input == DenormalMode::IEEE)
8551	return IsOrdered;
8552	if (OrderedMask == (fcZero \| fcSubnormal) &&
8553	MF.getDenormalMode(FPType: Semantics).inputsAreZero())
8554	return IsOrdered;
8555	return std::nullopt;
8556	}
8557
8558	SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
8559	FPClassTest Test, SDNodeFlags Flags,
8560	const SDLoc &DL,
8561	SelectionDAG &DAG) const {
8562	EVT OperandVT = Op.getValueType();
8563	assert(OperandVT.isFloatingPoint());
8564
8565	// Degenerated cases.
8566	if (Test == fcNone)
8567	return DAG.getBoolConstant(V: false, DL, VT: ResultVT, OpVT: OperandVT);
8568	if ((Test & fcAllFlags) == fcAllFlags)
8569	return DAG.getBoolConstant(V: true, DL, VT: ResultVT, OpVT: OperandVT);
8570
8571	// PPC double double is a pair of doubles, of which the higher part determines
8572	// the value class.
8573	if (OperandVT == MVT::ppcf128) {
8574	Op = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL, VT: MVT::f64, N1: Op,
8575	N2: DAG.getConstant(Val: `1`, DL, VT: MVT::i32));
8576	OperandVT = MVT::f64;
8577	}
8578
8579	// Some checks may be represented as inversion of simpler check, for example
8580	// "inf\|normal\|subnormal\|zero" => !"nan".
8581	bool IsInverted = false;
8582	if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test)) {
8583	IsInverted = true;
8584	Test = InvertedCheck;
8585	}
8586
8587	// Floating-point type properties.
8588	EVT ScalarFloatVT = OperandVT.getScalarType();
8589	const Type FloatTy = ScalarFloatVT.getTypeForEVT(Context&: DAG.getContext());
8590	const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
8591	bool IsF80 = (ScalarFloatVT == MVT::f80);
8592
8593	// Some checks can be implemented using float comparisons, if floating point
8594	// exceptions are ignored.
8595	if (Flags.hasNoFPExcept() &&
8596	isOperationLegalOrCustom(Op: ISD::SETCC, VT: OperandVT.getScalarType())) {
8597	ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
8598	ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;
8599
8600	if (std::optional<bool> IsCmp0 =
8601	isFCmpEqualZero(Test, Semantics, MF: DAG.getMachineFunction());
8602	IsCmp0 && (isCondCodeLegalOrCustom(
8603	CC: *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
8604	VT: OperandVT.getScalarType().getSimpleVT()))) {
8605
8606	// If denormals could be implicitly treated as 0, this is not equivalent
8607	// to a compare with 0 since it will also be true for denormals.
8608	return DAG.getSetCC(DL, VT: ResultVT, LHS: Op,
8609	RHS: DAG.getConstantFP(Val: `0.0`, DL, VT: OperandVT),
8610	Cond: *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
8611	}
8612
8613	if (Test == fcNan &&
8614	isCondCodeLegalOrCustom(CC: IsInverted ? ISD::SETO : ISD::SETUO,
8615	VT: OperandVT.getScalarType().getSimpleVT())) {
8616	return DAG.getSetCC(DL, VT: ResultVT, LHS: Op, RHS: Op,
8617	Cond: IsInverted ? ISD::SETO : ISD::SETUO);
8618	}
8619
8620	if (Test == fcInf &&
8621	isCondCodeLegalOrCustom(CC: IsInverted ? ISD::SETUNE : ISD::SETOEQ,
8622	VT: OperandVT.getScalarType().getSimpleVT()) &&
8623	isOperationLegalOrCustom(Op: ISD::FABS, VT: OperandVT.getScalarType())) {
8624	// isinf(x) --> fabs(x) == inf
8625	SDValue Abs = DAG.getNode(Opcode: ISD::FABS, DL, VT: OperandVT, Operand: Op);
8626	SDValue Inf =
8627	DAG.getConstantFP(Val: APFloat::getInf(Sem: Semantics), DL, VT: OperandVT);
8628	return DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: Inf,
8629	Cond: IsInverted ? ISD::SETUNE : ISD::SETOEQ);
8630	}
8631	}
8632
8633	// In the general case use integer operations.
8634	unsigned BitSize = OperandVT.getScalarSizeInBits();
8635	EVT IntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: BitSize);
8636	if (OperandVT.isVector())
8637	IntVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: IntVT,
8638	EC: OperandVT.getVectorElementCount());
8639	SDValue OpAsInt = DAG.getBitcast(VT: IntVT, V: Op);
8640
8641	// Various masks.
8642	APInt SignBit = APInt::getSignMask(BitWidth: BitSize);
8643	APInt ValueMask = APInt::getSignedMaxValue(numBits: BitSize); // All bits but sign.
8644	APInt Inf = APFloat::getInf(Sem: Semantics).bitcastToAPInt(); // Exp and int bit.
8645	const unsigned ExplicitIntBitInF80 = `63`;
8646	APInt ExpMask = Inf;
8647	if (IsF80)
8648	ExpMask.clearBit(BitPosition: ExplicitIntBitInF80);
8649	APInt AllOneMantissa = APFloat::getLargest(Sem: Semantics).bitcastToAPInt() & ~Inf;
8650	APInt QNaNBitMask =
8651	APInt::getOneBitSet(numBits: BitSize, BitNo: AllOneMantissa.getActiveBits() - `1`);
8652	APInt InvertionMask = APInt::getAllOnes(numBits: ResultVT.getScalarSizeInBits());
8653
8654	SDValue ValueMaskV = DAG.getConstant(Val: ValueMask, DL, VT: IntVT);
8655	SDValue SignBitV = DAG.getConstant(Val: SignBit, DL, VT: IntVT);
8656	SDValue ExpMaskV = DAG.getConstant(Val: ExpMask, DL, VT: IntVT);
8657	SDValue ZeroV = DAG.getConstant(Val: `0`, DL, VT: IntVT);
8658	SDValue InfV = DAG.getConstant(Val: Inf, DL, VT: IntVT);
8659	SDValue ResultInvertionMask = DAG.getConstant(Val: InvertionMask, DL, VT: ResultVT);
8660
8661	SDValue Res;
8662	const auto appendResult = [&](SDValue PartialRes) {
8663	if (PartialRes) {
8664	if (Res)
8665	Res = DAG.getNode(Opcode: ISD::OR, DL, VT: ResultVT, N1: Res, N2: PartialRes);
8666	else
8667	Res = PartialRes;
8668	}
8669	};
8670
8671	SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
8672	const auto getIntBitIsSet = [&]() -> SDValue {
8673	if (!IntBitIsSetV) {
8674	APInt IntBitMask(BitSize, `0`);
8675	IntBitMask.setBit(ExplicitIntBitInF80);
8676	SDValue IntBitMaskV = DAG.getConstant(Val: IntBitMask, DL, VT: IntVT);
8677	SDValue IntBitV = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: IntBitMaskV);
8678	IntBitIsSetV = DAG.getSetCC(DL, VT: ResultVT, LHS: IntBitV, RHS: ZeroV, Cond: ISD::SETNE);
8679	}
8680	return IntBitIsSetV;
8681	};
8682
8683	// Split the value into sign bit and absolute value.
8684	SDValue AbsV = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: ValueMaskV);
8685	SDValue SignV = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt,
8686	RHS: DAG.getConstant(Val: `0.0`, DL, VT: IntVT), Cond: ISD::SETLT);
8687
8688	// Tests that involve more than one class should be processed first.
8689	SDValue PartialRes;
8690
8691	if (IsF80)
8692	; // Detect finite numbers of f80 by checking individual classes because
8693	// they have different settings of the explicit integer bit.
8694	else if ((Test & fcFinite) == fcFinite) {
8695	// finite(V) ==> abs(V) < exp_mask
8696	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ExpMaskV, Cond: ISD::SETLT);
8697	Test &= ~fcFinite;
8698	} else if ((Test & fcFinite) == fcPosFinite) {
8699	// finite(V) && V > 0 ==> V < exp_mask
8700	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: ExpMaskV, Cond: ISD::SETULT);
8701	Test &= ~fcPosFinite;
8702	} else if ((Test & fcFinite) == fcNegFinite) {
8703	// finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
8704	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ExpMaskV, Cond: ISD::SETLT);
8705	PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
8706	Test &= ~fcNegFinite;
8707	}
8708	appendResult (PartialRes);
8709
8710	if (FPClassTest PartialCheck = Test & (fcZero \| fcSubnormal)) {
8711	// fcZero \| fcSubnormal => test all exponent bits are 0
8712	// TODO: Handle sign bit specific cases
8713	if (PartialCheck == (fcZero \| fcSubnormal)) {
8714	SDValue ExpBits = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: ExpMaskV);
8715	SDValue ExpIsZero =
8716	DAG.getSetCC(DL, VT: ResultVT, LHS: ExpBits, RHS: ZeroV, Cond: ISD::SETEQ);
8717	appendResult (ExpIsZero);
8718	Test &= ~PartialCheck & fcAllFlags;
8719	}
8720	}
8721
8722	// Check for individual classes.
8723
8724	if (unsigned PartialCheck = Test & fcZero) {
8725	if (PartialCheck == fcPosZero)
8726	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: ZeroV, Cond: ISD::SETEQ);
8727	else if (PartialCheck == fcZero)
8728	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ZeroV, Cond: ISD::SETEQ);
8729	else // ISD::fcNegZero
8730	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: SignBitV, Cond: ISD::SETEQ);
8731	appendResult (PartialRes);
8732	}
8733
8734	if (unsigned PartialCheck = Test & fcSubnormal) {
8735	// issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
8736	// issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
8737	SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
8738	SDValue MantissaV = DAG.getConstant(Val: AllOneMantissa, DL, VT: IntVT);
8739	SDValue VMinusOneV =
8740	DAG.getNode(Opcode: ISD::SUB, DL, VT: IntVT, N1: V, N2: DAG.getConstant(Val: `1`, DL, VT: IntVT));
8741	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: VMinusOneV, RHS: MantissaV, Cond: ISD::SETULT);
8742	if (PartialCheck == fcNegSubnormal)
8743	PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
8744	appendResult (PartialRes);
8745	}
8746
8747	if (unsigned PartialCheck = Test & fcInf) {
8748	if (PartialCheck == fcPosInf)
8749	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: InfV, Cond: ISD::SETEQ);
8750	else if (PartialCheck == fcInf)
8751	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETEQ);
8752	else { // ISD::fcNegInf
8753	APInt NegInf = APFloat::getInf(Sem: Semantics, Negative: true).bitcastToAPInt();
8754	SDValue NegInfV = DAG.getConstant(Val: NegInf, DL, VT: IntVT);
8755	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: NegInfV, Cond: ISD::SETEQ);
8756	}
8757	appendResult (PartialRes);
8758	}
8759
8760	if (unsigned PartialCheck = Test & fcNan) {
8761	APInt InfWithQnanBit = Inf \| QNaNBitMask;
8762	SDValue InfWithQnanBitV = DAG.getConstant(Val: InfWithQnanBit, DL, VT: IntVT);
8763	if (PartialCheck == fcNan) {
8764	// isnan(V) ==> abs(V) > int(inf)
8765	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETGT);
8766	if (IsF80) {
8767	// Recognize unsupported values as NaNs for compatibility with glibc.
8768	// In them (exp(V)==0) == int_bit.
8769	SDValue ExpBits = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: AbsV, N2: ExpMaskV);
8770	SDValue ExpIsZero =
8771	DAG.getSetCC(DL, VT: ResultVT, LHS: ExpBits, RHS: ZeroV, Cond: ISD::SETEQ);
8772	SDValue IsPseudo =
8773	DAG.getSetCC(DL, VT: ResultVT, LHS: getIntBitIsSet (), RHS: ExpIsZero, Cond: ISD::SETEQ);
8774	PartialRes = DAG.getNode(Opcode: ISD::OR, DL, VT: ResultVT, N1: PartialRes, N2: IsPseudo);
8775	}
8776	} else if (PartialCheck == fcQNan) {
8777	// isquiet(V) ==> abs(V) >= (unsigned(Inf) \| quiet_bit)
8778	PartialRes =
8779	DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfWithQnanBitV, Cond: ISD::SETGE);
8780	} else { // ISD::fcSNan
8781	// issignaling(V) ==> abs(V) > unsigned(Inf) &&
8782	// abs(V) < (unsigned(Inf) \| quiet_bit)
8783	SDValue IsNan = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETGT);
8784	SDValue IsNotQnan =
8785	DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfWithQnanBitV, Cond: ISD::SETLT);
8786	PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: IsNan, N2: IsNotQnan);
8787	}
8788	appendResult (PartialRes);
8789	}
8790
8791	if (unsigned PartialCheck = Test & fcNormal) {
8792	// isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
8793	APInt ExpLSB = ExpMask & ~(ExpMask.shl(shiftAmt: `1`));
8794	SDValue ExpLSBV = DAG.getConstant(Val: ExpLSB, DL, VT: IntVT);
8795	SDValue ExpMinus1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: IntVT, N1: AbsV, N2: ExpLSBV);
8796	APInt ExpLimit = ExpMask - ExpLSB;
8797	SDValue ExpLimitV = DAG.getConstant(Val: ExpLimit, DL, VT: IntVT);
8798	PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: ExpMinus1, RHS: ExpLimitV, Cond: ISD::SETULT);
8799	if (PartialCheck == fcNegNormal)
8800	PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
8801	else if (PartialCheck == fcPosNormal) {
8802	SDValue PosSignV =
8803	DAG.getNode(Opcode: ISD::XOR, DL, VT: ResultVT, N1: SignV, N2: ResultInvertionMask);
8804	PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: PosSignV);
8805	}
8806	if (IsF80)
8807	PartialRes =
8808	DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: getIntBitIsSet ());
8809	appendResult (PartialRes);
8810	}
8811
8812	if (!Res)
8813	return DAG.getConstant(Val: IsInverted, DL, VT: ResultVT);
8814	if (IsInverted)
8815	Res = DAG.getNode(Opcode: ISD::XOR, DL, VT: ResultVT, N1: Res, N2: ResultInvertionMask);
8816	return Res;
8817	}
8818
8819	// Only expand vector types if we have the appropriate vector bit operations.
8820	static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
8821	assert(VT.isVector() && "Expected vector type");
8822	unsigned Len = VT.getScalarSizeInBits();
8823	return TLI.isOperationLegalOrCustom(Op: ISD::ADD, VT) &&
8824	TLI.isOperationLegalOrCustom(Op: ISD::SUB, VT) &&
8825	TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
8826	(Len == `8` \|\| TLI.isOperationLegalOrCustom(Op: ISD::MUL, VT)) &&
8827	TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT);
8828	}
8829
8830	SDValue TargetLowering::expandCTPOP(SDNode Node, SelectionDAG &DAG) const* {
8831	SDLoc dl(Node);
8832	EVT VT = Node->getValueType(ResNo: `0`);
8833	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
8834	SDValue Op = Node->getOperand(Num: `0`);
8835	unsigned Len = VT.getScalarSizeInBits();
8836	assert(VT.isInteger() && "CTPOP not implemented for this type.");
8837
8838	// TODO: Add support for irregular type lengths.
8839	if (!(Len <= `128` && Len % `8` == `0`))
8840	return SDValue ();
8841
8842	// Only expand vector types if we have the appropriate vector bit operations.
8843	if (VT.isVector() && !canExpandVectorCTPOP(TLI: *this, VT))
8844	return SDValue ();
8845
8846	// This is the "best" algorithm from
8847	// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
8848	SDValue Mask55 =
8849	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x55`)), DL: dl, VT);
8850	SDValue Mask33 =
8851	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x33`)), DL: dl, VT);
8852	SDValue Mask0F =
8853	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x0F`)), DL: dl, VT);
8854
8855	// v = v - ((v >> 1) & 0x55555555...)
8856	Op = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Op,
8857	N2: DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
8858	N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
8859	N2: DAG.getConstant(Val: `1`, DL: dl, VT: ShVT)),
8860	N2: Mask55));
8861	// v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
8862	Op = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op, N2: Mask33),
8863	N2: DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
8864	N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
8865	N2: DAG.getConstant(Val: `2`, DL: dl, VT: ShVT)),
8866	N2: Mask33));
8867	// v = (v + (v >> 4)) & 0x0F0F0F0F...
8868	Op = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
8869	N1: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Op,
8870	N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
8871	N2: DAG.getConstant(Val: `4`, DL: dl, VT: ShVT))),
8872	N2: Mask0F);
8873
8874	if (Len <= `8`)
8875	return Op;
8876
8877	// Avoid the multiply if we only have 2 bytes to add.
8878	// TODO: Only doing this for scalars because vectors weren't as obviously
8879	// improved.
8880	if (Len == `16` && !VT.isVector()) {
8881	// v = (v + (v >> 8)) & 0x00FF;
8882	return DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
8883	N1: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Op,
8884	N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
8885	N2: DAG.getConstant(Val: `8`, DL: dl, VT: ShVT))),
8886	N2: DAG.getConstant(Val: `0xFF`, DL: dl, VT));
8887	}
8888
8889	// v = (v 0x01010101...) >> (Len - 8)*
8890	SDValue V;
8891	if (isOperationLegalOrCustomOrPromote(
8892	Op: ISD::MUL, VT: getTypeToTransformTo(Context&: *DAG.getContext(), VT))) {
8893	SDValue Mask01 =
8894	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x01`)), DL: dl, VT);
8895	V = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Op, N2: Mask01);
8896	} else {
8897	V = Op;
8898	for (unsigned Shift = `8`; Shift < Len; Shift *= `2`) {
8899	SDValue ShiftC = DAG.getShiftAmountConstant(Val: Shift, VT, DL: dl);
8900	V = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: V,
8901	N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: V, N2: ShiftC));
8902	}
8903	}
8904	return DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: V, N2: DAG.getConstant(Val: Len - `8`, DL: dl, VT: ShVT));
8905	}
8906
8907	SDValue TargetLowering::expandVPCTPOP(SDNode Node, SelectionDAG &DAG) const* {
8908	SDLoc dl(Node);
8909	EVT VT = Node->getValueType(ResNo: `0`);
8910	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
8911	SDValue Op = Node->getOperand(Num: `0`);
8912	SDValue Mask = Node->getOperand(Num: `1`);
8913	SDValue VL = Node->getOperand(Num: `2`);
8914	unsigned Len = VT.getScalarSizeInBits();
8915	assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
8916
8917	// TODO: Add support for irregular type lengths.
8918	if (!(Len <= `128` && Len % `8` == `0`))
8919	return SDValue ();
8920
8921	// This is same algorithm of expandCTPOP from
8922	// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
8923	SDValue Mask55 =
8924	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x55`)), DL: dl, VT);
8925	SDValue Mask33 =
8926	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x33`)), DL: dl, VT);
8927	SDValue Mask0F =
8928	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x0F`)), DL: dl, VT);
8929
8930	SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
8931
8932	// v = v - ((v >> 1) & 0x55555555...)
8933	Tmp1 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT,
8934	N1: DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op,
8935	N2: DAG.getConstant(Val: `1`, DL: dl, VT: ShVT), N3: Mask, N4: VL),
8936	N2: Mask55, N3: Mask, N4: VL);
8937	Op = DAG.getNode(Opcode: ISD::VP_SUB, DL: dl, VT, N1: Op, N2: Tmp1, N3: Mask, N4: VL);
8938
8939	// v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
8940	Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op, N2: Mask33, N3: Mask, N4: VL);
8941	Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT,
8942	N1: DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op,
8943	N2: DAG.getConstant(Val: `2`, DL: dl, VT: ShVT), N3: Mask, N4: VL),
8944	N2: Mask33, N3: Mask, N4: VL);
8945	Op = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: VL);
8946
8947	// v = (v + (v >> 4)) & 0x0F0F0F0F...
8948	Tmp4 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `4`, DL: dl, VT: ShVT),
8949	N3: Mask, N4: VL),
8950	Tmp5 = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: Op, N2: Tmp4, N3: Mask, N4: VL);
8951	Op = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp5, N2: Mask0F, N3: Mask, N4: VL);
8952
8953	if (Len <= `8`)
8954	return Op;
8955
8956	// v = (v 0x01010101...) >> (Len - 8)*
8957	SDValue V;
8958	if (isOperationLegalOrCustomOrPromote(
8959	Op: ISD::VP_MUL, VT: getTypeToTransformTo(Context&: *DAG.getContext(), VT))) {
8960	SDValue Mask01 =
8961	DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt (`8`, `0x01`)), DL: dl, VT);
8962	V = DAG.getNode(Opcode: ISD::VP_MUL, DL: dl, VT, N1: Op, N2: Mask01, N3: Mask, N4: VL);
8963	} else {
8964	V = Op;
8965	for (unsigned Shift = `8`; Shift < Len; Shift *= `2`) {
8966	SDValue ShiftC = DAG.getShiftAmountConstant(Val: Shift, VT, DL: dl);
8967	V = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: V,
8968	N2: DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: V, N2: ShiftC, N3: Mask, N4: VL),
8969	N3: Mask, N4: VL);
8970	}
8971	}
8972	return DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: V, N2: DAG.getConstant(Val: Len - `8`, DL: dl, VT: ShVT),
8973	N3: Mask, N4: VL);
8974	}
8975
8976	SDValue TargetLowering::expandCTLZ(SDNode Node, SelectionDAG &DAG) const* {
8977	SDLoc dl(Node);
8978	EVT VT = Node->getValueType(ResNo: `0`);
8979	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
8980	SDValue Op = Node->getOperand(Num: `0`);
8981	unsigned NumBitsPerElt = VT.getScalarSizeInBits();
8982
8983	// If the non-ZERO_UNDEF version is supported we can use that instead.
8984	if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
8985	isOperationLegalOrCustom(Op: ISD::CTLZ, VT))
8986	return DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Op);
8987
8988	// If the ZERO_UNDEF version is supported use that and handle the zero case.
8989	if (isOperationLegalOrCustom(Op: ISD::CTLZ_ZERO_UNDEF, VT)) {
8990	EVT SetCCVT =
8991	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
8992	SDValue CTLZ = DAG.getNode(Opcode: ISD::CTLZ_ZERO_UNDEF, DL: dl, VT, Operand: Op);
8993	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
8994	SDValue SrcIsZero = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
8995	return DAG.getSelect(DL: dl, VT, Cond: SrcIsZero,
8996	LHS: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT), RHS: CTLZ);
8997	}
8998
8999	// Only expand vector types if we have the appropriate vector bit operations.
9000	// This includes the operations needed to expand CTPOP if it isn't supported.
9001	if (VT.isVector() && (!isPowerOf2_32(Value: NumBitsPerElt) \|\|
9002	(!isOperationLegalOrCustom(Op: ISD::CTPOP, VT) &&
9003	!canExpandVectorCTPOP(TLI: *this, VT)) \|\|
9004	!isOperationLegalOrCustom(Op: ISD::SRL, VT) \|\|
9005	!isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)))
9006	return SDValue ();
9007
9008	// for now, we do this:
9009	// x = x \| (x >> 1);
9010	// x = x \| (x >> 2);
9011	// ...
9012	// x = x \| (x >>16);
9013	// x = x \| (x >>32); // for 64-bit input
9014	// return popcount(~x);
9015	//
9016	// Ref: "Hacker's Delight" by Henry Warren
9017	for (unsigned i = `0`; (`1U` << i) < NumBitsPerElt; ++i) {
9018	SDValue Tmp = DAG.getConstant(Val: `1ULL` << i, DL: dl, VT: ShVT);
9019	Op = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Op,
9020	N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: Tmp));
9021	}
9022	Op = DAG.getNOT(DL: dl, Val: Op, VT);
9023	return DAG.getNode(Opcode: ISD::CTPOP, DL: dl, VT, Operand: Op);
9024	}
9025
9026	SDValue TargetLowering::expandVPCTLZ(SDNode Node, SelectionDAG &DAG) const* {
9027	SDLoc dl(Node);
9028	EVT VT = Node->getValueType(ResNo: `0`);
9029	EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9030	SDValue Op = Node->getOperand(Num: `0`);
9031	SDValue Mask = Node->getOperand(Num: `1`);
9032	SDValue VL = Node->getOperand(Num: `2`);
9033	unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9034
9035	// do this:
9036	// x = x \| (x >> 1);
9037	// x = x \| (x >> 2);
9038	// ...
9039	// x = x \| (x >>16);
9040	// x = x \| (x >>32); // for 64-bit input
9041	// return popcount(~x);
9042	for (unsigned i = `0`; (`1U` << i) < NumBitsPerElt; ++i) {
9043	SDValue Tmp = DAG.getConstant(Val: `1ULL` << i, DL: dl, VT: ShVT);
9044	Op = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Op,
9045	N2: DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: Tmp, N3: Mask, N4: VL), N3: Mask,
9046	N4: VL);
9047	}
9048	Op = DAG.getNode(Opcode: ISD::VP_XOR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: -`1`, DL: dl, VT), N3: Mask,
9049	N4: VL);
9050	return DAG.getNode(Opcode: ISD::VP_CTPOP, DL: dl, VT, N1: Op, N2: Mask, N3: VL);
9051	}
9052
9053	SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
9054	const SDLoc &DL, EVT VT, SDValue Op,
9055	unsigned BitWidth) const {
9056	if (BitWidth != `32` && BitWidth != `64`)
9057	return SDValue ();
9058	APInt DeBruijn = BitWidth == `32` ? APInt (`32`, `0x077CB531U`)
9059	: APInt (`64`, `0x0218A392CD3D5DBFULL`);
9060	const DataLayout &TD = DAG.getDataLayout();
9061	MachinePointerInfo PtrInfo =
9062	MachinePointerInfo::getConstantPool(MF&: DAG.getMachineFunction());
9063	unsigned ShiftAmt = BitWidth - Log2_32(Value: BitWidth);
9064	SDValue Neg = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: `0`, DL, VT), N2: Op);
9065	SDValue Lookup = DAG.getNode(
9066	Opcode: ISD::SRL, DL, VT,
9067	N1: DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op, N2: Neg),
9068	N2: DAG.getConstant(Val: DeBruijn, DL, VT)),
9069	N2: DAG.getConstant(Val: ShiftAmt, DL, VT));
9070	Lookup = DAG.getSExtOrTrunc(Op: Lookup, DL, VT: getPointerTy(DL: TD));
9071
9072	SmallVector<uint8_t> Table(BitWidth, `0`);
9073	for (unsigned i = `0`; i < BitWidth; i++) {
9074	APInt Shl = DeBruijn.shl(shiftAmt: i);
9075	APInt Lshr = Shl.lshr(shiftAmt: ShiftAmt);
9076	Table [Lshr.getZExtValue()] = i;
9077	}
9078
9079	// Create a ConstantArray in Constant Pool
9080	auto CA = ConstantDataArray::get(Context&: DAG.getContext(), Elts&: Table);
9081	SDValue CPIdx = DAG.getConstantPool(C: CA, VT: getPointerTy(DL: TD),
9082	Align: TD.getPrefTypeAlign(Ty: CA->getType()));
9083	SDValue ExtLoad = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl: DL, VT, Chain: DAG.getEntryNode(),
9084	Ptr: DAG.getMemBasePlusOffset(Base: CPIdx, Offset: Lookup, DL),
9085	PtrInfo, MemVT: MVT::i8);
9086	if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
9087	return ExtLoad;
9088
9089	EVT SetCCVT =
9090	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
9091	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT);
9092	SDValue SrcIsZero = DAG.getSetCC(DL, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
9093	return DAG.getSelect(DL, VT, Cond: SrcIsZero,
9094	LHS: DAG.getConstant(Val: BitWidth, DL, VT), RHS: ExtLoad);
9095	}
9096
9097	SDValue TargetLowering::expandCTTZ(SDNode Node, SelectionDAG &DAG) const* {
9098	SDLoc dl(Node);
9099	EVT VT = Node->getValueType(ResNo: `0`);
9100	SDValue Op = Node->getOperand(Num: `0`);
9101	unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9102
9103	// If the non-ZERO_UNDEF version is supported we can use that instead.
9104	if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
9105	isOperationLegalOrCustom(Op: ISD::CTTZ, VT))
9106	return DAG.getNode(Opcode: ISD::CTTZ, DL: dl, VT, Operand: Op);
9107
9108	// If the ZERO_UNDEF version is supported use that and handle the zero case.
9109	if (isOperationLegalOrCustom(Op: ISD::CTTZ_ZERO_UNDEF, VT)) {
9110	EVT SetCCVT =
9111	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
9112	SDValue CTTZ = DAG.getNode(Opcode: ISD::CTTZ_ZERO_UNDEF, DL: dl, VT, Operand: Op);
9113	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
9114	SDValue SrcIsZero = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
9115	return DAG.getSelect(DL: dl, VT, Cond: SrcIsZero,
9116	LHS: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT), RHS: CTTZ);
9117	}
9118
9119	// Only expand vector types if we have the appropriate vector bit operations.
9120	// This includes the operations needed to expand CTPOP if it isn't supported.
9121	if (VT.isVector() && (!isPowerOf2_32(Value: NumBitsPerElt) \|\|
9122	(!isOperationLegalOrCustom(Op: ISD::CTPOP, VT) &&
9123	!isOperationLegalOrCustom(Op: ISD::CTLZ, VT) &&
9124	!canExpandVectorCTPOP(TLI: *this, VT)) \|\|
9125	!isOperationLegalOrCustom(Op: ISD::SUB, VT) \|\|
9126	!isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) \|\|
9127	!isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT)))
9128	return SDValue ();
9129
9130	// Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
9131	if (!VT.isVector() && isOperationExpand(Op: ISD::CTPOP, VT) &&
9132	!isOperationLegal(Op: ISD::CTLZ, VT))
9133	if (SDValue V = CTTZTableLookup(Node, DAG, DL: dl, VT, Op, BitWidth: NumBitsPerElt))
9134	return V;
9135
9136	// for now, we use: { return popcount(~x & (x - 1)); }
9137	// unless the target has ctlz but not ctpop, in which case we use:
9138	// { return 32 - nlz(~x & (x-1)); }
9139	// Ref: "Hacker's Delight" by Henry Warren
9140	SDValue Tmp = DAG.getNode(
9141	Opcode: ISD::AND, DL: dl, VT, N1: DAG.getNOT(DL: dl, Val: Op, VT),
9142	N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `1`, DL: dl, VT)));
9143
9144	// If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
9145	if (isOperationLegal(Op: ISD::CTLZ, VT) && !isOperationLegal(Op: ISD::CTPOP, VT)) {
9146	return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT),
9147	N2: DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Tmp));
9148	}
9149
9150	return DAG.getNode(Opcode: ISD::CTPOP, DL: dl, VT, Operand: Tmp);
9151	}
9152
9153	SDValue TargetLowering::expandVPCTTZ(SDNode Node, SelectionDAG &DAG) const* {
9154	SDValue Op = Node->getOperand(Num: `0`);
9155	SDValue Mask = Node->getOperand(Num: `1`);
9156	SDValue VL = Node->getOperand(Num: `2`);
9157	SDLoc dl(Node);
9158	EVT VT = Node->getValueType(ResNo: `0`);
9159
9160	// Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9161	SDValue Not = DAG.getNode(Opcode: ISD::VP_XOR, DL: dl, VT, N1: Op,
9162	N2: DAG.getConstant(Val: -`1`, DL: dl, VT), N3: Mask, N4: VL);
9163	SDValue MinusOne = DAG.getNode(Opcode: ISD::VP_SUB, DL: dl, VT, N1: Op,
9164	N2: DAG.getConstant(Val: `1`, DL: dl, VT), N3: Mask, N4: VL);
9165	SDValue Tmp = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Not, N2: MinusOne, N3: Mask, N4: VL);
9166	return DAG.getNode(Opcode: ISD::VP_CTPOP, DL: dl, VT, N1: Tmp, N2: Mask, N3: VL);
9167	}
9168
9169	SDValue TargetLowering::expandVPCTTZElements(SDNode *N,
9170	SelectionDAG &DAG) const {
9171	// %cond = to_bool_vec %source
9172	// %splat = splat /val=/VL
9173	// %tz = step_vector
9174	// %v = vp.select %cond, /true=/tz, /false=/%splat
9175	// %r = vp.reduce.umin %v
9176	SDLoc DL(N);
9177	SDValue Source = N->getOperand(Num: `0`);
9178	SDValue Mask = N->getOperand(Num: `1`);
9179	SDValue EVL = N->getOperand(Num: `2`);
9180	EVT SrcVT = Source.getValueType();
9181	EVT ResVT = N->getValueType(ResNo: `0`);
9182	EVT ResVecVT =
9183	EVT::getVectorVT(Context&: *DAG.getContext(), VT: ResVT, EC: SrcVT.getVectorElementCount());
9184
9185	// Convert to boolean vector.
9186	if (SrcVT.getScalarType() != MVT::i1) {
9187	SDValue AllZero = DAG.getConstant(Val: `0`, DL, VT: SrcVT);
9188	SrcVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i1,
9189	EC: SrcVT.getVectorElementCount());
9190	Source = DAG.getNode(Opcode: ISD::VP_SETCC, DL, VT: SrcVT, N1: Source, N2: AllZero,
9191	N3: DAG.getCondCode(Cond: ISD::SETNE), N4: Mask, N5: EVL);
9192	}
9193
9194	SDValue ExtEVL = DAG.getZExtOrTrunc(Op: EVL, DL, VT: ResVT);
9195	SDValue Splat = DAG.getSplat(VT: ResVecVT, DL, Op: ExtEVL);
9196	SDValue StepVec = DAG.getStepVector(DL, ResVT: ResVecVT);
9197	SDValue Select =
9198	DAG.getNode(Opcode: ISD::VP_SELECT, DL, VT: ResVecVT, N1: Source, N2: StepVec, N3: Splat, N4: EVL);
9199	return DAG.getNode(Opcode: ISD::VP_REDUCE_UMIN, DL, VT: ResVT, N1: ExtEVL, N2: Select, N3: Mask, N4: EVL);
9200	}
9201
9202	SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
9203	bool IsNegative) const {
9204	SDLoc dl(N);
9205	EVT VT = N->getValueType(ResNo: `0`);
9206	SDValue Op = N->getOperand(Num: `0`);
9207
9208	// abs(x) -> smax(x,sub(0,x))
9209	if (!IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
9210	isOperationLegal(Op: ISD::SMAX, VT)) {
9211	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
9212	Op = DAG.getFreeze(V: Op);
9213	return DAG.getNode(Opcode: ISD::SMAX, DL: dl, VT, N1: Op,
9214	N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
9215	}
9216
9217	// abs(x) -> umin(x,sub(0,x))
9218	if (!IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
9219	isOperationLegal(Op: ISD::UMIN, VT)) {
9220	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
9221	Op = DAG.getFreeze(V: Op);
9222	return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT, N1: Op,
9223	N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
9224	}
9225
9226	// 0 - abs(x) -> smin(x, sub(0,x))
9227	if (IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
9228	isOperationLegal(Op: ISD::SMIN, VT)) {
9229	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
9230	Op = DAG.getFreeze(V: Op);
9231	return DAG.getNode(Opcode: ISD::SMIN, DL: dl, VT, N1: Op,
9232	N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
9233	}
9234
9235	// Only expand vector types if we have the appropriate vector operations.
9236	if (VT.isVector() &&
9237	(!isOperationLegalOrCustom(Op: ISD::SRA, VT) \|\|
9238	(!IsNegative && !isOperationLegalOrCustom(Op: ISD::ADD, VT)) \|\|
9239	(IsNegative && !isOperationLegalOrCustom(Op: ISD::SUB, VT)) \|\|
9240	!isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT)))
9241	return SDValue ();
9242
9243	Op = DAG.getFreeze(V: Op);
9244	SDValue Shift = DAG.getNode(
9245	Opcode: ISD::SRA, DL: dl, VT, N1: Op,
9246	N2: DAG.getShiftAmountConstant(Val: VT.getScalarSizeInBits() - `1`, VT, DL: dl));
9247	SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Op, N2: Shift);
9248
9249	// abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
9250	if (!IsNegative)
9251	return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Xor, N2: Shift);
9252
9253	// 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
9254	return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Shift, N2: Xor);
9255	}
9256
9257	SDValue TargetLowering::expandABD(SDNode N, SelectionDAG &DAG) const* {
9258	SDLoc dl(N);
9259	EVT VT = N->getValueType(ResNo: `0`);
9260	SDValue LHS = DAG.getFreeze(V: N->getOperand(Num: `0`));
9261	SDValue RHS = DAG.getFreeze(V: N->getOperand(Num: `1`));
9262	bool IsSigned = N->getOpcode() == ISD::ABDS;
9263
9264	// abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
9265	// abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
9266	unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
9267	unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
9268	if (isOperationLegal(Op: MaxOpc, VT) && isOperationLegal(Op: MinOpc, VT)) {
9269	SDValue Max = DAG.getNode(Opcode: MaxOpc, DL: dl, VT, N1: LHS, N2: RHS);
9270	SDValue Min = DAG.getNode(Opcode: MinOpc, DL: dl, VT, N1: LHS, N2: RHS);
9271	return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Max, N2: Min);
9272	}
9273
9274	// abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
9275	if (!IsSigned && isOperationLegal(Op: ISD::USUBSAT, VT))
9276	return DAG.getNode(Opcode: ISD::OR, DL: dl, VT,
9277	N1: DAG.getNode(Opcode: ISD::USUBSAT, DL: dl, VT, N1: LHS, N2: RHS),
9278	N2: DAG.getNode(Opcode: ISD::USUBSAT, DL: dl, VT, N1: RHS, N2: LHS));
9279
9280	EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
9281	ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
9282	SDValue Cmp = DAG.getSetCC(DL: dl, VT: CCVT, LHS, RHS, Cond: CC);
9283
9284	// Branchless expansion iff cmp result is allbits:
9285	// abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
9286	// abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
9287	if (CCVT == VT && getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
9288	SDValue Diff = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS);
9289	SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Diff, N2: Cmp);
9290	return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Cmp, N2: Xor);
9291	}
9292
9293	// abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9294	// abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9295	return DAG.getSelect(DL: dl, VT, Cond: Cmp, LHS: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS),
9296	RHS: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: RHS, N2: LHS));
9297	}
9298
9299	SDValue TargetLowering::expandAVG(SDNode N, SelectionDAG &DAG) const* {
9300	SDLoc dl(N);
9301	EVT VT = N->getValueType(ResNo: `0`);
9302	SDValue LHS = N->getOperand(Num: `0`);
9303	SDValue RHS = N->getOperand(Num: `1`);
9304
9305	unsigned Opc = N->getOpcode();
9306	bool IsFloor = Opc == ISD::AVGFLOORS \|\| Opc == ISD::AVGFLOORU;
9307	bool IsSigned = Opc == ISD::AVGCEILS \|\| Opc == ISD::AVGFLOORS;
9308	unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
9309	unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
9310	unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
9311	unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9312	assert((Opc == ISD::AVGFLOORS \|\| Opc == ISD::AVGCEILS \|\|
9313	Opc == ISD::AVGFLOORU \|\| Opc == ISD::AVGCEILU) &&
9314	"Unknown AVG node");
9315
9316	// If the operands are already extended, we can add+shift.
9317	bool IsExt =
9318	(IsSigned && DAG.ComputeNumSignBits(Op: LHS) >= `2` &&
9319	DAG.ComputeNumSignBits(Op: RHS) >= `2`) \|\|
9320	(!IsSigned && DAG.computeKnownBits(Op: LHS).countMinLeadingZeros() >= `1` &&
9321	DAG.computeKnownBits(Op: RHS).countMinLeadingZeros() >= `1`);
9322	if (IsExt) {
9323	SDValue Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: LHS, N2: RHS);
9324	if (!IsFloor)
9325	Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Sum, N2: DAG.getConstant(Val: `1`, DL: dl, VT));
9326	return DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: Sum,
9327	N2: DAG.getShiftAmountConstant(Val: `1`, VT, DL: dl));
9328	}
9329
9330	// For scalars, see if we can efficiently extend/truncate to use add+shift.
9331	if (VT.isScalarInteger()) {
9332	unsigned BW = VT.getScalarSizeInBits();
9333	EVT ExtVT = VT.getIntegerVT(Context&: DAG.getContext(), BitWidth: `2` BW);
9334	if (isTypeLegal(VT: ExtVT) && isTruncateFree(FromVT: ExtVT, ToVT: VT)) {
9335	LHS = DAG.getNode(Opcode: ExtOpc, DL: dl, VT: ExtVT, Operand: LHS);
9336	RHS = DAG.getNode(Opcode: ExtOpc, DL: dl, VT: ExtVT, Operand: RHS);
9337	SDValue Avg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ExtVT, N1: LHS, N2: RHS);
9338	if (!IsFloor)
9339	Avg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ExtVT, N1: Avg,
9340	N2: DAG.getConstant(Val: `1`, DL: dl, VT: ExtVT));
9341	// Just use SRL as we will be truncating away the extended sign bits.
9342	Avg = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ExtVT, N1: Avg,
9343	N2: DAG.getShiftAmountConstant(Val: `1`, VT: ExtVT, DL: dl));
9344	return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Avg);
9345	}
9346	}
9347
9348	// avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
9349	// avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
9350	// avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
9351	// avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
9352	LHS = DAG.getFreeze(V: LHS);
9353	RHS = DAG.getFreeze(V: RHS);
9354	SDValue Sign = DAG.getNode(Opcode: SignOpc, DL: dl, VT, N1: LHS, N2: RHS);
9355	SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: LHS, N2: RHS);
9356	SDValue Shift =
9357	DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: Xor, N2: DAG.getShiftAmountConstant(Val: `1`, VT, DL: dl));
9358	return DAG.getNode(Opcode: SumOpc, DL: dl, VT, N1: Sign, N2: Shift);
9359	}
9360
9361	SDValue TargetLowering::expandBSWAP(SDNode N, SelectionDAG &DAG) const* {
9362	SDLoc dl(N);
9363	EVT VT = N->getValueType(ResNo: `0`);
9364	SDValue Op = N->getOperand(Num: `0`);
9365
9366	if (!VT.isSimple())
9367	return SDValue ();
9368
9369	EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9370	SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9371	switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9372	default:
9373	return SDValue ();
9374	case MVT::i16:
9375	// Use a rotate by 8. This can be further expanded if necessary.
9376	return DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT));
9377	case MVT::i32:
9378	Tmp4 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT));
9379	Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
9380	N2: DAG.getConstant(Val: `0xFF00`, DL: dl, VT));
9381	Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT));
9382	Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT));
9383	Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: `0xFF00`, DL: dl, VT));
9384	Tmp1 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT));
9385	Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp3);
9386	Tmp2 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp1);
9387	return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp2);
9388	case MVT::i64:
9389	Tmp8 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `56`, DL: dl, VT: SHVT));
9390	Tmp7 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
9391	N2: DAG.getConstant(Val: `255ULL`<<`8`, DL: dl, VT));
9392	Tmp7 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp7, N2: DAG.getConstant(Val: `40`, DL: dl, VT: SHVT));
9393	Tmp6 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
9394	N2: DAG.getConstant(Val: `255ULL`<<`16`, DL: dl, VT));
9395	Tmp6 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp6, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT));
9396	Tmp5 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
9397	N2: DAG.getConstant(Val: `255ULL`<<`24`, DL: dl, VT));
9398	Tmp5 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp5, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT));
9399	Tmp4 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT));
9400	Tmp4 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp4,
9401	N2: DAG.getConstant(Val: `255ULL`<<`24`, DL: dl, VT));
9402	Tmp3 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT));
9403	Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp3,
9404	N2: DAG.getConstant(Val: `255ULL`<<`16`, DL: dl, VT));
9405	Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `40`, DL: dl, VT: SHVT));
9406	Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2,
9407	N2: DAG.getConstant(Val: `255ULL`<<`8`, DL: dl, VT));
9408	Tmp1 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `56`, DL: dl, VT: SHVT));
9409	Tmp8 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp7);
9410	Tmp6 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp6, N2: Tmp5);
9411	Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp3);
9412	Tmp2 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp1);
9413	Tmp8 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp6);
9414	Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp2);
9415	return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp4);
9416	}
9417	}
9418
9419	SDValue TargetLowering::expandVPBSWAP(SDNode N, SelectionDAG &DAG) const* {
9420	SDLoc dl(N);
9421	EVT VT = N->getValueType(ResNo: `0`);
9422	SDValue Op = N->getOperand(Num: `0`);
9423	SDValue Mask = N->getOperand(Num: `1`);
9424	SDValue EVL = N->getOperand(Num: `2`);
9425
9426	if (!VT.isSimple())
9427	return SDValue ();
9428
9429	EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9430	SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9431	switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9432	default:
9433	return SDValue ();
9434	case MVT::i16:
9435	Tmp1 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT),
9436	N3: Mask, N4: EVL);
9437	Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT),
9438	N3: Mask, N4: EVL);
9439	return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp1, N2: Tmp2, N3: Mask, N4: EVL);
9440	case MVT::i32:
9441	Tmp4 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT),
9442	N3: Mask, N4: EVL);
9443	Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `0xFF00`, DL: dl, VT),
9444	N3: Mask, N4: EVL);
9445	Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT),
9446	N3: Mask, N4: EVL);
9447	Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT),
9448	N3: Mask, N4: EVL);
9449	Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
9450	N2: DAG.getConstant(Val: `0xFF00`, DL: dl, VT), N3: Mask, N4: EVL);
9451	Tmp1 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT),
9452	N3: Mask, N4: EVL);
9453	Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp3, N3: Mask, N4: EVL);
9454	Tmp2 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp1, N3: Mask, N4: EVL);
9455	return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp2, N3: Mask, N4: EVL);
9456	case MVT::i64:
9457	Tmp8 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `56`, DL: dl, VT: SHVT),
9458	N3: Mask, N4: EVL);
9459	Tmp7 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
9460	N2: DAG.getConstant(Val: `255ULL` << `8`, DL: dl, VT), N3: Mask, N4: EVL);
9461	Tmp7 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp7, N2: DAG.getConstant(Val: `40`, DL: dl, VT: SHVT),
9462	N3: Mask, N4: EVL);
9463	Tmp6 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
9464	N2: DAG.getConstant(Val: `255ULL` << `16`, DL: dl, VT), N3: Mask, N4: EVL);
9465	Tmp6 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp6, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT),
9466	N3: Mask, N4: EVL);
9467	Tmp5 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
9468	N2: DAG.getConstant(Val: `255ULL` << `24`, DL: dl, VT), N3: Mask, N4: EVL);
9469	Tmp5 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp5, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT),
9470	N3: Mask, N4: EVL);
9471	Tmp4 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `8`, DL: dl, VT: SHVT),
9472	N3: Mask, N4: EVL);
9473	Tmp4 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp4,
9474	N2: DAG.getConstant(Val: `255ULL` << `24`, DL: dl, VT), N3: Mask, N4: EVL);
9475	Tmp3 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `24`, DL: dl, VT: SHVT),
9476	N3: Mask, N4: EVL);
9477	Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp3,
9478	N2: DAG.getConstant(Val: `255ULL` << `16`, DL: dl, VT), N3: Mask, N4: EVL);
9479	Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `40`, DL: dl, VT: SHVT),
9480	N3: Mask, N4: EVL);
9481	Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
9482	N2: DAG.getConstant(Val: `255ULL` << `8`, DL: dl, VT), N3: Mask, N4: EVL);
9483	Tmp1 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: `56`, DL: dl, VT: SHVT),
9484	N3: Mask, N4: EVL);
9485	Tmp8 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp7, N3: Mask, N4: EVL);
9486	Tmp6 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp6, N2: Tmp5, N3: Mask, N4: EVL);
9487	Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp3, N3: Mask, N4: EVL);
9488	Tmp2 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp1, N3: Mask, N4: EVL);
9489	Tmp8 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp6, N3: Mask, N4: EVL);
9490	Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp2, N3: Mask, N4: EVL);
9491	return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp4, N3: Mask, N4: EVL);
9492	}
9493	}
9494
9495	SDValue TargetLowering::expandBITREVERSE(SDNode N, SelectionDAG &DAG) const* {
9496	SDLoc dl(N);
9497	EVT VT = N->getValueType(ResNo: `0`);
9498	SDValue Op = N->getOperand(Num: `0`);
9499	EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9500	unsigned Sz = VT.getScalarSizeInBits();
9501
9502	SDValue Tmp, Tmp2, Tmp3;
9503
9504	// If we can, perform BSWAP first and then the mask+swap the i4, then i2
9505	// and finally the i1 pairs.
9506	// TODO: We can easily support i4/i2 legal types if any target ever does.
9507	if (Sz >= `8` && isPowerOf2_32(Value: Sz)) {
9508	// Create the masks - repeating the pattern every byte.
9509	APInt Mask4 = APInt::getSplat(NewLen: Sz, V: APInt (`8`, `0x0F`));
9510	APInt Mask2 = APInt::getSplat(NewLen: Sz, V: APInt (`8`, `0x33`));
9511	APInt Mask1 = APInt::getSplat(NewLen: Sz, V: APInt (`8`, `0x55`));
9512
9513	// BSWAP if the type is wider than a single byte.
9514	Tmp = (Sz > `8` ? DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT, Operand: Op) : Op);
9515
9516	// swap i4: ((V >> 4) & 0x0F) \| ((V & 0x0F) << 4)
9517	Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: `4`, DL: dl, VT: SHVT));
9518	Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask4, DL: dl, VT));
9519	Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask4, DL: dl, VT));
9520	Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `4`, DL: dl, VT: SHVT));
9521	Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
9522
9523	// swap i2: ((V >> 2) & 0x33) \| ((V & 0x33) << 2)
9524	Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: `2`, DL: dl, VT: SHVT));
9525	Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask2, DL: dl, VT));
9526	Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask2, DL: dl, VT));
9527	Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `2`, DL: dl, VT: SHVT));
9528	Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
9529
9530	// swap i1: ((V >> 1) & 0x55) \| ((V & 0x55) << 1)
9531	Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: `1`, DL: dl, VT: SHVT));
9532	Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask1, DL: dl, VT));
9533	Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask1, DL: dl, VT));
9534	Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `1`, DL: dl, VT: SHVT));
9535	Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
9536	return Tmp;
9537	}
9538
9539	Tmp = DAG.getConstant(Val: `0`, DL: dl, VT);
9540	for (unsigned I = `0`, J = Sz-`1`; I < Sz; ++I, --J) {
9541	if (I < J)
9542	Tmp2 =
9543	DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: J - I, DL: dl, VT: SHVT));
9544	else
9545	Tmp2 =
9546	DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: I - J, DL: dl, VT: SHVT));
9547
9548	APInt Shift = APInt::getOneBitSet(numBits: Sz, BitNo: J);
9549	Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Shift, DL: dl, VT));
9550	Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp, N2: Tmp2);
9551	}
9552
9553	return Tmp;
9554	}
9555
9556	SDValue TargetLowering::expandVPBITREVERSE(SDNode N, SelectionDAG &DAG) const* {
9557	assert(N->getOpcode() == ISD::VP_BITREVERSE);
9558
9559	SDLoc dl(N);
9560	EVT VT = N->getValueType(ResNo: `0`);
9561	SDValue Op = N->getOperand(Num: `0`);
9562	SDValue Mask = N->getOperand(Num: `1`);
9563	SDValue EVL = N->getOperand(Num: `2`);
9564	EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9565	unsigned Sz = VT.getScalarSizeInBits();
9566
9567	SDValue Tmp, Tmp2, Tmp3;
9568
9569	// If we can, perform BSWAP first and then the mask+swap the i4, then i2
9570	// and finally the i1 pairs.
9571	// TODO: We can easily support i4/i2 legal types if any target ever does.
9572	if (Sz >= `8` && isPowerOf2_32(Value: Sz)) {
9573	// Create the masks - repeating the pattern every byte.
9574	APInt Mask4 = APInt::getSplat(NewLen: Sz, V: APInt (`8`, `0x0F`));
9575	APInt Mask2 = APInt::getSplat(NewLen: Sz, V: APInt (`8`, `0x33`));
9576	APInt Mask1 = APInt::getSplat(NewLen: Sz, V: APInt (`8`, `0x55`));
9577
9578	// BSWAP if the type is wider than a single byte.
9579	Tmp = (Sz > `8` ? DAG.getNode(Opcode: ISD::VP_BSWAP, DL: dl, VT, N1: Op, N2: Mask, N3: EVL) : Op);
9580
9581	// swap i4: ((V >> 4) & 0x0F) \| ((V & 0x0F) << 4)
9582	Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: `4`, DL: dl, VT: SHVT),
9583	N3: Mask, N4: EVL);
9584	Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
9585	N2: DAG.getConstant(Val: Mask4, DL: dl, VT), N3: Mask, N4: EVL);
9586	Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask4, DL: dl, VT),
9587	N3: Mask, N4: EVL);
9588	Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `4`, DL: dl, VT: SHVT),
9589	N3: Mask, N4: EVL);
9590	Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);
9591
9592	// swap i2: ((V >> 2) & 0x33) \| ((V & 0x33) << 2)
9593	Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: `2`, DL: dl, VT: SHVT),
9594	N3: Mask, N4: EVL);
9595	Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
9596	N2: DAG.getConstant(Val: Mask2, DL: dl, VT), N3: Mask, N4: EVL);
9597	Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask2, DL: dl, VT),
9598	N3: Mask, N4: EVL);
9599	Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `2`, DL: dl, VT: SHVT),
9600	N3: Mask, N4: EVL);
9601	Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);
9602
9603	// swap i1: ((V >> 1) & 0x55) \| ((V & 0x55) << 1)
9604	Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: `1`, DL: dl, VT: SHVT),
9605	N3: Mask, N4: EVL);
9606	Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
9607	N2: DAG.getConstant(Val: Mask1, DL: dl, VT), N3: Mask, N4: EVL);
9608	Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask1, DL: dl, VT),
9609	N3: Mask, N4: EVL);
9610	Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: `1`, DL: dl, VT: SHVT),
9611	N3: Mask, N4: EVL);
9612	Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);
9613	return Tmp;
9614	}
9615	return SDValue ();
9616	}
9617
9618	std::pair<SDValue, SDValue>
9619	TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
9620	SelectionDAG &DAG) const {
9621	SDLoc SL(LD);
9622	SDValue Chain = LD->getChain();
9623	SDValue BasePTR = LD->getBasePtr();
9624	EVT SrcVT = LD->getMemoryVT();
9625	EVT DstVT = LD->getValueType(ResNo: `0`);
9626	ISD::LoadExtType ExtType = LD->getExtensionType();
9627
9628	if (SrcVT.isScalableVector())
9629	report_fatal_error(reason: "Cannot scalarize scalable vector loads");
9630
9631	unsigned NumElem = SrcVT.getVectorNumElements();
9632
9633	EVT SrcEltVT = SrcVT.getScalarType();
9634	EVT DstEltVT = DstVT.getScalarType();
9635
9636	// A vector must always be stored in memory as-is, i.e. without any padding
9637	// between the elements, since various code depend on it, e.g. in the
9638	// handling of a bitcast of a vector type to int, which may be done with a
9639	// vector store followed by an integer load. A vector that does not have
9640	// elements that are byte-sized must therefore be stored as an integer
9641	// built out of the extracted vector elements.
9642	if (!SrcEltVT.isByteSized()) {
9643	unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
9644	EVT LoadVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumLoadBits);
9645
9646	unsigned NumSrcBits = SrcVT.getSizeInBits();
9647	EVT SrcIntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumSrcBits);
9648
9649	unsigned SrcEltBits = SrcEltVT.getSizeInBits();
9650	SDValue SrcEltBitMask = DAG.getConstant(
9651	Val: APInt::getLowBitsSet(numBits: NumLoadBits, loBitsSet: SrcEltBits), DL: SL, VT: LoadVT);
9652
9653	// Load the whole vector and avoid masking off the top bits as it makes
9654	// the codegen worse.
9655	SDValue Load =
9656	DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl: SL, VT: LoadVT, Chain, Ptr: BasePTR,
9657	PtrInfo: LD->getPointerInfo(), MemVT: SrcIntVT, Alignment: LD->getOriginalAlign(),
9658	MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
9659
9660	SmallVector<SDValue, `8`> Vals;
9661	for (unsigned Idx = `0`; Idx < NumElem; ++Idx) {
9662	unsigned ShiftIntoIdx =
9663	(DAG.getDataLayout().isBigEndian() ? (NumElem - `1`) - Idx : Idx);
9664	SDValue ShiftAmount = DAG.getShiftAmountConstant(
9665	Val: ShiftIntoIdx * SrcEltVT.getSizeInBits(), VT: LoadVT, DL: SL);
9666	SDValue ShiftedElt = DAG.getNode(Opcode: ISD::SRL, DL: SL, VT: LoadVT, N1: Load, N2: ShiftAmount);
9667	SDValue Elt =
9668	DAG.getNode(Opcode: ISD::AND, DL: SL, VT: LoadVT, N1: ShiftedElt, N2: SrcEltBitMask);
9669	SDValue Scalar = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SL, VT: SrcEltVT, Operand: Elt);
9670
9671	if (ExtType != ISD::NON_EXTLOAD) {
9672	unsigned ExtendOp = ISD::getExtForLoadExtType(IsFP: false, ExtType);
9673	Scalar = DAG.getNode(Opcode: ExtendOp, DL: SL, VT: DstEltVT, Operand: Scalar);
9674	}
9675
9676	Vals.push_back(Elt: Scalar);
9677	}
9678
9679	SDValue Value = DAG.getBuildVector(VT: DstVT, DL: SL, Ops: Vals);
9680	return std::make_pair(x&: Value, y: Load.getValue(R: `1`));
9681	}
9682
9683	unsigned Stride = SrcEltVT.getSizeInBits() / `8`;
9684	assert(SrcEltVT.isByteSized());
9685
9686	SmallVector<SDValue, `8`> Vals;
9687	SmallVector<SDValue, `8`> LoadChains;
9688
9689	for (unsigned Idx = `0`; Idx < NumElem; ++Idx) {
9690	SDValue ScalarLoad =
9691	DAG.getExtLoad(ExtType, dl: SL, VT: DstEltVT, Chain, Ptr: BasePTR,
9692	PtrInfo: LD->getPointerInfo().getWithOffset(O: Idx * Stride),
9693	MemVT: SrcEltVT, Alignment: LD->getOriginalAlign(),
9694	MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
9695
9696	BasePTR = DAG.getObjectPtrOffset(SL, Ptr: BasePTR, Offset: TypeSize::getFixed(ExactSize: Stride));
9697
9698	Vals.push_back(Elt: ScalarLoad.getValue(R: `0`));
9699	LoadChains.push_back(Elt: ScalarLoad.getValue(R: `1`));
9700	}
9701
9702	SDValue NewChain = DAG.getNode(Opcode: ISD::TokenFactor, DL: SL, VT: MVT::Other, Ops: LoadChains);
9703	SDValue Value = DAG.getBuildVector(VT: DstVT, DL: SL, Ops: Vals);
9704
9705	return std::make_pair(x&: Value, y&: NewChain);
9706	}
9707
9708	SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
9709	SelectionDAG &DAG) const {
9710	SDLoc SL(ST);
9711
9712	SDValue Chain = ST->getChain();
9713	SDValue BasePtr = ST->getBasePtr();
9714	SDValue Value = ST->getValue();
9715	EVT StVT = ST->getMemoryVT();
9716
9717	if (StVT.isScalableVector())
9718	report_fatal_error(reason: "Cannot scalarize scalable vector stores");
9719
9720	// The type of the data we want to save
9721	EVT RegVT = Value.getValueType();
9722	EVT RegSclVT = RegVT.getScalarType();
9723
9724	// The type of data as saved in memory.
9725	EVT MemSclVT = StVT.getScalarType();
9726
9727	unsigned NumElem = StVT.getVectorNumElements();
9728
9729	// A vector must always be stored in memory as-is, i.e. without any padding
9730	// between the elements, since various code depend on it, e.g. in the
9731	// handling of a bitcast of a vector type to int, which may be done with a
9732	// vector store followed by an integer load. A vector that does not have
9733	// elements that are byte-sized must therefore be stored as an integer
9734	// built out of the extracted vector elements.
9735	if (!MemSclVT.isByteSized()) {
9736	unsigned NumBits = StVT.getSizeInBits();
9737	EVT IntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumBits);
9738
9739	SDValue CurrVal = DAG.getConstant(Val: `0`, DL: SL, VT: IntVT);
9740
9741	for (unsigned Idx = `0`; Idx < NumElem; ++Idx) {
9742	SDValue Elt = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SL, VT: RegSclVT, N1: Value,
9743	N2: DAG.getVectorIdxConstant(Val: Idx, DL: SL));
9744	SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SL, VT: MemSclVT, Operand: Elt);
9745	SDValue ExtElt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SL, VT: IntVT, Operand: Trunc);
9746	unsigned ShiftIntoIdx =
9747	(DAG.getDataLayout().isBigEndian() ? (NumElem - `1`) - Idx : Idx);
9748	SDValue ShiftAmount =
9749	DAG.getConstant(Val: ShiftIntoIdx * MemSclVT.getSizeInBits(), DL: SL, VT: IntVT);
9750	SDValue ShiftedElt =
9751	DAG.getNode(Opcode: ISD::SHL, DL: SL, VT: IntVT, N1: ExtElt, N2: ShiftAmount);
9752	CurrVal = DAG.getNode(Opcode: ISD::OR, DL: SL, VT: IntVT, N1: CurrVal, N2: ShiftedElt);
9753	}
9754
9755	return DAG.getStore(Chain, dl: SL, Val: CurrVal, Ptr: BasePtr, PtrInfo: ST->getPointerInfo(),
9756	Alignment: ST->getOriginalAlign(), MMOFlags: ST->getMemOperand()->getFlags(),
9757	AAInfo: ST->getAAInfo());
9758	}
9759
9760	// Store Stride in bytes
9761	unsigned Stride = MemSclVT.getSizeInBits() / `8`;
9762	assert(Stride && "Zero stride!");
9763	// Extract each of the elements from the original vector and save them into
9764	// memory individually.
9765	SmallVector<SDValue, `8`> Stores;
9766	for (unsigned Idx = `0`; Idx < NumElem; ++Idx) {
9767	SDValue Elt = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SL, VT: RegSclVT, N1: Value,
9768	N2: DAG.getVectorIdxConstant(Val: Idx, DL: SL));
9769
9770	SDValue Ptr =
9771	DAG.getObjectPtrOffset(SL, Ptr: BasePtr, Offset: TypeSize::getFixed(ExactSize: Idx * Stride));
9772
9773	// This scalar TruncStore may be illegal, but we legalize it later.
9774	SDValue Store = DAG.getTruncStore(
9775	Chain, dl: SL, Val: Elt, Ptr, PtrInfo: ST->getPointerInfo().getWithOffset(O: Idx * Stride),
9776	SVT: MemSclVT, Alignment: ST->getOriginalAlign(), MMOFlags: ST->getMemOperand()->getFlags(),
9777	AAInfo: ST->getAAInfo());
9778
9779	Stores.push_back(Elt: Store);
9780	}
9781
9782	return DAG.getNode(Opcode: ISD::TokenFactor, DL: SL, VT: MVT::Other, Ops: Stores);
9783	}
9784
9785	std::pair<SDValue, SDValue>
9786	TargetLowering::expandUnalignedLoad(LoadSDNode LD, SelectionDAG &DAG) const* {
9787	assert(LD->getAddressingMode() == ISD::UNINDEXED &&
9788	"unaligned indexed loads not implemented!");
9789	SDValue Chain = LD->getChain();
9790	SDValue Ptr = LD->getBasePtr();
9791	EVT VT = LD->getValueType(ResNo: `0`);
9792	EVT LoadedVT = LD->getMemoryVT();
9793	SDLoc dl(LD);
9794	auto &MF = DAG.getMachineFunction();
9795
9796	if (VT.isFloatingPoint() \|\| VT.isVector()) {
9797	EVT intVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: LoadedVT.getSizeInBits());
9798	if (isTypeLegal(VT: intVT) && isTypeLegal(VT: LoadedVT)) {
9799	if (!isOperationLegalOrCustom(Op: ISD::LOAD, VT: intVT) &&
9800	LoadedVT.isVector()) {
9801	// Scalarize the load and let the individual components be handled.
9802	return scalarizeVectorLoad(LD, DAG);
9803	}
9804
9805	// Expand to a (misaligned) integer load of the same size,
9806	// then bitconvert to floating point or vector.
9807	SDValue newLoad = DAG.getLoad(VT: intVT, dl, Chain, Ptr,
9808	MMO: LD->getMemOperand());
9809	SDValue Result = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: LoadedVT, Operand: newLoad);
9810	if (LoadedVT != VT)
9811	Result = DAG.getNode(Opcode: VT.isFloatingPoint() ? ISD::FP_EXTEND :
9812	ISD::ANY_EXTEND, DL: dl, VT, Operand: Result);
9813
9814	return std::make_pair(x&: Result, y: newLoad.getValue(R: `1`));
9815	}
9816
9817	// Copy the value to a (aligned) stack slot using (unaligned) integer
9818	// loads and stores, then do a (aligned) load from the stack slot.
9819	MVT RegVT = getRegisterType(Context&: *DAG.getContext(), VT: intVT);
9820	unsigned LoadedBytes = LoadedVT.getStoreSize();
9821	unsigned RegBytes = RegVT.getSizeInBits() / `8`;
9822	unsigned NumRegs = (LoadedBytes + RegBytes - `1`) / RegBytes;
9823
9824	// Make sure the stack slot is also aligned for the register type.
9825	SDValue StackBase = DAG.CreateStackTemporary(VT1: LoadedVT, VT2: RegVT);
9826	auto FrameIndex = cast<FrameIndexSDNode>(Val: StackBase.getNode())->getIndex();
9827	SmallVector<SDValue, `8`> Stores;
9828	SDValue StackPtr = StackBase;
9829	unsigned Offset = `0`;
9830
9831	EVT PtrVT = Ptr.getValueType();
9832	EVT StackPtrVT = StackPtr.getValueType();
9833
9834	SDValue PtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: PtrVT);
9835	SDValue StackPtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: StackPtrVT);
9836
9837	// Do all but one copies using the full register width.
9838	for (unsigned i = `1`; i < NumRegs; i++) {
9839	// Load one integer register's worth from the original location.
9840	SDValue Load = DAG.getLoad(
9841	VT: RegVT, dl, Chain, Ptr, PtrInfo: LD->getPointerInfo().getWithOffset(O: Offset),
9842	Alignment: LD->getOriginalAlign(), MMOFlags: LD->getMemOperand()->getFlags(),
9843	AAInfo: LD->getAAInfo());
9844	// Follow the load with a store to the stack slot. Remember the store.
9845	Stores.push_back(Elt: DAG.getStore(
9846	Chain: Load.getValue(R: `1`), dl, Val: Load, Ptr: StackPtr,
9847	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset)));
9848	// Increment the pointers.
9849	Offset += RegBytes;
9850
9851	Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: PtrIncrement);
9852	StackPtr = DAG.getObjectPtrOffset(SL: dl, Ptr: StackPtr, Offset: StackPtrIncrement);
9853	}
9854
9855	// The last copy may be partial. Do an extending load.
9856	EVT MemVT = EVT::getIntegerVT(Context&: *DAG.getContext(),
9857	BitWidth: `8` * (LoadedBytes - Offset));
9858	SDValue Load =
9859	DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl, VT: RegVT, Chain, Ptr,
9860	PtrInfo: LD->getPointerInfo().getWithOffset(O: Offset), MemVT,
9861	Alignment: LD->getOriginalAlign(), MMOFlags: LD->getMemOperand()->getFlags(),
9862	AAInfo: LD->getAAInfo());
9863	// Follow the load with a store to the stack slot. Remember the store.
9864	// On big-endian machines this requires a truncating store to ensure
9865	// that the bits end up in the right place.
9866	Stores.push_back(Elt: DAG.getTruncStore(
9867	Chain: Load.getValue(R: `1`), dl, Val: Load, Ptr: StackPtr,
9868	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset), SVT: MemVT));
9869
9870	// The order of the stores doesn't matter - say it with a TokenFactor.
9871	SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: Stores);
9872
9873	// Finally, perform the original load only redirected to the stack slot.
9874	Load = DAG.getExtLoad(ExtType: LD->getExtensionType(), dl, VT, Chain: TF, Ptr: StackBase,
9875	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset: `0`),
9876	MemVT: LoadedVT);
9877
9878	// Callers expect a MERGE_VALUES node.
9879	return std::make_pair(x&: Load, y&: TF);
9880	}
9881
9882	assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
9883	"Unaligned load of unsupported type.");
9884
9885	// Compute the new VT that is half the size of the old one. This is an
9886	// integer MVT.
9887	unsigned NumBits = LoadedVT.getSizeInBits();
9888	EVT NewLoadedVT;
9889	NewLoadedVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumBits/`2`);
9890	NumBits >>= `1`;
9891
9892	Align Alignment = LD->getOriginalAlign();
9893	unsigned IncrementSize = NumBits / `8`;
9894	ISD::LoadExtType HiExtType = LD->getExtensionType();
9895
9896	// If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
9897	if (HiExtType == ISD::NON_EXTLOAD)
9898	HiExtType = ISD::ZEXTLOAD;
9899
9900	// Load the value in two parts
9901	SDValue Lo, Hi;
9902	if (DAG.getDataLayout().isLittleEndian()) {
9903	Lo = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT, Chain, Ptr, PtrInfo: LD->getPointerInfo(),
9904	MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
9905	AAInfo: LD->getAAInfo());
9906
9907	Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
9908	Hi = DAG.getExtLoad(ExtType: HiExtType, dl, VT, Chain, Ptr,
9909	PtrInfo: LD->getPointerInfo().getWithOffset(O: IncrementSize),
9910	MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
9911	AAInfo: LD->getAAInfo());
9912	} else {
9913	Hi = DAG.getExtLoad(ExtType: HiExtType, dl, VT, Chain, Ptr, PtrInfo: LD->getPointerInfo(),
9914	MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
9915	AAInfo: LD->getAAInfo());
9916
9917	Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
9918	Lo = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
9919	PtrInfo: LD->getPointerInfo().getWithOffset(O: IncrementSize),
9920	MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
9921	AAInfo: LD->getAAInfo());
9922	}
9923
9924	// aggregate the two parts
9925	SDValue ShiftAmount = DAG.getShiftAmountConstant(Val: NumBits, VT, DL: dl);
9926	SDValue Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Hi, N2: ShiftAmount);
9927	Result = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Result, N2: Lo);
9928
9929	SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Lo.getValue(R: `1`),
9930	N2: Hi.getValue(R: `1`));
9931
9932	return std::make_pair(x&: Result, y&: TF);
9933	}
9934
9935	SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
9936	SelectionDAG &DAG) const {
9937	assert(ST->getAddressingMode() == ISD::UNINDEXED &&
9938	"unaligned indexed stores not implemented!");
9939	SDValue Chain = ST->getChain();
9940	SDValue Ptr = ST->getBasePtr();
9941	SDValue Val = ST->getValue();
9942	EVT VT = Val.getValueType();
9943	Align Alignment = ST->getOriginalAlign();
9944	auto &MF = DAG.getMachineFunction();
9945	EVT StoreMemVT = ST->getMemoryVT();
9946
9947	SDLoc dl(ST);
9948	if (StoreMemVT.isFloatingPoint() \|\| StoreMemVT.isVector()) {
9949	EVT intVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: VT.getSizeInBits());
9950	if (isTypeLegal(VT: intVT)) {
9951	if (!isOperationLegalOrCustom(Op: ISD::STORE, VT: intVT) &&
9952	StoreMemVT.isVector()) {
9953	// Scalarize the store and let the individual components be handled.
9954	SDValue Result = scalarizeVectorStore(ST, DAG);
9955	return Result;
9956	}
9957	// Expand to a bitconvert of the value to the integer type of the
9958	// same size, then a (misaligned) int store.
9959	// FIXME: Does not handle truncating floating point stores!
9960	SDValue Result = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: intVT, Operand: Val);
9961	Result = DAG.getStore(Chain, dl, Val: Result, Ptr, PtrInfo: ST->getPointerInfo(),
9962	Alignment, MMOFlags: ST->getMemOperand()->getFlags());
9963	return Result;
9964	}
9965	// Do a (aligned) store to a stack slot, then copy from the stack slot
9966	// to the final destination using (unaligned) integer loads and stores.
9967	MVT RegVT = getRegisterType(
9968	Context&: *DAG.getContext(),
9969	VT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: StoreMemVT.getSizeInBits()));
9970	EVT PtrVT = Ptr.getValueType();
9971	unsigned StoredBytes = StoreMemVT.getStoreSize();
9972	unsigned RegBytes = RegVT.getSizeInBits() / `8`;
9973	unsigned NumRegs = (StoredBytes + RegBytes - `1`) / RegBytes;
9974
9975	// Make sure the stack slot is also aligned for the register type.
9976	SDValue StackPtr = DAG.CreateStackTemporary(VT1: StoreMemVT, VT2: RegVT);
9977	auto FrameIndex = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
9978
9979	// Perform the original store, only redirected to the stack slot.
9980	SDValue Store = DAG.getTruncStore(
9981	Chain, dl, Val, Ptr: StackPtr,
9982	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset: `0`), SVT: StoreMemVT);
9983
9984	EVT StackPtrVT = StackPtr.getValueType();
9985
9986	SDValue PtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: PtrVT);
9987	SDValue StackPtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: StackPtrVT);
9988	SmallVector<SDValue, `8`> Stores;
9989	unsigned Offset = `0`;
9990
9991	// Do all but one copies using the full register width.
9992	for (unsigned i = `1`; i < NumRegs; i++) {
9993	// Load one integer register's worth from the stack slot.
9994	SDValue Load = DAG.getLoad(
9995	VT: RegVT, dl, Chain: Store, Ptr: StackPtr,
9996	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset));
9997	// Store it to the final location. Remember the store.
9998	Stores.push_back(Elt: DAG.getStore(Chain: Load.getValue(R: `1`), dl, Val: Load, Ptr,
9999	PtrInfo: ST->getPointerInfo().getWithOffset(O: Offset),
10000	Alignment: ST->getOriginalAlign(),
10001	MMOFlags: ST->getMemOperand()->getFlags()));
10002	// Increment the pointers.
10003	Offset += RegBytes;
10004	StackPtr = DAG.getObjectPtrOffset(SL: dl, Ptr: StackPtr, Offset: StackPtrIncrement);
10005	Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: PtrIncrement);
10006	}
10007
10008	// The last store may be partial. Do a truncating store. On big-endian
10009	// machines this requires an extending load from the stack slot to ensure
10010	// that the bits are in the right place.
10011	EVT LoadMemVT =
10012	EVT::getIntegerVT(Context&: DAG.getContext(), BitWidth: `8` (StoredBytes - Offset));
10013
10014	// Load from the stack slot.
10015	SDValue Load = DAG.getExtLoad(
10016	ExtType: ISD::EXTLOAD, dl, VT: RegVT, Chain: Store, Ptr: StackPtr,
10017	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset), MemVT: LoadMemVT);
10018
10019	Stores.push_back(
10020	Elt: DAG.getTruncStore(Chain: Load.getValue(R: `1`), dl, Val: Load, Ptr,
10021	PtrInfo: ST->getPointerInfo().getWithOffset(O: Offset), SVT: LoadMemVT,
10022	Alignment: ST->getOriginalAlign(),
10023	MMOFlags: ST->getMemOperand()->getFlags(), AAInfo: ST->getAAInfo()));
10024	// The order of the stores doesn't matter - say it with a TokenFactor.
10025	SDValue Result = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: Stores);
10026	return Result;
10027	}
10028
10029	assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
10030	"Unaligned store of unknown type.");
10031	// Get the half-size VT
10032	EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(Context&: *DAG.getContext());
10033	unsigned NumBits = NewStoredVT.getFixedSizeInBits();
10034	unsigned IncrementSize = NumBits / `8`;
10035
10036	// Divide the stored value in two parts.
10037	SDValue ShiftAmount =
10038	DAG.getShiftAmountConstant(Val: NumBits, VT: Val.getValueType(), DL: dl);
10039	SDValue Lo = Val;
10040	// If Val is a constant, replace the upper bits with 0. The SRL will constant
10041	// fold and not use the upper bits. A smaller constant may be easier to
10042	// materialize.
10043	if (auto *C = dyn_cast<ConstantSDNode>(Val&: Lo); C && !C->isOpaque())
10044	Lo = DAG.getNode(
10045	Opcode: ISD::AND, DL: dl, VT, N1: Lo,
10046	N2: DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VT.getSizeInBits(), loBitsSet: NumBits), DL: dl,
10047	VT));
10048	SDValue Hi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Val, N2: ShiftAmount);
10049
10050	// Store the two parts
10051	SDValue Store1, Store2;
10052	Store1 = DAG.getTruncStore(Chain, dl,
10053	Val: DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
10054	Ptr, PtrInfo: ST->getPointerInfo(), SVT: NewStoredVT, Alignment,
10055	MMOFlags: ST->getMemOperand()->getFlags());
10056
10057	Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
10058	Store2 = DAG.getTruncStore(
10059	Chain, dl, Val: DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
10060	PtrInfo: ST->getPointerInfo().getWithOffset(O: IncrementSize), SVT: NewStoredVT, Alignment,
10061	MMOFlags: ST->getMemOperand()->getFlags(), AAInfo: ST->getAAInfo());
10062
10063	SDValue Result =
10064	DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Store1, N2: Store2);
10065	return Result;
10066	}
10067
10068	SDValue
10069	TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
10070	const SDLoc &DL, EVT DataVT,
10071	SelectionDAG &DAG,
10072	bool IsCompressedMemory) const {
10073	SDValue Increment;
10074	EVT AddrVT = Addr.getValueType();
10075	EVT MaskVT = Mask.getValueType();
10076	assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
10077	"Incompatible types of Data and Mask");
10078	if (IsCompressedMemory) {
10079	if (DataVT.isScalableVector())
10080	report_fatal_error(
10081	reason: "Cannot currently handle compressed memory with scalable vectors");
10082	// Incrementing the pointer according to number of '1's in the mask.
10083	EVT MaskIntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: MaskVT.getSizeInBits());
10084	SDValue MaskInIntReg = DAG.getBitcast(VT: MaskIntVT, V: Mask);
10085	if (MaskIntVT.getSizeInBits() < `32`) {
10086	MaskInIntReg = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i32, Operand: MaskInIntReg);
10087	MaskIntVT = MVT::i32;
10088	}
10089
10090	// Count '1's with POPCNT.
10091	Increment = DAG.getNode(Opcode: ISD::CTPOP, DL, VT: MaskIntVT, Operand: MaskInIntReg);
10092	Increment = DAG.getZExtOrTrunc(Op: Increment, DL, VT: AddrVT);
10093	// Scale is an element size in bytes.
10094	SDValue Scale = DAG.getConstant(Val: DataVT.getScalarSizeInBits() / `8`, DL,
10095	VT: AddrVT);
10096	Increment = DAG.getNode(Opcode: ISD::MUL, DL, VT: AddrVT, N1: Increment, N2: Scale);
10097	} else if (DataVT.isScalableVector()) {
10098	Increment = DAG.getVScale(DL, VT: AddrVT,
10099	MulImm: APInt (AddrVT.getFixedSizeInBits(),
10100	DataVT.getStoreSize().getKnownMinValue()));
10101	} else
10102	Increment = DAG.getConstant(Val: DataVT.getStoreSize(), DL, VT: AddrVT);
10103
10104	return DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: Addr, N2: Increment);
10105	}
10106
10107	static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
10108	EVT VecVT, const SDLoc &dl,
10109	ElementCount SubEC) {
10110	assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
10111	"Cannot index a scalable vector within a fixed-width vector");
10112
10113	unsigned NElts = VecVT.getVectorMinNumElements();
10114	unsigned NumSubElts = SubEC.getKnownMinValue();
10115	EVT IdxVT = Idx.getValueType();
10116
10117	if (VecVT.isScalableVector() && !SubEC.isScalable()) {
10118	// If this is a constant index and we know the value plus the number of the
10119	// elements in the subvector minus one is less than the minimum number of
10120	// elements then it's safe to return Idx.
10121	if (auto *IdxCst = dyn_cast<ConstantSDNode>(Val&: Idx))
10122	if (IdxCst->getZExtValue() + (NumSubElts - `1`) < NElts)
10123	return Idx;
10124	SDValue VS =
10125	DAG.getVScale(DL: dl, VT: IdxVT, MulImm: APInt (IdxVT.getFixedSizeInBits(), NElts));
10126	unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
10127	SDValue Sub = DAG.getNode(Opcode: SubOpcode, DL: dl, VT: IdxVT, N1: VS,
10128	N2: DAG.getConstant(Val: NumSubElts, DL: dl, VT: IdxVT));
10129	return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT: IdxVT, N1: Idx, N2: Sub);
10130	}
10131	if (isPowerOf2_32(Value: NElts) && NumSubElts == `1`) {
10132	APInt Imm = APInt::getLowBitsSet(numBits: IdxVT.getSizeInBits(), loBitsSet: Log2_32(Value: NElts));
10133	return DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IdxVT, N1: Idx,
10134	N2: DAG.getConstant(Val: Imm, DL: dl, VT: IdxVT));
10135	}
10136	unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : `0`;
10137	return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT: IdxVT, N1: Idx,
10138	N2: DAG.getConstant(Val: MaxIndex, DL: dl, VT: IdxVT));
10139	}
10140
10141	SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
10142	SDValue VecPtr, EVT VecVT,
10143	SDValue Index) const {
10144	return getVectorSubVecPointer(
10145	DAG, VecPtr, VecVT,
10146	SubVecVT: EVT::getVectorVT(Context&: *DAG.getContext(), VT: VecVT.getVectorElementType(), NumElements: `1`),
10147	Index);
10148	}
10149
10150	SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
10151	SDValue VecPtr, EVT VecVT,
10152	EVT SubVecVT,
10153	SDValue Index) const {
10154	SDLoc dl(Index);
10155	// Make sure the index type is big enough to compute in.
10156	Index = DAG.getZExtOrTrunc(Op: Index, DL: dl, VT: VecPtr.getValueType());
10157
10158	EVT EltVT = VecVT.getVectorElementType();
10159
10160	// Calculate the element offset and add it to the pointer.
10161	unsigned EltSize = EltVT.getFixedSizeInBits() / `8`; // FIXME: should be ABI size.
10162	assert(EltSize * `8` == EltVT.getFixedSizeInBits() &&
10163	"Converting bits to bytes lost precision");
10164	assert(SubVecVT.getVectorElementType() == EltVT &&
10165	"Sub-vector must be a vector with matching element type");
10166	Index = clampDynamicVectorIndex(DAG, Idx: Index, VecVT, dl,
10167	SubEC: SubVecVT.getVectorElementCount());
10168
10169	EVT IdxVT = Index.getValueType();
10170	if (SubVecVT.isScalableVector())
10171	Index =
10172	DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IdxVT, N1: Index,
10173	N2: DAG.getVScale(DL: dl, VT: IdxVT, MulImm: APInt (IdxVT.getSizeInBits(), `1`)));
10174
10175	Index = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IdxVT, N1: Index,
10176	N2: DAG.getConstant(Val: EltSize, DL: dl, VT: IdxVT));
10177	return DAG.getMemBasePlusOffset(Base: VecPtr, Offset: Index, DL: dl);
10178	}
10179
10180	//===----------------------------------------------------------------------===//
10181	// Implementation of Emulated TLS Model
10182	//===----------------------------------------------------------------------===//
10183
10184	SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
10185	SelectionDAG &DAG) const {
10186	// Access to address of TLS varialbe xyz is lowered to a function call:
10187	// __emutls_get_address( address of global variable named "__emutls_v.xyz" )
10188	EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
10189	PointerType VoidPtrType = PointerType::get(C&: DAG.getContext(), AddressSpace: `0`);
10190	SDLoc dl(GA);
10191
10192	ArgListTy Args;
10193	ArgListEntry Entry;
10194	std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
10195	Module VariableModule = const_cast<Module>(GA->getGlobal()->getParent());
10196	StringRef EmuTlsVarName(NameString);
10197	GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(Name: EmuTlsVarName);
10198	assert(EmuTlsVar && "Cannot find EmuTlsVar ");
10199	Entry.Node = DAG.getGlobalAddress(GV: EmuTlsVar, DL: dl, VT: PtrVT);
10200	Entry.Ty = VoidPtrType;
10201	Args.push_back(x: Entry);
10202
10203	SDValue EmuTlsGetAddr = DAG.getExternalSymbol(Sym: "__emutls_get_address", VT: PtrVT);
10204
10205	TargetLowering::CallLoweringInfo CLI(DAG);
10206	CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
10207	CLI.setLibCallee(CC: CallingConv::C, ResultType: VoidPtrType, Target: EmuTlsGetAddr, ArgsList: std::move(Args));
10208	std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
10209
10210	// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
10211	// At last for X86 targets, maybe good for other targets too?
10212	MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10213	MFI.setAdjustsStack(true); // Is this only for X86 target?
10214	MFI.setHasCalls(true);
10215
10216	assert((GA->getOffset() == `0`) &&
10217	"Emulated TLS must have zero offset in GlobalAddressSDNode");
10218	return CallResult.first;
10219	}
10220
10221	SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
10222	SelectionDAG &DAG) const {
10223	assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
10224	if (!isCtlzFast())
10225	return SDValue ();
10226	ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: `2`))->get();
10227	SDLoc dl(Op);
10228	if (isNullConstant(V: Op.getOperand(i: `1`)) && CC == ISD::SETEQ) {
10229	EVT VT = Op.getOperand(i: `0`).getValueType();
10230	SDValue Zext = Op.getOperand(i: `0`);
10231	if (VT.bitsLT(VT: MVT::i32)) {
10232	VT = MVT::i32;
10233	Zext = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Op.getOperand(i: `0`));
10234	}
10235	unsigned Log2b = Log2_32(Value: VT.getSizeInBits());
10236	SDValue Clz = DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Zext);
10237	SDValue Scc = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Clz,
10238	N2: DAG.getConstant(Val: Log2b, DL: dl, VT: MVT::i32));
10239	return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32, Operand: Scc);
10240	}
10241	return SDValue ();
10242	}
10243
10244	SDValue TargetLowering::expandIntMINMAX(SDNode Node, SelectionDAG &DAG) const* {
10245	SDValue Op0 = Node->getOperand(Num: `0`);
10246	SDValue Op1 = Node->getOperand(Num: `1`);
10247	EVT VT = Op0.getValueType();
10248	EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10249	unsigned Opcode = Node->getOpcode();
10250	SDLoc DL(Node);
10251
10252	// umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
10253	if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(V: Op1, AllowUndefs: true) && BoolVT == VT &&
10254	getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
10255	Op0 = DAG.getFreeze(V: Op0);
10256	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT);
10257	return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0,
10258	N2: DAG.getSetCC(DL, VT, LHS: Op0, RHS: Zero, Cond: ISD::SETEQ));
10259	}
10260
10261	// umin(x,y) -> sub(x,usubsat(x,y))
10262	// TODO: Missing freeze(Op0)?
10263	if (Opcode == ISD::UMIN && isOperationLegal(Op: ISD::SUB, VT) &&
10264	isOperationLegal(Op: ISD::USUBSAT, VT)) {
10265	return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0,
10266	N2: DAG.getNode(Opcode: ISD::USUBSAT, DL, VT, N1: Op0, N2: Op1));
10267	}
10268
10269	// umax(x,y) -> add(x,usubsat(y,x))
10270	// TODO: Missing freeze(Op0)?
10271	if (Opcode == ISD::UMAX && isOperationLegal(Op: ISD::ADD, VT) &&
10272	isOperationLegal(Op: ISD::USUBSAT, VT)) {
10273	return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0,
10274	N2: DAG.getNode(Opcode: ISD::USUBSAT, DL, VT, N1: Op1, N2: Op0));
10275	}
10276
10277	// FIXME: Should really try to split the vector in case it's legal on a
10278	// subvector.
10279	if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
10280	return DAG.UnrollVectorOp(N: Node);
10281
10282	// Attempt to find an existing SETCC node that we can reuse.
10283	// TODO: Do we need a generic doesSETCCNodeExist?
10284	// TODO: Missing freeze(Op0)/freeze(Op1)?
10285	auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
10286	ISD::CondCode PrefCommuteCC,
10287	ISD::CondCode AltCommuteCC) {
10288	SDVTList BoolVTList = DAG.getVTList(VT: BoolVT);
10289	for (ISD::CondCode CC : {PrefCC, AltCC}) {
10290	if (DAG.doesNodeExist(Opcode: ISD::SETCC, VTList: BoolVTList,
10291	Ops: {Op0, Op1, DAG.getCondCode(Cond: CC)})) {
10292	SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: CC);
10293	return DAG.getSelect(DL, VT, Cond, LHS: Op0, RHS: Op1);
10294	}
10295	}
10296	for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
10297	if (DAG.doesNodeExist(Opcode: ISD::SETCC, VTList: BoolVTList,
10298	Ops: {Op0, Op1, DAG.getCondCode(Cond: CC)})) {
10299	SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: CC);
10300	return DAG.getSelect(DL, VT, Cond, LHS: Op1, RHS: Op0);
10301	}
10302	}
10303	SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: PrefCC);
10304	return DAG.getSelect(DL, VT, Cond, LHS: Op0, RHS: Op1);
10305	};
10306
10307	// Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
10308	// -> Y = (A < B) ? B : A
10309	// -> Y = (A >= B) ? A : B
10310	// -> Y = (A <= B) ? B : A
10311	switch (Opcode) {
10312	case ISD::SMAX:
10313	return buildMinMax (ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
10314	case ISD::SMIN:
10315	return buildMinMax (ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
10316	case ISD::UMAX:
10317	return buildMinMax (ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
10318	case ISD::UMIN:
10319	return buildMinMax (ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
10320	}
10321
10322	llvm_unreachable("How did we get here?");
10323	}
10324
10325	SDValue TargetLowering::expandAddSubSat(SDNode Node, SelectionDAG &DAG) const* {
10326	unsigned Opcode = Node->getOpcode();
10327	SDValue LHS = Node->getOperand(Num: `0`);
10328	SDValue RHS = Node->getOperand(Num: `1`);
10329	EVT VT = LHS.getValueType();
10330	SDLoc dl(Node);
10331
10332	assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10333	assert(VT.isInteger() && "Expected operands to be integers");
10334
10335	// usub.sat(a, b) -> umax(a, b) - b
10336	if (Opcode == ISD::USUBSAT && isOperationLegal(Op: ISD::UMAX, VT)) {
10337	SDValue Max = DAG.getNode(Opcode: ISD::UMAX, DL: dl, VT, N1: LHS, N2: RHS);
10338	return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Max, N2: RHS);
10339	}
10340
10341	// uadd.sat(a, b) -> umin(a, ~b) + b
10342	if (Opcode == ISD::UADDSAT && isOperationLegal(Op: ISD::UMIN, VT)) {
10343	SDValue InvRHS = DAG.getNOT(DL: dl, Val: RHS, VT);
10344	SDValue Min = DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT, N1: LHS, N2: InvRHS);
10345	return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Min, N2: RHS);
10346	}
10347
10348	unsigned OverflowOp;
10349	switch (Opcode) {
10350	case ISD::SADDSAT:
10351	OverflowOp = ISD::SADDO;
10352	break;
10353	case ISD::UADDSAT:
10354	OverflowOp = ISD::UADDO;
10355	break;
10356	case ISD::SSUBSAT:
10357	OverflowOp = ISD::SSUBO;
10358	break;
10359	case ISD::USUBSAT:
10360	OverflowOp = ISD::USUBO;
10361	break;
10362	default:
10363	llvm_unreachable("Expected method to receive signed or unsigned saturation "
10364	"addition or subtraction node.");
10365	}
10366
10367	// FIXME: Should really try to split the vector in case it's legal on a
10368	// subvector.
10369	if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
10370	return DAG.UnrollVectorOp(N: Node);
10371
10372	unsigned BitWidth = LHS.getScalarValueSizeInBits();
10373	EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10374	SDValue Result = DAG.getNode(Opcode: OverflowOp, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
10375	SDValue SumDiff = Result.getValue(R: `0`);
10376	SDValue Overflow = Result.getValue(R: `1`);
10377	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
10378	SDValue AllOnes = DAG.getAllOnesConstant(DL: dl, VT);
10379
10380	if (Opcode == ISD::UADDSAT) {
10381	if (getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
10382	// (LHS + RHS) \| OverflowMask
10383	SDValue OverflowMask = DAG.getSExtOrTrunc(Op: Overflow, DL: dl, VT);
10384	return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: SumDiff, N2: OverflowMask);
10385	}
10386	// Overflow ? 0xffff.... : (LHS + RHS)
10387	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: AllOnes, RHS: SumDiff);
10388	}
10389
10390	if (Opcode == ISD::USUBSAT) {
10391	if (getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
10392	// (LHS - RHS) & ~OverflowMask
10393	SDValue OverflowMask = DAG.getSExtOrTrunc(Op: Overflow, DL: dl, VT);
10394	SDValue Not = DAG.getNOT(DL: dl, Val: OverflowMask, VT);
10395	return DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: SumDiff, N2: Not);
10396	}
10397	// Overflow ? 0 : (LHS - RHS)
10398	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Zero, RHS: SumDiff);
10399	}
10400
10401	if (Opcode == ISD::SADDSAT \|\| Opcode == ISD::SSUBSAT) {
10402	APInt MinVal = APInt::getSignedMinValue(numBits: BitWidth);
10403	APInt MaxVal = APInt::getSignedMaxValue(numBits: BitWidth);
10404
10405	KnownBits KnownLHS = DAG.computeKnownBits(Op: LHS);
10406	KnownBits KnownRHS = DAG.computeKnownBits(Op: RHS);
10407
10408	// If either of the operand signs are known, then they are guaranteed to
10409	// only saturate in one direction. If non-negative they will saturate
10410	// towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
10411	//
10412	// In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
10413	// sign of 'y' has to be flipped.
10414
10415	bool LHSIsNonNegative = KnownLHS.isNonNegative();
10416	bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
10417	: KnownRHS.isNegative();
10418	if (LHSIsNonNegative \|\| RHSIsNonNegative) {
10419	SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
10420	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMax, RHS: SumDiff);
10421	}
10422
10423	bool LHSIsNegative = KnownLHS.isNegative();
10424	bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
10425	: KnownRHS.isNonNegative();
10426	if (LHSIsNegative \|\| RHSIsNegative) {
10427	SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
10428	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMin, RHS: SumDiff);
10429	}
10430	}
10431
10432	// Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
10433	APInt MinVal = APInt::getSignedMinValue(numBits: BitWidth);
10434	SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
10435	SDValue Shift = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: SumDiff,
10436	N2: DAG.getConstant(Val: BitWidth - `1`, DL: dl, VT));
10437	Result = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Shift, N2: SatMin);
10438	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Result, RHS: SumDiff);
10439	}
10440
10441	SDValue TargetLowering::expandCMP(SDNode Node, SelectionDAG &DAG) const* {
10442	unsigned Opcode = Node->getOpcode();
10443	SDValue LHS = Node->getOperand(Num: `0`);
10444	SDValue RHS = Node->getOperand(Num: `1`);
10445	EVT VT = LHS.getValueType();
10446	EVT ResVT = Node->getValueType(ResNo: `0`);
10447	EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10448	SDLoc dl(Node);
10449
10450	auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
10451	auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
10452	SDValue IsLT = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS, Cond: LTPredicate);
10453	SDValue IsGT = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS, Cond: GTPredicate);
10454
10455	// We can't perform arithmetic on i1 values. Extending them would
10456	// probably result in worse codegen, so let's just use two selects instead.
10457	// Some targets are also just better off using selects rather than subtraction
10458	// because one of the conditions can be merged with one of the selects.
10459	// And finally, if we don't know the contents of high bits of a boolean value
10460	// we can't perform any arithmetic either.
10461	if (shouldExpandCmpUsingSelects() \|\| BoolVT.getScalarSizeInBits() == `1` \|\|
10462	getBooleanContents(Type: BoolVT) == UndefinedBooleanContent) {
10463	SDValue SelectZeroOrOne =
10464	DAG.getSelect(DL: dl, VT: ResVT, Cond: IsGT, LHS: DAG.getConstant(Val: `1`, DL: dl, VT: ResVT),
10465	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: ResVT));
10466	return DAG.getSelect(DL: dl, VT: ResVT, Cond: IsLT, LHS: DAG.getConstant(Val: -`1`, DL: dl, VT: ResVT),
10467	RHS: SelectZeroOrOne);
10468	}
10469
10470	if (getBooleanContents(Type: BoolVT) == ZeroOrNegativeOneBooleanContent)
10471	std::swap(a&: IsGT, b&: IsLT);
10472	return DAG.getSExtOrTrunc(Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: BoolVT, N1: IsGT, N2: IsLT), DL: dl,
10473	VT: ResVT);
10474	}
10475
10476	SDValue TargetLowering::expandShlSat(SDNode Node, SelectionDAG &DAG) const* {
10477	unsigned Opcode = Node->getOpcode();
10478	bool IsSigned = Opcode == ISD::SSHLSAT;
10479	SDValue LHS = Node->getOperand(Num: `0`);
10480	SDValue RHS = Node->getOperand(Num: `1`);
10481	EVT VT = LHS.getValueType();
10482	SDLoc dl(Node);
10483
10484	assert((Node->getOpcode() == ISD::SSHLSAT \|\|
10485	Node->getOpcode() == ISD::USHLSAT) &&
10486	"Expected a SHLSAT opcode");
10487	assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10488	assert(VT.isInteger() && "Expected operands to be integers");
10489
10490	if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
10491	return DAG.UnrollVectorOp(N: Node);
10492
10493	// If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
10494
10495	unsigned BW = VT.getScalarSizeInBits();
10496	EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10497	SDValue Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS, N2: RHS);
10498	SDValue Orig =
10499	DAG.getNode(Opcode: IsSigned ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: Result, N2: RHS);
10500
10501	SDValue SatVal;
10502	if (IsSigned) {
10503	SDValue SatMin = DAG.getConstant(Val: APInt::getSignedMinValue(numBits: BW), DL: dl, VT);
10504	SDValue SatMax = DAG.getConstant(Val: APInt::getSignedMaxValue(numBits: BW), DL: dl, VT);
10505	SDValue Cond =
10506	DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: DAG.getConstant(Val: `0`, DL: dl, VT), Cond: ISD::SETLT);
10507	SatVal = DAG.getSelect(DL: dl, VT, Cond, LHS: SatMin, RHS: SatMax);
10508	} else {
10509	SatVal = DAG.getConstant(Val: APInt::getMaxValue(numBits: BW), DL: dl, VT);
10510	}
10511	SDValue Cond = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: Orig, Cond: ISD::SETNE);
10512	return DAG.getSelect(DL: dl, VT, Cond, LHS: SatVal, RHS: Result);
10513	}
10514
10515	void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
10516	bool Signed, EVT WideVT,
10517	const SDValue LL, const SDValue LH,
10518	const SDValue RL, const SDValue RH,
10519	SDValue &Lo, SDValue &Hi) const {
10520	// We can fall back to a libcall with an illegal type for the MUL if we
10521	// have a libcall big enough.
10522	// Also, we can fall back to a division in some cases, but that's a big
10523	// performance hit in the general case.
10524	RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
10525	if (WideVT == MVT::i16)
10526	LC = RTLIB::MUL_I16;
10527	else if (WideVT == MVT::i32)
10528	LC = RTLIB::MUL_I32;
10529	else if (WideVT == MVT::i64)
10530	LC = RTLIB::MUL_I64;
10531	else if (WideVT == MVT::i128)
10532	LC = RTLIB::MUL_I128;
10533
10534	if (LC == RTLIB::UNKNOWN_LIBCALL \|\| !getLibcallName(Call: LC)) {
10535	// We'll expand the multiplication by brute force because we have no other
10536	// options. This is a trivially-generalized version of the code from
10537	// Hacker's Delight (itself derived from Knuth's Algorithm M from section
10538	// 4.3.1).
10539	EVT VT = LL.getValueType();
10540	unsigned Bits = VT.getSizeInBits();
10541	unsigned HalfBits = Bits >> `1`;
10542	SDValue Mask =
10543	DAG.getConstant(Val: APInt::getLowBitsSet(numBits: Bits, loBitsSet: HalfBits), DL: dl, VT);
10544	SDValue LLL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: LL, N2: Mask);
10545	SDValue RLL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: RL, N2: Mask);
10546
10547	SDValue T = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LLL, N2: RLL);
10548	SDValue TL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: T, N2: Mask);
10549
10550	SDValue Shift = DAG.getShiftAmountConstant(Val: HalfBits, VT, DL: dl);
10551	SDValue TH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: T, N2: Shift);
10552	SDValue LLH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: LL, N2: Shift);
10553	SDValue RLH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: RL, N2: Shift);
10554
10555	SDValue U = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT,
10556	N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LLH, N2: RLL), N2: TH);
10557	SDValue UL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: U, N2: Mask);
10558	SDValue UH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: U, N2: Shift);
10559
10560	SDValue V = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT,
10561	N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LLL, N2: RLH), N2: UL);
10562	SDValue VH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: V, N2: Shift);
10563
10564	SDValue W =
10565	DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LLH, N2: RLH),
10566	N2: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: UH, N2: VH));
10567	Lo = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: TL,
10568	N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: V, N2: Shift));
10569
10570	Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: W,
10571	N2: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT,
10572	N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: RH, N2: LL),
10573	N2: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: RL, N2: LH)));
10574	} else {
10575	// Attempt a libcall.
10576	SDValue Ret;
10577	TargetLowering::MakeLibCallOptions CallOptions;
10578	CallOptions.setSExt(Signed);
10579	CallOptions.setIsPostTypeLegalization(true);
10580	if (shouldSplitFunctionArgumentsAsLittleEndian(DL: DAG.getDataLayout())) {
10581	// Halves of WideVT are packed into registers in different order
10582	// depending on platform endianness. This is usually handled by
10583	// the C calling convention, but we can't defer to it in
10584	// the legalizer.
10585	SDValue Args[] = {LL, LH, RL, RH};
10586	Ret = makeLibCall(DAG, LC, RetVT: WideVT, Ops: Args, CallOptions, dl).first;
10587	} else {
10588	SDValue Args[] = {LH, LL, RH, RL};
10589	Ret = makeLibCall(DAG, LC, RetVT: WideVT, Ops: Args, CallOptions, dl).first;
10590	}
10591	assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
10592	"Ret value is a collection of constituent nodes holding result.");
10593	if (DAG.getDataLayout().isLittleEndian()) {
10594	// Same as above.
10595	Lo = Ret.getOperand(i: `0`);
10596	Hi = Ret.getOperand(i: `1`);
10597	} else {
10598	Lo = Ret.getOperand(i: `1`);
10599	Hi = Ret.getOperand(i: `0`);
10600	}
10601	}
10602	}
10603
10604	void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
10605	bool Signed, const SDValue LHS,
10606	const SDValue RHS, SDValue &Lo,
10607	SDValue &Hi) const {
10608	EVT VT = LHS.getValueType();
10609	assert(RHS.getValueType() == VT && "Mismatching operand types");
10610
10611	SDValue HiLHS;
10612	SDValue HiRHS;
10613	if (Signed) {
10614	// The high part is obtained by SRA'ing all but one of the bits of low
10615	// part.
10616	unsigned LoSize = VT.getFixedSizeInBits();
10617	HiLHS = DAG.getNode(
10618	Opcode: ISD::SRA, DL: dl, VT, N1: LHS,
10619	N2: DAG.getConstant(Val: LoSize - `1`, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
10620	HiRHS = DAG.getNode(
10621	Opcode: ISD::SRA, DL: dl, VT, N1: RHS,
10622	N2: DAG.getConstant(Val: LoSize - `1`, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
10623	} else {
10624	HiLHS = DAG.getConstant(Val: `0`, DL: dl, VT);
10625	HiRHS = DAG.getConstant(Val: `0`, DL: dl, VT);
10626	}
10627	EVT WideVT = EVT::getIntegerVT(Context&: DAG.getContext(), BitWidth: VT.getSizeInBits() `2`);
10628	forceExpandWideMUL(DAG, dl, Signed, WideVT, LL: LHS, LH: HiLHS, RL: RHS, RH: HiRHS, Lo, Hi);
10629	}
10630
10631	SDValue
10632	TargetLowering::expandFixedPointMul(SDNode Node, SelectionDAG &DAG) const* {
10633	assert((Node->getOpcode() == ISD::SMULFIX \|\|
10634	Node->getOpcode() == ISD::UMULFIX \|\|
10635	Node->getOpcode() == ISD::SMULFIXSAT \|\|
10636	Node->getOpcode() == ISD::UMULFIXSAT) &&
10637	"Expected a fixed point multiplication opcode");
10638
10639	SDLoc dl(Node);
10640	SDValue LHS = Node->getOperand(Num: `0`);
10641	SDValue RHS = Node->getOperand(Num: `1`);
10642	EVT VT = LHS.getValueType();
10643	unsigned Scale = Node->getConstantOperandVal(Num: `2`);
10644	bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT \|\|
10645	Node->getOpcode() == ISD::UMULFIXSAT);
10646	bool Signed = (Node->getOpcode() == ISD::SMULFIX \|\|
10647	Node->getOpcode() == ISD::SMULFIXSAT);
10648	EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10649	unsigned VTSize = VT.getScalarSizeInBits();
10650
10651	if (!Scale) {
10652	// [us]mul.fix(a, b, 0) -> mul(a, b)
10653	if (!Saturating) {
10654	if (isOperationLegalOrCustom(Op: ISD::MUL, VT))
10655	return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
10656	} else if (Signed && isOperationLegalOrCustom(Op: ISD::SMULO, VT)) {
10657	SDValue Result =
10658	DAG.getNode(Opcode: ISD::SMULO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
10659	SDValue Product = Result.getValue(R: `0`);
10660	SDValue Overflow = Result.getValue(R: `1`);
10661	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
10662
10663	APInt MinVal = APInt::getSignedMinValue(numBits: VTSize);
10664	APInt MaxVal = APInt::getSignedMaxValue(numBits: VTSize);
10665	SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
10666	SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
10667	// Xor the inputs, if resulting sign bit is 0 the product will be
10668	// positive, else negative.
10669	SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: LHS, N2: RHS);
10670	SDValue ProdNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Xor, RHS: Zero, Cond: ISD::SETLT);
10671	Result = DAG.getSelect(DL: dl, VT, Cond: ProdNeg, LHS: SatMin, RHS: SatMax);
10672	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Result, RHS: Product);
10673	} else if (!Signed && isOperationLegalOrCustom(Op: ISD::UMULO, VT)) {
10674	SDValue Result =
10675	DAG.getNode(Opcode: ISD::UMULO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
10676	SDValue Product = Result.getValue(R: `0`);
10677	SDValue Overflow = Result.getValue(R: `1`);
10678
10679	APInt MaxVal = APInt::getMaxValue(numBits: VTSize);
10680	SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
10681	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMax, RHS: Product);
10682	}
10683	}
10684
10685	assert(((Signed && Scale < VTSize) \|\| (!Signed && Scale <= VTSize)) &&
10686	"Expected scale to be less than the number of bits if signed or at "
10687	"most the number of bits if unsigned.");
10688	assert(LHS.getValueType() == RHS.getValueType() &&
10689	"Expected both operands to be the same type");
10690
10691	// Get the upper and lower bits of the result.
10692	SDValue Lo, Hi;
10693	unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10694	unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
10695	EVT WideVT = EVT::getIntegerVT(Context&: DAG.getContext(), BitWidth: VTSize `2`);
10696	if (isOperationLegalOrCustom(Op: LoHiOp, VT)) {
10697	SDValue Result = DAG.getNode(Opcode: LoHiOp, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: LHS, N2: RHS);
10698	Lo = Result.getValue(R: `0`);
10699	Hi = Result.getValue(R: `1`);
10700	} else if (isOperationLegalOrCustom(Op: HiOp, VT)) {
10701	Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
10702	Hi = DAG.getNode(Opcode: HiOp, DL: dl, VT, N1: LHS, N2: RHS);
10703	} else if (isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT)) {
10704	// Try for a multiplication using a wider type.
10705	unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
10706	SDValue LHSExt = DAG.getNode(Opcode: Ext, DL: dl, VT: WideVT, Operand: LHS);
10707	SDValue RHSExt = DAG.getNode(Opcode: Ext, DL: dl, VT: WideVT, Operand: RHS);
10708	SDValue Res = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: LHSExt, N2: RHSExt);
10709	Lo = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Res);
10710	SDValue Shifted =
10711	DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: WideVT, N1: Res,
10712	N2: DAG.getShiftAmountConstant(Val: VTSize, VT: WideVT, DL: dl));
10713	Hi = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Shifted);
10714	} else if (VT.isVector()) {
10715	return SDValue ();
10716	} else {
10717	forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
10718	}
10719
10720	if (Scale == VTSize)
10721	// Result is just the top half since we'd be shifting by the width of the
10722	// operand. Overflow impossible so this works for both UMULFIX and
10723	// UMULFIXSAT.
10724	return Hi;
10725
10726	// The result will need to be shifted right by the scale since both operands
10727	// are scaled. The result is given to us in 2 halves, so we only want part of
10728	// both in the result.
10729	SDValue Result = DAG.getNode(Opcode: ISD::FSHR, DL: dl, VT, N1: Hi, N2: Lo,
10730	N3: DAG.getShiftAmountConstant(Val: Scale, VT, DL: dl));
10731	if (!Saturating)
10732	return Result;
10733
10734	if (!Signed) {
10735	// Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
10736	// widened multiplication) aren't all zeroes.
10737
10738	// Saturate to max if ((Hi >> Scale) != 0),
10739	// which is the same as if (Hi > ((1 << Scale) - 1))
10740	APInt MaxVal = APInt::getMaxValue(numBits: VTSize);
10741	SDValue LowMask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VTSize, loBitsSet: Scale),
10742	DL: dl, VT);
10743	Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: LowMask,
10744	True: DAG.getConstant(Val: MaxVal, DL: dl, VT), False: Result,
10745	Cond: ISD::SETUGT);
10746
10747	return Result;
10748	}
10749
10750	// Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
10751	// widened multiplication) aren't all ones or all zeroes.
10752
10753	SDValue SatMin = DAG.getConstant(Val: APInt::getSignedMinValue(numBits: VTSize), DL: dl, VT);
10754	SDValue SatMax = DAG.getConstant(Val: APInt::getSignedMaxValue(numBits: VTSize), DL: dl, VT);
10755
10756	if (Scale == `0`) {
10757	SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Lo,
10758	N2: DAG.getShiftAmountConstant(Val: VTSize - `1`, VT, DL: dl));
10759	SDValue Overflow = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Hi, RHS: Sign, Cond: ISD::SETNE);
10760	// Saturated to SatMin if wide product is negative, and SatMax if wide
10761	// product is positive ...
10762	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
10763	SDValue ResultIfOverflow = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: Zero, True: SatMin, False: SatMax,
10764	Cond: ISD::SETLT);
10765	// ... but only if we overflowed.
10766	return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: ResultIfOverflow, RHS: Result);
10767	}
10768
10769	// We handled Scale==0 above so all the bits to examine is in Hi.
10770
10771	// Saturate to max if ((Hi >> (Scale - 1)) > 0),
10772	// which is the same as if (Hi > (1 << (Scale - 1)) - 1)
10773	SDValue LowMask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VTSize, loBitsSet: Scale - `1`),
10774	DL: dl, VT);
10775	Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: LowMask, True: SatMax, False: Result, Cond: ISD::SETGT);
10776	// Saturate to min if (Hi >> (Scale - 1)) < -1),
10777	// which is the same as if (HI < (-1 << (Scale - 1))
10778	SDValue HighMask =
10779	DAG.getConstant(Val: APInt::getHighBitsSet(numBits: VTSize, hiBitsSet: VTSize - Scale + `1`),
10780	DL: dl, VT);
10781	Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: HighMask, True: SatMin, False: Result, Cond: ISD::SETLT);
10782	return Result;
10783	}
10784
10785	SDValue
10786	TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
10787	SDValue LHS, SDValue RHS,
10788	unsigned Scale, SelectionDAG &DAG) const {
10789	assert((Opcode == ISD::SDIVFIX \|\| Opcode == ISD::SDIVFIXSAT \|\|
10790	Opcode == ISD::UDIVFIX \|\| Opcode == ISD::UDIVFIXSAT) &&
10791	"Expected a fixed point division opcode");
10792
10793	EVT VT = LHS.getValueType();
10794	bool Signed = Opcode == ISD::SDIVFIX \|\| Opcode == ISD::SDIVFIXSAT;
10795	bool Saturating = Opcode == ISD::SDIVFIXSAT \|\| Opcode == ISD::UDIVFIXSAT;
10796	EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10797
10798	// If there is enough room in the type to upscale the LHS or downscale the
10799	// RHS before the division, we can perform it in this type without having to
10800	// resize. For signed operations, the LHS headroom is the number of
10801	// redundant sign bits, and for unsigned ones it is the number of zeroes.
10802	// The headroom for the RHS is the number of trailing zeroes.
10803	unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(Op: LHS) - `1`
10804	: DAG.computeKnownBits(Op: LHS).countMinLeadingZeros();
10805	unsigned RHSTrail = DAG.computeKnownBits(Op: RHS).countMinTrailingZeros();
10806
10807	// For signed saturating operations, we need to be able to detect true integer
10808	// division overflow; that is, when you have MIN / -EPS. However, this
10809	// is undefined behavior and if we emit divisions that could take such
10810	// values it may cause undesired behavior (arithmetic exceptions on x86, for
10811	// example).
10812	// Avoid this by requiring an extra bit so that we never get this case.
10813	// FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
10814	// signed saturating division, we need to emit a whopping 32-bit division.
10815	if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
10816	return SDValue ();
10817
10818	unsigned LHSShift = std::min(a: LHSLead, b: Scale);
10819	unsigned RHSShift = Scale - LHSShift;
10820
10821	// At this point, we know that if we shift the LHS up by LHSShift and the
10822	// RHS down by RHSShift, we can emit a regular division with a final scaling
10823	// factor of Scale.
10824
10825	if (LHSShift)
10826	LHS = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS,
10827	N2: DAG.getShiftAmountConstant(Val: LHSShift, VT, DL: dl));
10828	if (RHSShift)
10829	RHS = DAG.getNode(Opcode: Signed ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: RHS,
10830	N2: DAG.getShiftAmountConstant(Val: RHSShift, VT, DL: dl));
10831
10832	SDValue Quot;
10833	if (Signed) {
10834	// For signed operations, if the resulting quotient is negative and the
10835	// remainder is nonzero, subtract 1 from the quotient to round towards
10836	// negative infinity.
10837	SDValue Rem;
10838	// FIXME: Ideally we would always produce an SDIVREM here, but if the
10839	// type isn't legal, SDIVREM cannot be expanded. There is no reason why
10840	// we couldn't just form a libcall, but the type legalizer doesn't do it.
10841	if (isTypeLegal(VT) &&
10842	isOperationLegalOrCustom(Op: ISD::SDIVREM, VT)) {
10843	Quot = DAG.getNode(Opcode: ISD::SDIVREM, DL: dl,
10844	VTList: DAG.getVTList(VT1: VT, VT2: VT),
10845	N1: LHS, N2: RHS);
10846	Rem = Quot.getValue(R: `1`);
10847	Quot = Quot.getValue(R: `0`);
10848	} else {
10849	Quot = DAG.getNode(Opcode: ISD::SDIV, DL: dl, VT,
10850	N1: LHS, N2: RHS);
10851	Rem = DAG.getNode(Opcode: ISD::SREM, DL: dl, VT,
10852	N1: LHS, N2: RHS);
10853	}
10854	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT);
10855	SDValue RemNonZero = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Rem, RHS: Zero, Cond: ISD::SETNE);
10856	SDValue LHSNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: Zero, Cond: ISD::SETLT);
10857	SDValue RHSNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: RHS, RHS: Zero, Cond: ISD::SETLT);
10858	SDValue QuotNeg = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: BoolVT, N1: LHSNeg, N2: RHSNeg);
10859	SDValue Sub1 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Quot,
10860	N2: DAG.getConstant(Val: `1`, DL: dl, VT));
10861	Quot = DAG.getSelect(DL: dl, VT,
10862	Cond: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: BoolVT, N1: RemNonZero, N2: QuotNeg),
10863	LHS: Sub1, RHS: Quot);
10864	} else
10865	Quot = DAG.getNode(Opcode: ISD::UDIV, DL: dl, VT,
10866	N1: LHS, N2: RHS);
10867
10868	return Quot;
10869	}
10870
10871	void TargetLowering::expandUADDSUBO(
10872	SDNode Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const* {
10873	SDLoc dl(Node);
10874	SDValue LHS = Node->getOperand(Num: `0`);
10875	SDValue RHS = Node->getOperand(Num: `1`);
10876	bool IsAdd = Node->getOpcode() == ISD::UADDO;
10877
10878	// If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
10879	unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
10880	if (isOperationLegalOrCustom(Op: OpcCarry, VT: Node->getValueType(ResNo: `0`))) {
10881	SDValue CarryIn = DAG.getConstant(Val: `0`, DL: dl, VT: Node->getValueType(ResNo: `1`));
10882	SDValue NodeCarry = DAG.getNode(Opcode: OpcCarry, DL: dl, VTList: Node->getVTList(),
10883	Ops: { LHS, RHS, CarryIn });
10884	Result = SDValue (NodeCarry.getNode(), `0`);
10885	Overflow = SDValue (NodeCarry.getNode(), `1`);
10886	return;
10887	}
10888
10889	Result = DAG.getNode(Opcode: IsAdd ? ISD::ADD : ISD::SUB, DL: dl,
10890	VT: LHS.getValueType(), N1: LHS, N2: RHS);
10891
10892	EVT ResultType = Node->getValueType(ResNo: `1`);
10893	EVT SetCCType = getSetCCResultType(
10894	DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: `0`));
10895	SDValue SetCC;
10896	if (IsAdd && isOneConstant(V: RHS)) {
10897	// Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
10898	// the live range of X. We assume comparing with 0 is cheap.
10899	// The general case (X + C) < C is not necessarily beneficial. Although we
10900	// reduce the live range of X, we may introduce the materialization of
10901	// constant C.
10902	SetCC =
10903	DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Result,
10904	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: Node->getValueType(ResNo: `0`)), Cond: ISD::SETEQ);
10905	} else if (IsAdd && isAllOnesConstant(V: RHS)) {
10906	// Special case: uaddo X, -1 overflows if X != 0.
10907	SetCC =
10908	DAG.getSetCC(DL: dl, VT: SetCCType, LHS,
10909	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: Node->getValueType(ResNo: `0`)), Cond: ISD::SETNE);
10910	} else {
10911	ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
10912	SetCC = DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Result, RHS: LHS, Cond: CC);
10913	}
10914	Overflow = DAG.getBoolExtOrTrunc(Op: SetCC, SL: dl, VT: ResultType, OpVT: ResultType);
10915	}
10916
10917	void TargetLowering::expandSADDSUBO(
10918	SDNode Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const* {
10919	SDLoc dl(Node);
10920	SDValue LHS = Node->getOperand(Num: `0`);
10921	SDValue RHS = Node->getOperand(Num: `1`);
10922	bool IsAdd = Node->getOpcode() == ISD::SADDO;
10923
10924	Result = DAG.getNode(Opcode: IsAdd ? ISD::ADD : ISD::SUB, DL: dl,
10925	VT: LHS.getValueType(), N1: LHS, N2: RHS);
10926
10927	EVT ResultType = Node->getValueType(ResNo: `1`);
10928	EVT OType = getSetCCResultType(
10929	DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: `0`));
10930
10931	// If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
10932	unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
10933	if (isOperationLegal(Op: OpcSat, VT: LHS.getValueType())) {
10934	SDValue Sat = DAG.getNode(Opcode: OpcSat, DL: dl, VT: LHS.getValueType(), N1: LHS, N2: RHS);
10935	SDValue SetCC = DAG.getSetCC(DL: dl, VT: OType, LHS: Result, RHS: Sat, Cond: ISD::SETNE);
10936	Overflow = DAG.getBoolExtOrTrunc(Op: SetCC, SL: dl, VT: ResultType, OpVT: ResultType);
10937	return;
10938	}
10939
10940	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT: LHS.getValueType());
10941
10942	// For an addition, the result should be less than one of the operands (LHS)
10943	// if and only if the other operand (RHS) is negative, otherwise there will
10944	// be overflow.
10945	// For a subtraction, the result should be less than one of the operands
10946	// (LHS) if and only if the other operand (RHS) is (non-zero) positive,
10947	// otherwise there will be overflow.
10948	SDValue ResultLowerThanLHS = DAG.getSetCC(DL: dl, VT: OType, LHS: Result, RHS: LHS, Cond: ISD::SETLT);
10949	SDValue ConditionRHS =
10950	DAG.getSetCC(DL: dl, VT: OType, LHS: RHS, RHS: Zero, Cond: IsAdd ? ISD::SETLT : ISD::SETGT);
10951
10952	Overflow = DAG.getBoolExtOrTrunc(
10953	Op: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OType, N1: ConditionRHS, N2: ResultLowerThanLHS), SL: dl,
10954	VT: ResultType, OpVT: ResultType);
10955	}
10956
10957	bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
10958	SDValue &Overflow, SelectionDAG &DAG) const {
10959	SDLoc dl(Node);
10960	EVT VT = Node->getValueType(ResNo: `0`);
10961	EVT SetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10962	SDValue LHS = Node->getOperand(Num: `0`);
10963	SDValue RHS = Node->getOperand(Num: `1`);
10964	bool isSigned = Node->getOpcode() == ISD::SMULO;
10965
10966	// For power-of-two multiplications we can use a simpler shift expansion.
10967	if (ConstantSDNode *RHSC = isConstOrConstSplat(N: RHS)) {
10968	const APInt &C = RHSC->getAPIntValue();
10969	// mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
10970	if (C.isPowerOf2()) {
10971	// smulo(x, signed_min) is same as umulo(x, signed_min).
10972	bool UseArithShift = isSigned && !C.isMinSignedValue();
10973	SDValue ShiftAmt = DAG.getShiftAmountConstant(Val: C.logBase2(), VT, DL: dl);
10974	Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS, N2: ShiftAmt);
10975	Overflow = DAG.getSetCC(DL: dl, VT: SetCCVT,
10976	LHS: DAG.getNode(Opcode: UseArithShift ? ISD::SRA : ISD::SRL,
10977	DL: dl, VT, N1: Result, N2: ShiftAmt),
10978	RHS: LHS, Cond: ISD::SETNE);
10979	return true;
10980	}
10981	}
10982
10983	EVT WideVT = EVT::getIntegerVT(Context&: DAG.getContext(), BitWidth: VT.getScalarSizeInBits() `2`);
10984	if (VT.isVector())
10985	WideVT =
10986	EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT, EC: VT.getVectorElementCount());
10987
10988	SDValue BottomHalf;
10989	SDValue TopHalf;
10990	static const unsigned Ops[`2`][`3`] =
10991	{ { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
10992	{ ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
10993	if (isOperationLegalOrCustom(Op: Ops[isSigned][`0`], VT)) {
10994	BottomHalf = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
10995	TopHalf = DAG.getNode(Opcode: Ops[isSigned][`0`], DL: dl, VT, N1: LHS, N2: RHS);
10996	} else if (isOperationLegalOrCustom(Op: Ops[isSigned][`1`], VT)) {
10997	BottomHalf = DAG.getNode(Opcode: Ops[isSigned][`1`], DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: LHS,
10998	N2: RHS);
10999	TopHalf = BottomHalf.getValue(R: `1`);
11000	} else if (isTypeLegal(VT: WideVT)) {
11001	LHS = DAG.getNode(Opcode: Ops[isSigned][`2`], DL: dl, VT: WideVT, Operand: LHS);
11002	RHS = DAG.getNode(Opcode: Ops[isSigned][`2`], DL: dl, VT: WideVT, Operand: RHS);
11003	SDValue Mul = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: LHS, N2: RHS);
11004	BottomHalf = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Mul);
11005	SDValue ShiftAmt =
11006	DAG.getShiftAmountConstant(Val: VT.getScalarSizeInBits(), VT: WideVT, DL: dl);
11007	TopHalf = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT,
11008	Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideVT, N1: Mul, N2: ShiftAmt));
11009	} else {
11010	if (VT.isVector())
11011	return false;
11012
11013	forceExpandWideMUL(DAG, dl, Signed: isSigned, LHS, RHS, Lo&: BottomHalf, Hi&: TopHalf);
11014	}
11015
11016	Result = BottomHalf;
11017	if (isSigned) {
11018	SDValue ShiftAmt = DAG.getShiftAmountConstant(
11019	Val: VT.getScalarSizeInBits() - `1`, VT: BottomHalf.getValueType(), DL: dl);
11020	SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: BottomHalf, N2: ShiftAmt);
11021	Overflow = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: TopHalf, RHS: Sign, Cond: ISD::SETNE);
11022	} else {
11023	Overflow = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: TopHalf,
11024	RHS: DAG.getConstant(Val: `0`, DL: dl, VT), Cond: ISD::SETNE);
11025	}
11026
11027	// Truncate the result if SetCC returns a larger type than needed.
11028	EVT RType = Node->getValueType(ResNo: `1`);
11029	if (RType.bitsLT(VT: Overflow.getValueType()))
11030	Overflow = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: RType, Operand: Overflow);
11031
11032	assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
11033	"Unexpected result type for S/UMULO legalization");
11034	return true;
11035	}
11036
11037	SDValue TargetLowering::expandVecReduce(SDNode Node, SelectionDAG &DAG) const* {
11038	SDLoc dl(Node);
11039	unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(VecReduceOpcode: Node->getOpcode());
11040	SDValue Op = Node->getOperand(Num: `0`);
11041	EVT VT = Op.getValueType();
11042
11043	if (VT.isScalableVector())
11044	report_fatal_error(
11045	reason: "Expanding reductions for scalable vectors is undefined.");
11046
11047	// Try to use a shuffle reduction for power of two vectors.
11048	if (VT.isPow2VectorType()) {
11049	while (VT.getVectorNumElements() > `1`) {
11050	EVT HalfVT = VT.getHalfNumVectorElementsVT(Context&: *DAG.getContext());
11051	if (!isOperationLegalOrCustom(Op: BaseOpcode, VT: HalfVT))
11052	break;
11053
11054	SDValue Lo, Hi;
11055	std::tie(args&: Lo, args&: Hi) = DAG.SplitVector(N: Op, DL: dl);
11056	Op = DAG.getNode(Opcode: BaseOpcode, DL: dl, VT: HalfVT, N1: Lo, N2: Hi, Flags: Node->getFlags());
11057	VT = HalfVT;
11058	}
11059	}
11060
11061	EVT EltVT = VT.getVectorElementType();
11062	unsigned NumElts = VT.getVectorNumElements();
11063
11064	SmallVector<SDValue, `8`> Ops;
11065	DAG.ExtractVectorElements(Op, Args&: Ops, Start: `0`, Count: NumElts);
11066
11067	SDValue Res = Ops [`0`];
11068	for (unsigned i = `1`; i < NumElts; i++)
11069	Res = DAG.getNode(Opcode: BaseOpcode, DL: dl, VT: EltVT, N1: Res, N2: Ops [i], Flags: Node->getFlags());
11070
11071	// Result type may be wider than element type.
11072	if (EltVT != Node->getValueType(ResNo: `0`))
11073	Res = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: Node->getValueType(ResNo: `0`), Operand: Res);
11074	return Res;
11075	}
11076
11077	SDValue TargetLowering::expandVecReduceSeq(SDNode Node, SelectionDAG &DAG) const* {
11078	SDLoc dl(Node);
11079	SDValue AccOp = Node->getOperand(Num: `0`);
11080	SDValue VecOp = Node->getOperand(Num: `1`);
11081	SDNodeFlags Flags = Node->getFlags();
11082
11083	EVT VT = VecOp.getValueType();
11084	EVT EltVT = VT.getVectorElementType();
11085
11086	if (VT.isScalableVector())
11087	report_fatal_error(
11088	reason: "Expanding reductions for scalable vectors is undefined.");
11089
11090	unsigned NumElts = VT.getVectorNumElements();
11091
11092	SmallVector<SDValue, `8`> Ops;
11093	DAG.ExtractVectorElements(Op: VecOp, Args&: Ops, Start: `0`, Count: NumElts);
11094
11095	unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(VecReduceOpcode: Node->getOpcode());
11096
11097	SDValue Res = AccOp;
11098	for (unsigned i = `0`; i < NumElts; i++)
11099	Res = DAG.getNode(Opcode: BaseOpcode, DL: dl, VT: EltVT, N1: Res, N2: Ops [i], Flags);
11100
11101	return Res;
11102	}
11103
11104	bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
11105	SelectionDAG &DAG) const {
11106	EVT VT = Node->getValueType(ResNo: `0`);
11107	SDLoc dl(Node);
11108	bool isSigned = Node->getOpcode() == ISD::SREM;
11109	unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
11110	unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
11111	SDValue Dividend = Node->getOperand(Num: `0`);
11112	SDValue Divisor = Node->getOperand(Num: `1`);
11113	if (isOperationLegalOrCustom(Op: DivRemOpc, VT)) {
11114	SDVTList VTs = DAG.getVTList(VT1: VT, VT2: VT);
11115	Result = DAG.getNode(Opcode: DivRemOpc, DL: dl, VTList: VTs, N1: Dividend, N2: Divisor).getValue(R: `1`);
11116	return true;
11117	}
11118	if (isOperationLegalOrCustom(Op: DivOpc, VT)) {
11119	// X % Y -> X-X/YY*
11120	SDValue Divide = DAG.getNode(Opcode: DivOpc, DL: dl, VT, N1: Dividend, N2: Divisor);
11121	SDValue Mul = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Divide, N2: Divisor);
11122	Result = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Dividend, N2: Mul);
11123	return true;
11124	}
11125	return false;
11126	}
11127
11128	SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
11129	SelectionDAG &DAG) const {
11130	bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
11131	SDLoc dl(SDValue (Node, `0`));
11132	SDValue Src = Node->getOperand(Num: `0`);
11133
11134	// DstVT is the result type, while SatVT is the size to which we saturate
11135	EVT SrcVT = Src.getValueType();
11136	EVT DstVT = Node->getValueType(ResNo: `0`);
11137
11138	EVT SatVT = cast<VTSDNode>(Val: Node->getOperand(Num: `1`))->getVT();
11139	unsigned SatWidth = SatVT.getScalarSizeInBits();
11140	unsigned DstWidth = DstVT.getScalarSizeInBits();
11141	assert(SatWidth <= DstWidth &&
11142	"Expected saturation width smaller than result width");
11143
11144	// Determine minimum and maximum integer values and their corresponding
11145	// floating-point values.
11146	APInt MinInt, MaxInt;
11147	if (IsSigned) {
11148	MinInt = APInt::getSignedMinValue(numBits: SatWidth).sext(width: DstWidth);
11149	MaxInt = APInt::getSignedMaxValue(numBits: SatWidth).sext(width: DstWidth);
11150	} else {
11151	MinInt = APInt::getMinValue(numBits: SatWidth).zext(width: DstWidth);
11152	MaxInt = APInt::getMaxValue(numBits: SatWidth).zext(width: DstWidth);
11153	}
11154
11155	// We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
11156	// libcall emission cannot handle this. Large result types will fail.
11157	if (SrcVT == MVT::f16 \|\| SrcVT == MVT::bf16) {
11158	Src = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f32, Operand: Src);
11159	SrcVT = Src.getValueType();
11160	}
11161
11162	APFloat MinFloat(DAG.EVTToAPFloatSemantics(VT: SrcVT));
11163	APFloat MaxFloat(DAG.EVTToAPFloatSemantics(VT: SrcVT));
11164
11165	APFloat::opStatus MinStatus =
11166	MinFloat.convertFromAPInt(Input: MinInt, IsSigned, RM: APFloat::rmTowardZero);
11167	APFloat::opStatus MaxStatus =
11168	MaxFloat.convertFromAPInt(Input: MaxInt, IsSigned, RM: APFloat::rmTowardZero);
11169	bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
11170	!(MaxStatus & APFloat::opStatus::opInexact);
11171
11172	SDValue MinFloatNode = DAG.getConstantFP(Val: MinFloat, DL: dl, VT: SrcVT);
11173	SDValue MaxFloatNode = DAG.getConstantFP(Val: MaxFloat, DL: dl, VT: SrcVT);
11174
11175	// If the integer bounds are exactly representable as floats and min/max are
11176	// legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
11177	// of comparisons and selects.
11178	bool MinMaxLegal = isOperationLegal(Op: ISD::FMINNUM, VT: SrcVT) &&
11179	isOperationLegal(Op: ISD::FMAXNUM, VT: SrcVT);
11180	if (AreExactFloatBounds && MinMaxLegal) {
11181	SDValue Clamped = Src;
11182
11183	// Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
11184	Clamped = DAG.getNode(Opcode: ISD::FMAXNUM, DL: dl, VT: SrcVT, N1: Clamped, N2: MinFloatNode);
11185	// Clamp by MaxFloat from above. NaN cannot occur.
11186	Clamped = DAG.getNode(Opcode: ISD::FMINNUM, DL: dl, VT: SrcVT, N1: Clamped, N2: MaxFloatNode);
11187	// Convert clamped value to integer.
11188	SDValue FpToInt = DAG.getNode(Opcode: IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
11189	DL: dl, VT: DstVT, Operand: Clamped);
11190
11191	// In the unsigned case we're done, because we mapped NaN to MinFloat,
11192	// which will cast to zero.
11193	if (!IsSigned)
11194	return FpToInt;
11195
11196	// Otherwise, select 0 if Src is NaN.
11197	SDValue ZeroInt = DAG.getConstant(Val: `0`, DL: dl, VT: DstVT);
11198	EVT SetCCVT =
11199	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: SrcVT);
11200	SDValue IsNan = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Src, Cond: ISD::CondCode::SETUO);
11201	return DAG.getSelect(DL: dl, VT: DstVT, Cond: IsNan, LHS: ZeroInt, RHS: FpToInt);
11202	}
11203
11204	SDValue MinIntNode = DAG.getConstant(Val: MinInt, DL: dl, VT: DstVT);
11205	SDValue MaxIntNode = DAG.getConstant(Val: MaxInt, DL: dl, VT: DstVT);
11206
11207	// Result of direct conversion. The assumption here is that the operation is
11208	// non-trapping and it's fine to apply it to an out-of-range value if we
11209	// select it away later.
11210	SDValue FpToInt =
11211	DAG.getNode(Opcode: IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, DL: dl, VT: DstVT, Operand: Src);
11212
11213	SDValue Select = FpToInt;
11214
11215	EVT SetCCVT =
11216	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: SrcVT);
11217
11218	// If Src ULT MinFloat, select MinInt. In particular, this also selects
11219	// MinInt if Src is NaN.
11220	SDValue ULT = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: MinFloatNode, Cond: ISD::SETULT);
11221	Select = DAG.getSelect(DL: dl, VT: DstVT, Cond: ULT, LHS: MinIntNode, RHS: Select);
11222	// If Src OGT MaxFloat, select MaxInt.
11223	SDValue OGT = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: MaxFloatNode, Cond: ISD::SETOGT);
11224	Select = DAG.getSelect(DL: dl, VT: DstVT, Cond: OGT, LHS: MaxIntNode, RHS: Select);
11225
11226	// In the unsigned case we are done, because we mapped NaN to MinInt, which
11227	// is already zero.
11228	if (!IsSigned)
11229	return Select;
11230
11231	// Otherwise, select 0 if Src is NaN.
11232	SDValue ZeroInt = DAG.getConstant(Val: `0`, DL: dl, VT: DstVT);
11233	SDValue IsNan = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Src, Cond: ISD::CondCode::SETUO);
11234	return DAG.getSelect(DL: dl, VT: DstVT, Cond: IsNan, LHS: ZeroInt, RHS: Select);
11235	}
11236
11237	SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue Op,
11238	const SDLoc &dl,
11239	SelectionDAG &DAG) const {
11240	EVT OperandVT = Op.getValueType();
11241	if (OperandVT.getScalarType() == ResultVT.getScalarType())
11242	return Op;
11243	EVT ResultIntVT = ResultVT.changeTypeToInteger();
11244	// We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11245	// can induce double-rounding which may alter the results. We can
11246	// correct for this using a trick explained in: Boldo, Sylvie, and
11247	// Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11248	// World Congress. 2005.
11249	unsigned BitSize = OperandVT.getScalarSizeInBits();
11250	EVT WideIntVT = OperandVT.changeTypeToInteger();
11251	SDValue OpAsInt = DAG.getBitcast(VT: WideIntVT, V: Op);
11252	SDValue SignBit =
11253	DAG.getNode(Opcode: ISD::AND, DL: dl, VT: WideIntVT, N1: OpAsInt,
11254	N2: DAG.getConstant(Val: APInt::getSignMask(BitWidth: BitSize), DL: dl, VT: WideIntVT));
11255	SDValue AbsWide;
11256	if (isOperationLegalOrCustom(Op: ISD::FABS, VT: OperandVT)) {
11257	AbsWide = DAG.getNode(Opcode: ISD::FABS, DL: dl, VT: OperandVT, Operand: Op);
11258	} else {
11259	SDValue ClearedSign = DAG.getNode(
11260	Opcode: ISD::AND, DL: dl, VT: WideIntVT, N1: OpAsInt,
11261	N2: DAG.getConstant(Val: APInt::getSignedMaxValue(numBits: BitSize), DL: dl, VT: WideIntVT));
11262	AbsWide = DAG.getBitcast(VT: OperandVT, V: ClearedSign);
11263	}
11264	SDValue AbsNarrow = DAG.getFPExtendOrRound(Op: AbsWide, DL: dl, VT: ResultVT);
11265	SDValue AbsNarrowAsWide = DAG.getFPExtendOrRound(Op: AbsNarrow, DL: dl, VT: OperandVT);
11266
11267	// We can keep the narrow value as-is if narrowing was exact (no
11268	// rounding error), the wide value was NaN (the narrow value is also
11269	// NaN and should be preserved) or if we rounded to the odd value.
11270	SDValue NarrowBits = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: ResultIntVT, Operand: AbsNarrow);
11271	SDValue One = DAG.getConstant(Val: `1`, DL: dl, VT: ResultIntVT);
11272	SDValue NegativeOne = DAG.getAllOnesConstant(DL: dl, VT: ResultIntVT);
11273	SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ResultIntVT, N1: NarrowBits, N2: One);
11274	EVT ResultIntVTCCVT = getSetCCResultType(
11275	DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: And.getValueType());
11276	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT: ResultIntVT);
11277	// The result is already odd so we don't need to do anything.
11278	SDValue AlreadyOdd = DAG.getSetCC(DL: dl, VT: ResultIntVTCCVT, LHS: And, RHS: Zero, Cond: ISD::SETNE);
11279
11280	EVT WideSetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(),
11281	VT: AbsWide.getValueType());
11282	// We keep results which are exact, odd or NaN.
11283	SDValue KeepNarrow =
11284	DAG.getSetCC(DL: dl, VT: WideSetCCVT, LHS: AbsWide, RHS: AbsNarrowAsWide, Cond: ISD::SETUEQ);
11285	KeepNarrow = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: WideSetCCVT, N1: KeepNarrow, N2: AlreadyOdd);
11286	// We morally performed a round-down if AbsNarrow is smaller than
11287	// AbsWide.
11288	SDValue NarrowIsRd =
11289	DAG.getSetCC(DL: dl, VT: WideSetCCVT, LHS: AbsWide, RHS: AbsNarrowAsWide, Cond: ISD::SETOGT);
11290	// If the narrow value is odd or exact, pick it.
11291	// Otherwise, narrow is even and corresponds to either the rounded-up
11292	// or rounded-down value. If narrow is the rounded-down value, we want
11293	// the rounded-up value as it will be odd.
11294	SDValue Adjust = DAG.getSelect(DL: dl, VT: ResultIntVT, Cond: NarrowIsRd, LHS: One, RHS: NegativeOne);
11295	SDValue Adjusted = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ResultIntVT, N1: NarrowBits, N2: Adjust);
11296	Op = DAG.getSelect(DL: dl, VT: ResultIntVT, Cond: KeepNarrow, LHS: NarrowBits, RHS: Adjusted);
11297	int ShiftAmount = BitSize - ResultVT.getScalarSizeInBits();
11298	SDValue ShiftCnst = DAG.getShiftAmountConstant(Val: ShiftAmount, VT: WideIntVT, DL: dl);
11299	SignBit = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideIntVT, N1: SignBit, N2: ShiftCnst);
11300	SignBit = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: ResultIntVT, Operand: SignBit);
11301	Op = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ResultIntVT, N1: Op, N2: SignBit);
11302	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: ResultVT, Operand: Op);
11303	}
11304
11305	SDValue TargetLowering::expandFP_ROUND(SDNode Node, SelectionDAG &DAG) const* {
11306	assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
11307	SDValue Op = Node->getOperand(Num: `0`);
11308	EVT VT = Node->getValueType(ResNo: `0`);
11309	SDLoc dl(Node);
11310	if (VT.getScalarType() == MVT::bf16) {
11311	if (Node->getConstantOperandVal(Num: `1`) == `1`) {
11312	return DAG.getNode(Opcode: ISD::FP_TO_BF16, DL: dl, VT, Operand: Node->getOperand(Num: `0`));
11313	}
11314	EVT OperandVT = Op.getValueType();
11315	SDValue IsNaN = DAG.getSetCC(
11316	DL: dl,
11317	VT: getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: OperandVT),
11318	LHS: Op, RHS: Op, Cond: ISD::SETUO);
11319
11320	// We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11321	// can induce double-rounding which may alter the results. We can
11322	// correct for this using a trick explained in: Boldo, Sylvie, and
11323	// Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11324	// World Congress. 2005.
11325	EVT F32 = VT.isVector() ? VT.changeVectorElementType(EltVT: MVT::f32) : MVT::f32;
11326	EVT I32 = F32.changeTypeToInteger();
11327	Op = expandRoundInexactToOdd(ResultVT: F32, Op, dl, DAG);
11328	Op = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: I32, Operand: Op);
11329
11330	// Conversions should set NaN's quiet bit. This also prevents NaNs from
11331	// turning into infinities.
11332	SDValue NaN =
11333	DAG.getNode(Opcode: ISD::OR, DL: dl, VT: I32, N1: Op, N2: DAG.getConstant(Val: `0x400000`, DL: dl, VT: I32));
11334
11335	// Factor in the contribution of the low 16 bits.
11336	SDValue One = DAG.getConstant(Val: `1`, DL: dl, VT: I32);
11337	SDValue Lsb = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: I32, N1: Op,
11338	N2: DAG.getShiftAmountConstant(Val: `16`, VT: I32, DL: dl));
11339	Lsb = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: I32, N1: Lsb, N2: One);
11340	SDValue RoundingBias =
11341	DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: I32, N1: DAG.getConstant(Val: `0x7fff`, DL: dl, VT: I32), N2: Lsb);
11342	SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: I32, N1: Op, N2: RoundingBias);
11343
11344	// Don't round if we had a NaN, we don't want to turn 0x7fffffff into
11345	// 0x80000000.
11346	Op = DAG.getSelect(DL: dl, VT: I32, Cond: IsNaN, LHS: NaN, RHS: Add);
11347
11348	// Now that we have rounded, shift the bits into position.
11349	Op = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: I32, N1: Op,
11350	N2: DAG.getShiftAmountConstant(Val: `16`, VT: I32, DL: dl));
11351	Op = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: I32, Operand: Op);
11352	EVT I16 = I32.isVector() ? I32.changeVectorElementType(EltVT: MVT::i16) : MVT::i16;
11353	Op = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: I16, Operand: Op);
11354	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: Op);
11355	}
11356	return SDValue ();
11357	}
11358
11359	SDValue TargetLowering::expandVectorSplice(SDNode *Node,
11360	SelectionDAG &DAG) const {
11361	assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
11362	assert(Node->getValueType(`0`).isScalableVector() &&
11363	"Fixed length vector types expected to use SHUFFLE_VECTOR!");
11364
11365	EVT VT = Node->getValueType(ResNo: `0`);
11366	SDValue V1 = Node->getOperand(Num: `0`);
11367	SDValue V2 = Node->getOperand(Num: `1`);
11368	int64_t Imm = cast<ConstantSDNode>(Val: Node->getOperand(Num: `2`))->getSExtValue();
11369	SDLoc DL(Node);
11370
11371	// Expand through memory thusly:
11372	// Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
11373	// Store V1, Ptr
11374	// Store V2, Ptr + sizeof(V1)
11375	// If (Imm < 0)
11376	// TrailingElts = -Imm
11377	// Ptr = Ptr + sizeof(V1) - (TrailingElts sizeof(VT.Elt))*
11378	// else
11379	// Ptr = Ptr + (Imm sizeof(VT.Elt))*
11380	// Res = Load Ptr
11381
11382	Align Alignment = DAG.getReducedAlign(VT, /UseABI=/false);
11383
11384	EVT MemVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: VT.getVectorElementType(),
11385	EC: VT.getVectorElementCount() * `2`);
11386	SDValue StackPtr = DAG.CreateStackTemporary(Bytes: MemVT.getStoreSize(), Alignment);
11387	EVT PtrVT = StackPtr.getValueType();
11388	auto &MF = DAG.getMachineFunction();
11389	auto FrameIndex = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
11390	auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FI: FrameIndex);
11391
11392	// Store the lo part of CONCAT_VECTORS(V1, V2)
11393	SDValue StoreV1 = DAG.getStore(Chain: DAG.getEntryNode(), dl: DL, Val: V1, Ptr: StackPtr, PtrInfo);
11394	// Store the hi part of CONCAT_VECTORS(V1, V2)
11395	SDValue OffsetToV2 = DAG.getVScale(
11396	DL, VT: PtrVT,
11397	MulImm: APInt (PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue()));
11398	SDValue StackPtr2 = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, N2: OffsetToV2);
11399	SDValue StoreV2 = DAG.getStore(Chain: StoreV1, dl: DL, Val: V2, Ptr: StackPtr2, PtrInfo);
11400
11401	if (Imm >= `0`) {
11402	// Load back the required element. getVectorElementPointer takes care of
11403	// clamping the index if it's out-of-bounds.
11404	StackPtr = getVectorElementPointer(DAG, VecPtr: StackPtr, VecVT: VT, Index: Node->getOperand(Num: `2`));
11405	// Load the spliced result
11406	return DAG.getLoad(VT, dl: DL, Chain: StoreV2, Ptr: StackPtr,
11407	PtrInfo: MachinePointerInfo::getUnknownStack(MF));
11408	}
11409
11410	uint64_t TrailingElts = -Imm;
11411
11412	// NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
11413	TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
11414	SDValue TrailingBytes =
11415	DAG.getConstant(Val: TrailingElts * EltByteSize, DL, VT: PtrVT);
11416
11417	if (TrailingElts > VT.getVectorMinNumElements()) {
11418	SDValue VLBytes =
11419	DAG.getVScale(DL, VT: PtrVT,
11420	MulImm: APInt (PtrVT.getFixedSizeInBits(),
11421	VT.getStoreSize().getKnownMinValue()));
11422	TrailingBytes = DAG.getNode(Opcode: ISD::UMIN, DL, VT: PtrVT, N1: TrailingBytes, N2: VLBytes);
11423	}
11424
11425	// Calculate the start address of the spliced result.
11426	StackPtr2 = DAG.getNode(Opcode: ISD::SUB, DL, VT: PtrVT, N1: StackPtr2, N2: TrailingBytes);
11427
11428	// Load the spliced result
11429	return DAG.getLoad(VT, dl: DL, Chain: StoreV2, Ptr: StackPtr2,
11430	PtrInfo: MachinePointerInfo::getUnknownStack(MF));
11431	}
11432
11433	SDValue TargetLowering::expandVECTOR_COMPRESS(SDNode *Node,
11434	SelectionDAG &DAG) const {
11435	SDLoc DL(Node);
11436	SDValue Vec = Node->getOperand(Num: `0`);
11437	SDValue Mask = Node->getOperand(Num: `1`);
11438	SDValue Passthru = Node->getOperand(Num: `2`);
11439
11440	EVT VecVT = Vec.getValueType();
11441	EVT ScalarVT = VecVT.getScalarType();
11442	EVT MaskVT = Mask.getValueType();
11443	EVT MaskScalarVT = MaskVT.getScalarType();
11444
11445	// Needs to be handled by targets that have scalable vector types.
11446	if (VecVT.isScalableVector())
11447	report_fatal_error(reason: "Cannot expand masked_compress for scalable vectors.");
11448
11449	SDValue StackPtr = DAG.CreateStackTemporary(
11450	Bytes: VecVT.getStoreSize(), Alignment: DAG.getReducedAlign(VT: VecVT, /UseABI=/false));
11451	int FI = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
11452	MachinePointerInfo PtrInfo =
11453	MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI);
11454
11455	MVT PositionVT = getVectorIdxTy(DL: DAG.getDataLayout());
11456	SDValue Chain = DAG.getEntryNode();
11457	SDValue OutPos = DAG.getConstant(Val: `0`, DL, VT: PositionVT);
11458
11459	bool HasPassthru = !Passthru.isUndef();
11460
11461	// If we have a passthru vector, store it on the stack, overwrite the matching
11462	// positions and then re-write the last element that was potentially
11463	// overwritten even though mask[i] = false.
11464	if (HasPassthru)
11465	Chain = DAG.getStore(Chain, dl: DL, Val: Passthru, Ptr: StackPtr, PtrInfo);
11466
11467	SDValue LastWriteVal;
11468	APInt PassthruSplatVal;
11469	bool IsSplatPassthru =
11470	ISD::isConstantSplatVector(N: Passthru.getNode(), SplatValue&: PassthruSplatVal);
11471
11472	if (IsSplatPassthru) {
11473	// As we do not know which position we wrote to last, we cannot simply
11474	// access that index from the passthru vector. So we first check if passthru
11475	// is a splat vector, to use any element ...
11476	LastWriteVal = DAG.getConstant(Val: PassthruSplatVal, DL, VT: ScalarVT);
11477	} else if (HasPassthru) {
11478	// ... if it is not a splat vector, we need to get the passthru value at
11479	// position = popcount(mask) and re-load it from the stack before it is
11480	// overwritten in the loop below.
11481	SDValue Popcount = DAG.getNode(
11482	Opcode: ISD::TRUNCATE, DL, VT: MaskVT.changeVectorElementType(EltVT: MVT::i1), Operand: Mask);
11483	Popcount = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL,
11484	VT: MaskVT.changeVectorElementType(EltVT: ScalarVT), Operand: Popcount);
11485	Popcount = DAG.getNode(Opcode: ISD::VECREDUCE_ADD, DL, VT: ScalarVT, Operand: Popcount);
11486	SDValue LastElmtPtr =
11487	getVectorElementPointer(DAG, VecPtr: StackPtr, VecVT, Index: Popcount);
11488	LastWriteVal = DAG.getLoad(
11489	VT: ScalarVT, dl: DL, Chain, Ptr: LastElmtPtr,
11490	PtrInfo: MachinePointerInfo::getUnknownStack(MF&: DAG.getMachineFunction()));
11491	Chain = LastWriteVal.getValue(R: `1`);
11492	}
11493
11494	unsigned NumElms = VecVT.getVectorNumElements();
11495	for (unsigned I = `0`; I < NumElms; I++) {
11496	SDValue Idx = DAG.getVectorIdxConstant(Val: I, DL);
11497
11498	SDValue ValI = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: ScalarVT, N1: Vec, N2: Idx);
11499	SDValue OutPtr = getVectorElementPointer(DAG, VecPtr: StackPtr, VecVT, Index: OutPos);
11500	Chain = DAG.getStore(
11501	Chain, dl: DL, Val: ValI, Ptr: OutPtr,
11502	PtrInfo: MachinePointerInfo::getUnknownStack(MF&: DAG.getMachineFunction()));
11503
11504	// Get the mask value and add it to the current output position. This
11505	// either increments by 1 if MaskI is true or adds 0 otherwise.
11506	// Freeze in case we have poison/undef mask entries.
11507	SDValue MaskI = DAG.getFreeze(
11508	V: DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: MaskScalarVT, N1: Mask, N2: Idx));
11509	MaskI = DAG.getFreeze(V: MaskI);
11510	MaskI = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i1, Operand: MaskI);
11511	MaskI = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: PositionVT, Operand: MaskI);
11512	OutPos = DAG.getNode(Opcode: ISD::ADD, DL, VT: PositionVT, N1: OutPos, N2: MaskI);
11513
11514	if (HasPassthru && I == NumElms - `1`) {
11515	SDValue EndOfVector =
11516	DAG.getConstant(Val: VecVT.getVectorNumElements() - `1`, DL, VT: PositionVT);
11517	SDValue AllLanesSelected =
11518	DAG.getSetCC(DL, VT: MVT::i1, LHS: OutPos, RHS: EndOfVector, Cond: ISD::CondCode::SETUGT);
11519	OutPos = DAG.getNode(Opcode: ISD::UMIN, DL, VT: PositionVT, N1: OutPos, N2: EndOfVector);
11520	OutPtr = getVectorElementPointer(DAG, VecPtr: StackPtr, VecVT, Index: OutPos);
11521
11522	// Re-write the last ValI if all lanes were selected. Otherwise,
11523	// overwrite the last write it with the passthru value.
11524	LastWriteVal =
11525	DAG.getSelect(DL, VT: ScalarVT, Cond: AllLanesSelected, LHS: ValI, RHS: LastWriteVal);
11526	Chain = DAG.getStore(
11527	Chain, dl: DL, Val: LastWriteVal, Ptr: OutPtr,
11528	PtrInfo: MachinePointerInfo::getUnknownStack(MF&: DAG.getMachineFunction()));
11529	}
11530	}
11531
11532	return DAG.getLoad(VT: VecVT, dl: DL, Chain, Ptr: StackPtr, PtrInfo);
11533	}
11534
11535	bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
11536	SDValue &LHS, SDValue &RHS,
11537	SDValue &CC, SDValue Mask,
11538	SDValue EVL, bool &NeedInvert,
11539	const SDLoc &dl, SDValue &Chain,
11540	bool IsSignaling) const {
11541	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11542	MVT OpVT = LHS.getSimpleValueType();
11543	ISD::CondCode CCCode = cast<CondCodeSDNode>(Val&: CC)->get();
11544	NeedInvert = false;
11545	assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
11546	bool IsNonVP = !EVL;
11547	switch (TLI.getCondCodeAction(CC: CCCode, VT: OpVT)) {
11548	default:
11549	llvm_unreachable("Unknown condition code action!");
11550	case TargetLowering::Legal:
11551	// Nothing to do.
11552	break;
11553	case TargetLowering::Expand: {
11554	ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(Operation: CCCode);
11555	if (TLI.isCondCodeLegalOrCustom(CC: InvCC, VT: OpVT)) {
11556	std::swap(a&: LHS, b&: RHS);
11557	CC = DAG.getCondCode(Cond: InvCC);
11558	return true;
11559	}
11560	// Swapping operands didn't work. Try inverting the condition.
11561	bool NeedSwap = false;
11562	InvCC = getSetCCInverse(Operation: CCCode, Type: OpVT);
11563	if (!TLI.isCondCodeLegalOrCustom(CC: InvCC, VT: OpVT)) {
11564	// If inverting the condition is not enough, try swapping operands
11565	// on top of it.
11566	InvCC = ISD::getSetCCSwappedOperands(Operation: InvCC);
11567	NeedSwap = true;
11568	}
11569	if (TLI.isCondCodeLegalOrCustom(CC: InvCC, VT: OpVT)) {
11570	CC = DAG.getCondCode(Cond: InvCC);
11571	NeedInvert = true;
11572	if (NeedSwap)
11573	std::swap(a&: LHS, b&: RHS);
11574	return true;
11575	}
11576
11577	ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
11578	unsigned Opc = `0`;
11579	switch (CCCode) {
11580	default:
11581	llvm_unreachable("Don't know how to expand this condition!");
11582	case ISD::SETUO:
11583	if (TLI.isCondCodeLegal(CC: ISD::SETUNE, VT: OpVT)) {
11584	CC1 = ISD::SETUNE;
11585	CC2 = ISD::SETUNE;
11586	Opc = ISD::OR;
11587	break;
11588	}
11589	assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
11590	"If SETUE is expanded, SETOEQ or SETUNE must be legal!");
11591	NeedInvert = true;
11592	[[fallthrough]];
11593	case ISD::SETO:
11594	assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
11595	"If SETO is expanded, SETOEQ must be legal!");
11596	CC1 = ISD::SETOEQ;
11597	CC2 = ISD::SETOEQ;
11598	Opc = ISD::AND;
11599	break;
11600	case ISD::SETONE:
11601	case ISD::SETUEQ:
11602	// If the SETUO or SETO CC isn't legal, we might be able to use
11603	// SETOGT \|\| SETOLT, inverting the result for SETUEQ. We only need one
11604	// of SETOGT/SETOLT to be legal, the other can be emulated by swapping
11605	// the operands.
11606	CC2 = ((unsigned)CCCode & `0x8U`) ? ISD::SETUO : ISD::SETO;
11607	if (!TLI.isCondCodeLegal(CC: CC2, VT: OpVT) &&
11608	(TLI.isCondCodeLegal(CC: ISD::SETOGT, VT: OpVT) \|\|
11609	TLI.isCondCodeLegal(CC: ISD::SETOLT, VT: OpVT))) {
11610	CC1 = ISD::SETOGT;
11611	CC2 = ISD::SETOLT;
11612	Opc = ISD::OR;
11613	NeedInvert = ((unsigned)CCCode & `0x8U`);
11614	break;
11615	}
11616	[[fallthrough]];
11617	case ISD::SETOEQ:
11618	case ISD::SETOGT:
11619	case ISD::SETOGE:
11620	case ISD::SETOLT:
11621	case ISD::SETOLE:
11622	case ISD::SETUNE:
11623	case ISD::SETUGT:
11624	case ISD::SETUGE:
11625	case ISD::SETULT:
11626	case ISD::SETULE:
11627	// If we are floating point, assign and break, otherwise fall through.
11628	if (!OpVT.isInteger()) {
11629	// We can use the 4th bit to tell if we are the unordered
11630	// or ordered version of the opcode.
11631	CC2 = ((unsigned)CCCode & `0x8U`) ? ISD::SETUO : ISD::SETO;
11632	Opc = ((unsigned)CCCode & `0x8U`) ? ISD::OR : ISD::AND;
11633	CC1 = (ISD::CondCode)(((int)CCCode & `0x7`) \| `0x10`);
11634	break;
11635	}
11636	// Fallthrough if we are unsigned integer.
11637	[[fallthrough]];
11638	case ISD::SETLE:
11639	case ISD::SETGT:
11640	case ISD::SETGE:
11641	case ISD::SETLT:
11642	case ISD::SETNE:
11643	case ISD::SETEQ:
11644	// If all combinations of inverting the condition and swapping operands
11645	// didn't work then we have no means to expand the condition.
11646	llvm_unreachable("Don't know how to expand this condition!");
11647	}
11648
11649	SDValue SetCC1, SetCC2;
11650	if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
11651	// If we aren't the ordered or unordered operation,
11652	// then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
11653	if (IsNonVP) {
11654	SetCC1 = DAG.getSetCC(DL: dl, VT, LHS, RHS, Cond: CC1, Chain, IsSignaling);
11655	SetCC2 = DAG.getSetCC(DL: dl, VT, LHS, RHS, Cond: CC2, Chain, IsSignaling);
11656	} else {
11657	SetCC1 = DAG.getSetCCVP(DL: dl, VT, LHS, RHS, Cond: CC1, Mask, EVL);
11658	SetCC2 = DAG.getSetCCVP(DL: dl, VT, LHS, RHS, Cond: CC2, Mask, EVL);
11659	}
11660	} else {
11661	// Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
11662	if (IsNonVP) {
11663	SetCC1 = DAG.getSetCC(DL: dl, VT, LHS, RHS: LHS, Cond: CC1, Chain, IsSignaling);
11664	SetCC2 = DAG.getSetCC(DL: dl, VT, LHS: RHS, RHS, Cond: CC2, Chain, IsSignaling);
11665	} else {
11666	SetCC1 = DAG.getSetCCVP(DL: dl, VT, LHS, RHS: LHS, Cond: CC1, Mask, EVL);
11667	SetCC2 = DAG.getSetCCVP(DL: dl, VT, LHS: RHS, RHS, Cond: CC2, Mask, EVL);
11668	}
11669	}
11670	if (Chain)
11671	Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: SetCC1.getValue(R: `1`),
11672	N2: SetCC2.getValue(R: `1`));
11673	if (IsNonVP)
11674	LHS = DAG.getNode(Opcode: Opc, DL: dl, VT, N1: SetCC1, N2: SetCC2);
11675	else {
11676	// Transform the binary opcode to the VP equivalent.
11677	assert((Opc == ISD::OR \|\| Opc == ISD::AND) && "Unexpected opcode");
11678	Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
11679	LHS = DAG.getNode(Opcode: Opc, DL: dl, VT, N1: SetCC1, N2: SetCC2, N3: Mask, N4: EVL);
11680	}
11681	RHS = SDValue ();
11682	CC = SDValue ();
11683	return true;
11684	}
11685	}
11686	return false;
11687	}
11688

Browse the source code of llvm_projects/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp