1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/APSInt.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/IntervalMap.h"
24#include "llvm/ADT/STLExtras.h"
25#include "llvm/ADT/SetVector.h"
26#include "llvm/ADT/SmallBitVector.h"
27#include "llvm/ADT/SmallPtrSet.h"
28#include "llvm/ADT/SmallSet.h"
29#include "llvm/ADT/SmallVector.h"
30#include "llvm/ADT/Statistic.h"
31#include "llvm/Analysis/AliasAnalysis.h"
32#include "llvm/Analysis/MemoryLocation.h"
33#include "llvm/Analysis/TargetLibraryInfo.h"
34#include "llvm/Analysis/ValueTracking.h"
35#include "llvm/Analysis/VectorUtils.h"
36#include "llvm/CodeGen/ByteProvider.h"
37#include "llvm/CodeGen/DAGCombine.h"
38#include "llvm/CodeGen/ISDOpcodes.h"
39#include "llvm/CodeGen/MachineFrameInfo.h"
40#include "llvm/CodeGen/MachineFunction.h"
41#include "llvm/CodeGen/MachineMemOperand.h"
42#include "llvm/CodeGen/SDPatternMatch.h"
43#include "llvm/CodeGen/SelectionDAG.h"
44#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
45#include "llvm/CodeGen/SelectionDAGNodes.h"
46#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
47#include "llvm/CodeGen/TargetLowering.h"
48#include "llvm/CodeGen/TargetRegisterInfo.h"
49#include "llvm/CodeGen/TargetSubtargetInfo.h"
50#include "llvm/CodeGen/ValueTypes.h"
51#include "llvm/CodeGenTypes/MachineValueType.h"
52#include "llvm/IR/Attributes.h"
53#include "llvm/IR/Constant.h"
54#include "llvm/IR/DataLayout.h"
55#include "llvm/IR/DerivedTypes.h"
56#include "llvm/IR/Function.h"
57#include "llvm/IR/Metadata.h"
58#include "llvm/Support/Casting.h"
59#include "llvm/Support/CodeGen.h"
60#include "llvm/Support/CommandLine.h"
61#include "llvm/Support/Compiler.h"
62#include "llvm/Support/Debug.h"
63#include "llvm/Support/DebugCounter.h"
64#include "llvm/Support/ErrorHandling.h"
65#include "llvm/Support/KnownBits.h"
66#include "llvm/Support/MathExtras.h"
67#include "llvm/Support/raw_ostream.h"
68#include "llvm/Target/TargetMachine.h"
69#include "llvm/Target/TargetOptions.h"
70#include <algorithm>
71#include <cassert>
72#include <cstdint>
73#include <functional>
74#include <iterator>
75#include <optional>
76#include <string>
77#include <tuple>
78#include <utility>
79#include <variant>
80
81#include "MatchContext.h"
82
83using namespace llvm;
84using namespace llvm::SDPatternMatch;
85
86#define DEBUG_TYPE "dagcombine"
87
88STATISTIC(NodesCombined , "Number of dag nodes combined");
89STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
90STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
91STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
92STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
93STATISTIC(SlicedLoads, "Number of load sliced");
94STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
95
96DEBUG_COUNTER(DAGCombineCounter, "dagcombine",
97 "Controls whether a DAG combine is performed for a node");
98
99static cl::opt<bool>
100CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
101 cl::desc("Enable DAG combiner's use of IR alias analysis"));
102
103static cl::opt<bool>
104UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(Val: true),
105 cl::desc("Enable DAG combiner's use of TBAA"));
106
107#ifndef NDEBUG
108static cl::opt<std::string>
109CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
110 cl::desc("Only use DAG-combiner alias analysis in this"
111 " function"));
112#endif
113
114/// Hidden option to stress test load slicing, i.e., when this option
115/// is enabled, load slicing bypasses most of its profitability guards.
116static cl::opt<bool>
117StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
118 cl::desc("Bypass the profitability model of load slicing"),
119 cl::init(Val: false));
120
121static cl::opt<bool>
122 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(Val: true),
123 cl::desc("DAG combiner may split indexing from loads"));
124
125static cl::opt<bool>
126 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(Val: true),
127 cl::desc("DAG combiner enable merging multiple stores "
128 "into a wider store"));
129
130static cl::opt<unsigned> TokenFactorInlineLimit(
131 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(Val: 2048),
132 cl::desc("Limit the number of operands to inline for Token Factors"));
133
134static cl::opt<unsigned> StoreMergeDependenceLimit(
135 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(Val: 10),
136 cl::desc("Limit the number of times for the same StoreNode and RootNode "
137 "to bail out in store merging dependence check"));
138
139static cl::opt<bool> EnableReduceLoadOpStoreWidth(
140 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(Val: true),
141 cl::desc("DAG combiner enable reducing the width of load/op/store "
142 "sequence"));
143static cl::opt<bool> ReduceLoadOpStoreWidthForceNarrowingProfitable(
144 "combiner-reduce-load-op-store-width-force-narrowing-profitable",
145 cl::Hidden, cl::init(Val: false),
146 cl::desc("DAG combiner force override the narrowing profitable check when "
147 "reducing the width of load/op/store sequences"));
148
149static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
150 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(Val: true),
151 cl::desc("DAG combiner enable load/<replace bytes>/store with "
152 "a narrower store"));
153
154static cl::opt<bool> DisableCombines("combiner-disabled", cl::Hidden,
155 cl::init(Val: false),
156 cl::desc("Disable the DAG combiner"));
157
158namespace {
159
160 class DAGCombiner {
161 SelectionDAG &DAG;
162 const TargetLowering &TLI;
163 const SelectionDAGTargetInfo *STI;
164 CombineLevel Level = BeforeLegalizeTypes;
165 CodeGenOptLevel OptLevel;
166 bool LegalDAG = false;
167 bool LegalOperations = false;
168 bool LegalTypes = false;
169 bool ForCodeSize;
170 bool DisableGenericCombines;
171
172 /// Worklist of all of the nodes that need to be simplified.
173 ///
174 /// This must behave as a stack -- new nodes to process are pushed onto the
175 /// back and when processing we pop off of the back.
176 ///
177 /// The worklist will not contain duplicates but may contain null entries
178 /// due to nodes being deleted from the underlying DAG. For fast lookup and
179 /// deduplication, the index of the node in this vector is stored in the
180 /// node in SDNode::CombinerWorklistIndex.
181 SmallVector<SDNode *, 64> Worklist;
182
    /// This records all nodes attempted to be added to the worklist since we
    /// considered a new worklist entry. Since we do not add duplicate nodes
    /// to the worklist, this is different from the tail of the worklist.
186 SmallSetVector<SDNode *, 32> PruningList;
187
188 /// Map from candidate StoreNode to the pair of RootNode and count.
189 /// The count is used to track how many times we have seen the StoreNode
190 /// with the same RootNode bail out in dependence check. If we have seen
191 /// the bail out for the same pair many times over a limit, we won't
192 /// consider the StoreNode with the same RootNode as store merging
193 /// candidate again.
194 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
195
196 // BatchAA - Used for DAG load/store alias analysis.
197 BatchAAResults *BatchAA;
198
    /// This caches all chains that have already been processed in
    /// DAGCombiner::getStoreMergeCandidates() and found to have no mergeable
    /// store candidates.
202 SmallPtrSet<SDNode *, 4> ChainsWithoutMergeableStores;
203
204 /// When an instruction is simplified, add all users of the instruction to
205 /// the work lists because they might get more simplified now.
206 void AddUsersToWorklist(SDNode *N) {
207 for (SDNode *Node : N->users())
208 AddToWorklist(N: Node);
209 }
210
211 /// Convenient shorthand to add a node and all of its user to the worklist.
212 void AddToWorklistWithUsers(SDNode *N) {
213 AddUsersToWorklist(N);
214 AddToWorklist(N);
215 }
216
217 // Prune potentially dangling nodes. This is called after
218 // any visit to a node, but should also be called during a visit after any
219 // failed combine which may have created a DAG node.
220 void clearAddedDanglingWorklistEntries() {
221 // Check any nodes added to the worklist to see if they are prunable.
222 while (!PruningList.empty()) {
223 auto *N = PruningList.pop_back_val();
224 if (N->use_empty())
225 recursivelyDeleteUnusedNodes(N);
226 }
227 }
228
229 SDNode *getNextWorklistEntry() {
230 // Before we do any work, remove nodes that are not in use.
231 clearAddedDanglingWorklistEntries();
232 SDNode *N = nullptr;
233 // The Worklist holds the SDNodes in order, but it may contain null
234 // entries.
235 while (!N && !Worklist.empty()) {
236 N = Worklist.pop_back_val();
237 }
238
239 if (N) {
240 assert(N->getCombinerWorklistIndex() >= 0 &&
241 "Found a worklist entry without a corresponding map entry!");
242 // Set to -2 to indicate that we combined the node.
243 N->setCombinerWorklistIndex(-2);
244 }
245 return N;
246 }
247
248 /// Call the node-specific routine that folds each particular type of node.
249 SDValue visit(SDNode *N);
250
251 public:
252 DAGCombiner(SelectionDAG &D, BatchAAResults *BatchAA, CodeGenOptLevel OL)
253 : DAG(D), TLI(D.getTargetLoweringInfo()),
254 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL),
255 BatchAA(BatchAA) {
256 ForCodeSize = DAG.shouldOptForSize();
257 DisableGenericCombines =
258 DisableCombines || (STI && STI->disableGenericCombines(OptLevel));
259
260 MaximumLegalStoreInBits = 0;
261 // We use the minimum store size here, since that's all we can guarantee
262 // for the scalable vector types.
263 for (MVT VT : MVT::all_valuetypes())
264 if (EVT(VT).isSimple() && VT != MVT::Other &&
265 TLI.isTypeLegal(VT: EVT(VT)) &&
266 VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
267 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
268 }
269
270 void ConsiderForPruning(SDNode *N) {
271 // Mark this for potential pruning.
272 PruningList.insert(X: N);
273 }
274
275 /// Add to the worklist making sure its instance is at the back (next to be
276 /// processed.)
277 void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true,
278 bool SkipIfCombinedBefore = false) {
279 assert(N->getOpcode() != ISD::DELETED_NODE &&
280 "Deleted Node added to Worklist");
281
282 // Skip handle nodes as they can't usefully be combined and confuse the
283 // zero-use deletion strategy.
284 if (N->getOpcode() == ISD::HANDLENODE)
285 return;
286
287 if (SkipIfCombinedBefore && N->getCombinerWorklistIndex() == -2)
288 return;
289
290 if (IsCandidateForPruning)
291 ConsiderForPruning(N);
292
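      // A negative index means the node is not currently queued; append it and
      // record its position so removeFromWorklist can null it out in O(1).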
293 if (N->getCombinerWorklistIndex() < 0) {
294 N->setCombinerWorklistIndex(Worklist.size());
295 Worklist.push_back(Elt: N);
296 }
297 }
298
299 /// Remove all instances of N from the worklist.
300 void removeFromWorklist(SDNode *N) {
301 PruningList.remove(X: N);
302 StoreRootCountMap.erase(Val: N);
303
304 int WorklistIndex = N->getCombinerWorklistIndex();
305 // If not in the worklist, the index might be -1 or -2 (was combined
306 // before). As the node gets deleted anyway, there's no need to update
307 // the index.
308 if (WorklistIndex < 0)
309 return; // Not in the worklist.
310
311 // Null out the entry rather than erasing it to avoid a linear operation.
312 Worklist[WorklistIndex] = nullptr;
313 N->setCombinerWorklistIndex(-1);
314 }
315
316 void deleteAndRecombine(SDNode *N);
317 bool recursivelyDeleteUnusedNodes(SDNode *N);
318
319 /// Replaces all uses of the results of one DAG node with new values.
320 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
321 bool AddTo = true);
322
323 /// Replaces all uses of the results of one DAG node with new values.
324 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
325 return CombineTo(N, To: &Res, NumTo: 1, AddTo);
326 }
327
328 /// Replaces all uses of the results of one DAG node with new values.
329 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
330 bool AddTo = true) {
331 SDValue To[] = { Res0, Res1 };
332 return CombineTo(N, To, NumTo: 2, AddTo);
333 }
334
335 SDValue CombineTo(SDNode *N, SmallVectorImpl<SDValue> *To,
336 bool AddTo = true) {
337 return CombineTo(N, To: To->data(), NumTo: To->size(), AddTo);
338 }
339
340 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
341
342 private:
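    /// The widest store width (in bits) that is legal for the target; for
    /// scalable vector types this uses the known-minimum size. Computed once
    /// in the constructor.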
343 unsigned MaximumLegalStoreInBits;
344
345 /// Check the specified integer node value to see if it can be simplified or
346 /// if things it uses can be simplified by bit propagation.
347 /// If so, return true.
348 bool SimplifyDemandedBits(SDValue Op) {
349 unsigned BitWidth = Op.getScalarValueSizeInBits();
350 APInt DemandedBits = APInt::getAllOnes(numBits: BitWidth);
351 return SimplifyDemandedBits(Op, DemandedBits);
352 }
353
354 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
355 EVT VT = Op.getValueType();
356 APInt DemandedElts = VT.isFixedLengthVector()
357 ? APInt::getAllOnes(numBits: VT.getVectorNumElements())
358 : APInt(1, 1);
359 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, AssumeSingleUse: false);
360 }
361
362 /// Check the specified vector node value to see if it can be simplified or
363 /// if things it uses can be simplified as it only uses some of the
364 /// elements. If so, return true.
365 bool SimplifyDemandedVectorElts(SDValue Op) {
366 // TODO: For now just pretend it cannot be simplified.
367 if (Op.getValueType().isScalableVector())
368 return false;
369
370 unsigned NumElts = Op.getValueType().getVectorNumElements();
371 APInt DemandedElts = APInt::getAllOnes(numBits: NumElts);
372 return SimplifyDemandedVectorElts(Op, DemandedElts);
373 }
374
375 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
376 const APInt &DemandedElts,
377 bool AssumeSingleUse = false);
378 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
379 bool AssumeSingleUse = false);
380
381 bool CombineToPreIndexedLoadStore(SDNode *N);
382 bool CombineToPostIndexedLoadStore(SDNode *N);
383 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
384 bool SliceUpLoad(SDNode *N);
385
    // Looks up the chain to find a unique (unaliased) store feeding the passed
    // load. If no such store is found, returns a nullptr.
    // Note: This will look past a CALLSEQ_START if the load is chained to it
    // so that it can find stack stores for byval params.
390 StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
391 // Scalars have size 0 to distinguish from singleton vectors.
392 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
393 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
394 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
395
396 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
397 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
398 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
399 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
400 SDValue PromoteIntBinOp(SDValue Op);
401 SDValue PromoteIntShiftOp(SDValue Op);
402 SDValue PromoteExtend(SDValue Op);
403 bool PromoteLoad(SDValue Op);
404
405 SDValue foldShiftToAvg(SDNode *N, const SDLoc &DL);
406 // Fold `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`
407 SDValue foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT);
408
409 SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
410 SDValue RHS, SDValue True, SDValue False,
411 ISD::CondCode CC);
412
413 /// Call the node-specific routine that knows how to fold each
414 /// particular type of node. If that doesn't do anything, try the
415 /// target-specific DAG combines.
416 SDValue combine(SDNode *N);
417
418 // Visitation implementation - Implement dag node combining for different
419 // node types. The semantics are as follows:
420 // Return Value:
421 // SDValue.getNode() == 0 - No change was made
422 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
423 // otherwise - N should be replaced by the returned Operand.
424 //
425 SDValue visitTokenFactor(SDNode *N);
426 SDValue visitMERGE_VALUES(SDNode *N);
427 SDValue visitADD(SDNode *N);
428 SDValue visitADDLike(SDNode *N);
429 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1,
430 SDNode *LocReference);
431 SDValue visitPTRADD(SDNode *N);
432 SDValue visitSUB(SDNode *N);
433 SDValue visitADDSAT(SDNode *N);
434 SDValue visitSUBSAT(SDNode *N);
435 SDValue visitADDC(SDNode *N);
436 SDValue visitADDO(SDNode *N);
437 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
438 SDValue visitSUBC(SDNode *N);
439 SDValue visitSUBO(SDNode *N);
440 SDValue visitADDE(SDNode *N);
441 SDValue visitUADDO_CARRY(SDNode *N);
442 SDValue visitSADDO_CARRY(SDNode *N);
443 SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
444 SDNode *N);
445 SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
446 SDNode *N);
447 SDValue visitSUBE(SDNode *N);
448 SDValue visitUSUBO_CARRY(SDNode *N);
449 SDValue visitSSUBO_CARRY(SDNode *N);
450 template <class MatchContextClass> SDValue visitMUL(SDNode *N);
451 SDValue visitMULFIX(SDNode *N);
452 SDValue useDivRem(SDNode *N);
453 SDValue visitSDIV(SDNode *N);
454 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
455 SDValue visitUDIV(SDNode *N);
456 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
457 SDValue visitREM(SDNode *N);
458 SDValue visitMULHU(SDNode *N);
459 SDValue visitMULHS(SDNode *N);
460 SDValue visitAVG(SDNode *N);
461 SDValue visitABD(SDNode *N);
462 SDValue visitSMUL_LOHI(SDNode *N);
463 SDValue visitUMUL_LOHI(SDNode *N);
464 SDValue visitMULO(SDNode *N);
465 SDValue visitIMINMAX(SDNode *N);
466 SDValue visitAND(SDNode *N);
467 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
468 SDValue visitOR(SDNode *N);
469 SDValue visitORLike(SDValue N0, SDValue N1, const SDLoc &DL);
470 SDValue visitXOR(SDNode *N);
471 SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
472 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
473 SDValue visitSHL(SDNode *N);
474 SDValue visitSRA(SDNode *N);
475 SDValue visitSRL(SDNode *N);
476 SDValue visitFunnelShift(SDNode *N);
477 SDValue visitSHLSAT(SDNode *N);
478 SDValue visitRotate(SDNode *N);
479 SDValue visitABS(SDNode *N);
480 SDValue visitCLMUL(SDNode *N);
481 SDValue visitBSWAP(SDNode *N);
482 SDValue visitBITREVERSE(SDNode *N);
483 SDValue visitCTLZ(SDNode *N);
484 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
485 SDValue visitCTTZ(SDNode *N);
486 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
487 SDValue visitCTPOP(SDNode *N);
488 SDValue visitSELECT(SDNode *N);
489 SDValue visitVSELECT(SDNode *N);
490 SDValue visitVP_SELECT(SDNode *N);
491 SDValue visitSELECT_CC(SDNode *N);
492 SDValue visitSETCC(SDNode *N);
493 SDValue visitSETCCCARRY(SDNode *N);
494 SDValue visitSIGN_EXTEND(SDNode *N);
495 SDValue visitZERO_EXTEND(SDNode *N);
496 SDValue visitANY_EXTEND(SDNode *N);
497 SDValue visitAssertExt(SDNode *N);
498 SDValue visitAssertAlign(SDNode *N);
499 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
500 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
501 SDValue visitTRUNCATE(SDNode *N);
502 SDValue visitTRUNCATE_USAT_U(SDNode *N);
503 SDValue visitBITCAST(SDNode *N);
504 SDValue visitFREEZE(SDNode *N);
505 SDValue visitBUILD_PAIR(SDNode *N);
506 SDValue visitFADD(SDNode *N);
507 SDValue visitVP_FADD(SDNode *N);
508 SDValue visitVP_FSUB(SDNode *N);
509 SDValue visitSTRICT_FADD(SDNode *N);
510 SDValue visitFSUB(SDNode *N);
511 SDValue visitFMUL(SDNode *N);
512 template <class MatchContextClass> SDValue visitFMA(SDNode *N);
513 SDValue visitFMAD(SDNode *N);
514 SDValue visitFMULADD(SDNode *N);
515 SDValue visitFDIV(SDNode *N);
516 SDValue visitFREM(SDNode *N);
517 SDValue visitFSQRT(SDNode *N);
518 SDValue visitFCOPYSIGN(SDNode *N);
519 SDValue visitFPOW(SDNode *N);
520 SDValue visitFCANONICALIZE(SDNode *N);
521 SDValue visitSINT_TO_FP(SDNode *N);
522 SDValue visitUINT_TO_FP(SDNode *N);
523 SDValue visitFP_TO_SINT(SDNode *N);
524 SDValue visitFP_TO_UINT(SDNode *N);
525 SDValue visitXROUND(SDNode *N);
526 SDValue visitFP_ROUND(SDNode *N);
527 SDValue visitFP_EXTEND(SDNode *N);
528 SDValue visitFNEG(SDNode *N);
529 SDValue visitFABS(SDNode *N);
530 SDValue visitFCEIL(SDNode *N);
531 SDValue visitFTRUNC(SDNode *N);
532 SDValue visitFFREXP(SDNode *N);
533 SDValue visitFFLOOR(SDNode *N);
534 SDValue visitFMinMax(SDNode *N);
535 SDValue visitBRCOND(SDNode *N);
536 SDValue visitBR_CC(SDNode *N);
537 SDValue visitLOAD(SDNode *N);
538
539 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
540 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
541 SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
542
543 bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
544
545 SDValue visitSTORE(SDNode *N);
546 SDValue visitATOMIC_STORE(SDNode *N);
547 SDValue visitLIFETIME_END(SDNode *N);
548 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
549 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
550 SDValue visitBUILD_VECTOR(SDNode *N);
551 SDValue visitCONCAT_VECTORS(SDNode *N);
552 SDValue visitVECTOR_INTERLEAVE(SDNode *N);
553 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
554 SDValue visitVECTOR_SHUFFLE(SDNode *N);
555 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
556 SDValue visitINSERT_SUBVECTOR(SDNode *N);
557 SDValue visitVECTOR_COMPRESS(SDNode *N);
558 SDValue visitMLOAD(SDNode *N);
559 SDValue visitMSTORE(SDNode *N);
560 SDValue visitMGATHER(SDNode *N);
561 SDValue visitMSCATTER(SDNode *N);
562 SDValue visitMHISTOGRAM(SDNode *N);
563 SDValue visitPARTIAL_REDUCE_MLA(SDNode *N);
564 SDValue visitVPGATHER(SDNode *N);
565 SDValue visitVPSCATTER(SDNode *N);
566 SDValue visitVP_STRIDED_LOAD(SDNode *N);
567 SDValue visitVP_STRIDED_STORE(SDNode *N);
568 SDValue visitFP_TO_FP16(SDNode *N);
569 SDValue visitFP16_TO_FP(SDNode *N);
570 SDValue visitFP_TO_BF16(SDNode *N);
571 SDValue visitBF16_TO_FP(SDNode *N);
572 SDValue visitVECREDUCE(SDNode *N);
573 SDValue visitVPOp(SDNode *N);
574 SDValue visitGET_FPENV_MEM(SDNode *N);
575 SDValue visitSET_FPENV_MEM(SDNode *N);
576
577 template <class MatchContextClass>
578 SDValue visitFADDForFMACombine(SDNode *N);
579 template <class MatchContextClass>
580 SDValue visitFSUBForFMACombine(SDNode *N);
581 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
582
583 SDValue XformToShuffleWithZero(SDNode *N);
584 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
585 const SDLoc &DL,
586 SDNode *N,
587 SDValue N0,
588 SDValue N1);
589 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
590 SDValue N1, SDNodeFlags Flags);
591 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
592 SDValue N1, SDNodeFlags Flags);
593 SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
594 EVT VT, SDValue N0, SDValue N1,
595 SDNodeFlags Flags = SDNodeFlags());
596
597 SDValue visitShiftByConstant(SDNode *N);
598
599 SDValue foldSelectOfConstants(SDNode *N);
600 SDValue foldVSelectOfConstants(SDNode *N);
601 SDValue foldBinOpIntoSelect(SDNode *BO);
602 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
603 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
604 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
605 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
606 SDValue N2, SDValue N3, ISD::CondCode CC,
607 bool NotExtCompare = false);
608 SDValue convertSelectOfFPConstantsToLoadOffset(
609 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
610 ISD::CondCode CC);
611 SDValue foldSignChangeInBitcast(SDNode *N);
612 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
613 SDValue N2, SDValue N3, ISD::CondCode CC);
614 SDValue foldSelectOfBinops(SDNode *N);
615 SDValue foldSextSetcc(SDNode *N);
616 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
617 const SDLoc &DL);
618 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL);
619 SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
620 SDValue foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
621 SDValue False, ISD::CondCode CC, const SDLoc &DL);
622 SDValue foldSelectToUMin(SDValue LHS, SDValue RHS, SDValue True,
623 SDValue False, ISD::CondCode CC, const SDLoc &DL);
624 SDValue unfoldMaskedMerge(SDNode *N);
625 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
626 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
627 const SDLoc &DL, bool foldBooleans);
628 SDValue rebuildSetCC(SDValue N);
629
630 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
631 SDValue &CC, bool MatchStrict = false) const;
632 bool isOneUseSetCC(SDValue N) const;
633
634 SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
635 SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
636
637 SDValue foldCTLZToCTLS(SDValue Src, const SDLoc &DL);
638
639 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
640 unsigned HiOp);
641 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
642 SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
643 const TargetLowering &TLI);
644 SDValue foldPartialReduceMLAMulOp(SDNode *N);
645 SDValue foldPartialReduceAdd(SDNode *N);
646
647 SDValue CombineExtLoad(SDNode *N);
648 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
649 SDValue combineRepeatedFPDivisors(SDNode *N);
650 SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
651 SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf);
652 SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
653 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
654 SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
655 SDValue BuildSDIV(SDNode *N);
656 SDValue BuildSDIVPow2(SDNode *N);
657 SDValue BuildUDIV(SDNode *N);
658 SDValue BuildSREMPow2(SDNode *N);
659 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
660 SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
661 bool KnownNeverZero = false,
662 bool InexpensiveOnly = false,
663 std::optional<EVT> OutVT = std::nullopt);
664 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
665 SDValue buildRsqrtEstimate(SDValue Op);
666 SDValue buildSqrtEstimate(SDValue Op);
667 SDValue buildSqrtEstimateImpl(SDValue Op, bool Recip);
668 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
669 bool Reciprocal);
670 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
671 bool Reciprocal);
672 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
673 bool DemandHighBits = true);
674 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
675 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
676 SDValue InnerPos, SDValue InnerNeg, bool FromAdd,
677 bool HasPos, unsigned PosOpcode,
678 unsigned NegOpcode, const SDLoc &DL);
679 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
680 SDValue InnerPos, SDValue InnerNeg, bool FromAdd,
681 bool HasPos, unsigned PosOpcode,
682 unsigned NegOpcode, const SDLoc &DL);
683 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL,
684 bool FromAdd);
685 SDValue MatchLoadCombine(SDNode *N);
686 SDValue mergeTruncStores(StoreSDNode *N);
687 SDValue reduceLoadWidth(SDNode *N);
688 SDValue ReduceLoadOpStoreWidth(SDNode *N);
689 SDValue splitMergedValStore(StoreSDNode *ST);
690 SDValue TransformFPLoadStorePair(SDNode *N);
691 SDValue convertBuildVecZextToZext(SDNode *N);
692 SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
693 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
694 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
695 SDValue reduceBuildVecToShuffle(SDNode *N);
696 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
697 ArrayRef<int> VectorMask, SDValue VecIn1,
698 SDValue VecIn2, unsigned LeftIdx,
699 bool DidSplitVec);
700 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
701
702 /// Walk up chain skipping non-aliasing memory nodes,
703 /// looking for aliasing nodes and adding them to the Aliases vector.
704 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
705 SmallVectorImpl<SDValue> &Aliases);
706
707 /// Return true if there is any possibility that the two addresses overlap.
708 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
709
710 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
711 /// chain (aliasing node.)
712 SDValue FindBetterChain(SDNode *N, SDValue Chain);
713
714 /// Try to replace a store and any possibly adjacent stores on
715 /// consecutive chains with better chains. Return true only if St is
716 /// replaced.
717 ///
718 /// Notice that other chains may still be replaced even if the function
719 /// returns false.
720 bool findBetterNeighborChains(StoreSDNode *St);
721
    // Helper for findBetterNeighborChains. Walk up the store chain and add
    // additional chained stores that do not overlap and can be parallelized.
724 bool parallelizeChainedStores(StoreSDNode *St);
725
726 /// Holds a pointer to an LSBaseSDNode as well as information on where it
727 /// is located in a sequence of memory operations connected by a chain.
728 struct MemOpLink {
729 // Ptr to the mem node.
730 LSBaseSDNode *MemNode;
731
732 // Offset from the base ptr.
733 int64_t OffsetFromBase;
734
735 MemOpLink(LSBaseSDNode *N, int64_t Offset)
736 : MemNode(N), OffsetFromBase(Offset) {}
737 };
738
739 // Classify the origin of a stored value.
740 enum class StoreSource { Unknown, Constant, Extract, Load };
741 StoreSource getStoreSource(SDValue StoreVal) {
742 switch (StoreVal.getOpcode()) {
743 case ISD::Constant:
744 case ISD::ConstantFP:
745 return StoreSource::Constant;
746 case ISD::BUILD_VECTOR:
747 if (ISD::isBuildVectorOfConstantSDNodes(N: StoreVal.getNode()) ||
748 ISD::isBuildVectorOfConstantFPSDNodes(N: StoreVal.getNode()))
749 return StoreSource::Constant;
750 return StoreSource::Unknown;
751 case ISD::EXTRACT_VECTOR_ELT:
752 case ISD::EXTRACT_SUBVECTOR:
753 return StoreSource::Extract;
754 case ISD::LOAD:
755 return StoreSource::Load;
756 default:
757 return StoreSource::Unknown;
758 }
759 }
760
761 /// This is a helper function for visitMUL to check the profitability
762 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
763 /// MulNode is the original multiply, AddNode is (add x, c1),
764 /// and ConstNode is c2.
765 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
766 SDValue ConstNode);
767
768 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
769 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
770 /// the type of the loaded value to be extended.
771 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
772 EVT LoadResultTy, EVT &ExtVT);
773
774 /// Helper function to calculate whether the given Load/Store can have its
775 /// width reduced to ExtVT.
776 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
777 EVT &MemVT, unsigned ShAmt = 0);
778
779 /// Used by BackwardsPropagateMask to find suitable loads.
780 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
781 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
782 ConstantSDNode *Mask, SDNode *&NodeToMask);
783 /// Attempt to propagate a given AND node back to load leaves so that they
784 /// can be combined into narrow loads.
785 bool BackwardsPropagateMask(SDNode *N);
786
787 /// Helper function for mergeConsecutiveStores which merges the component
788 /// store chains.
789 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
790 unsigned NumStores);
791
792 /// Helper function for mergeConsecutiveStores which checks if all the store
793 /// nodes have the same underlying object. We can still reuse the first
794 /// store's pointer info if all the stores are from the same object.
795 bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
796
797 /// This is a helper function for mergeConsecutiveStores. When the source
798 /// elements of the consecutive stores are all constants or all extracted
799 /// vector elements, try to merge them into one larger store introducing
800 /// bitcasts if necessary. \return True if a merged store was created.
801 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
802 EVT MemVT, unsigned NumStores,
803 bool IsConstantSrc, bool UseVector,
804 bool UseTrunc);
805
806 /// This is a helper function for mergeConsecutiveStores. Stores that
807 /// potentially may be merged with St are placed in StoreNodes. On success,
808 /// returns a chain predecessor to all store candidates.
809 SDNode *getStoreMergeCandidates(StoreSDNode *St,
810 SmallVectorImpl<MemOpLink> &StoreNodes);
811
812 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
813 /// have indirect dependency through their operands. RootNode is the
814 /// predecessor to all stores calculated by getStoreMergeCandidates and is
815 /// used to prune the dependency check. \return True if safe to merge.
816 bool checkMergeStoreCandidatesForDependencies(
817 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
818 SDNode *RootNode);
819
820 /// Helper function for tryStoreMergeOfLoads. Checks if the load/store
821 /// chain has a call in it. \return True if a call is found.
822 bool hasCallInLdStChain(StoreSDNode *St, LoadSDNode *Ld);
823
824 /// This is a helper function for mergeConsecutiveStores. Given a list of
825 /// store candidates, find the first N that are consecutive in memory.
826 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
827 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
828 int64_t ElementSizeBytes) const;
829
830 /// This is a helper function for mergeConsecutiveStores. It is used for
831 /// store chains that are composed entirely of constant values.
832 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
833 unsigned NumConsecutiveStores,
834 EVT MemVT, SDNode *Root, bool AllowVectors);
835
836 /// This is a helper function for mergeConsecutiveStores. It is used for
837 /// store chains that are composed entirely of extracted vector elements.
838 /// When extracting multiple vector elements, try to store them in one
839 /// vector store rather than a sequence of scalar stores.
840 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
841 unsigned NumConsecutiveStores, EVT MemVT,
842 SDNode *Root);
843
844 /// This is a helper function for mergeConsecutiveStores. It is used for
845 /// store chains that are composed entirely of loaded values.
846 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
847 unsigned NumConsecutiveStores, EVT MemVT,
848 SDNode *Root, bool AllowVectors,
849 bool IsNonTemporalStore, bool IsNonTemporalLoad);
850
851 /// Merge consecutive store operations into a wide store.
852 /// This optimization uses wide integers or vectors when possible.
853 /// \return true if stores were merged.
854 bool mergeConsecutiveStores(StoreSDNode *St);
855
856 /// Try to transform a truncation where C is a constant:
857 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
858 ///
859 /// \p N needs to be a truncation and its first operand an AND. Other
860 /// requirements are checked by the function (e.g. that trunc is
861 /// single-use) and if missed an empty SDValue is returned.
862 SDValue distributeTruncateThroughAnd(SDNode *N);
863
    /// Helper function to determine whether the target supports the operation
    /// given by \p Opcode for type \p VT, that is, whether the operation
    /// is legal or custom before legalizing operations, and whether it is
    /// legal (but not custom) after legalization.
868 bool hasOperation(unsigned Opcode, EVT VT) {
869 return TLI.isOperationLegalOrCustom(Op: Opcode, VT, LegalOnly: LegalOperations);
870 }
871
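    /// Return true if ISD::UMIN is legal or custom for \p VT after type
    /// legalization (VT itself only needs to be legal or promotable to a
    /// legal integer type).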
872 bool hasUMin(EVT VT) const {
873 auto LK = TLI.getTypeConversion(Context&: *DAG.getContext(), VT);
874 return (LK.first == TargetLoweringBase::TypeLegal ||
875 LK.first == TargetLoweringBase::TypePromoteInteger) &&
876 TLI.isOperationLegalOrCustom(Op: ISD::UMIN, VT: LK.second);
877 }
878
879 public:
880 /// Runs the dag combiner on all nodes in the work list
881 void Run(CombineLevel AtLevel);
882
883 SelectionDAG &getDAG() const { return DAG; }
884
885 /// Convenience wrapper around TargetLowering::getShiftAmountTy.
886 EVT getShiftAmountTy(EVT LHSTy) {
887 return TLI.getShiftAmountTy(LHSTy, DL: DAG.getDataLayout());
888 }
889
890 /// This method returns true if we are running before type legalization or
891 /// if the specified VT is legal.
892 bool isTypeLegal(const EVT &VT) {
893 if (!LegalTypes) return true;
894 return TLI.isTypeLegal(VT);
895 }
896
897 /// Convenience wrapper around TargetLowering::getSetCCResultType
898 EVT getSetCCResultType(EVT VT) const {
899 return TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
900 }
901
902 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
903 SDValue OrigLoad, SDValue ExtLoad,
904 ISD::NodeType ExtType);
905 };
906
907/// This class is a DAGUpdateListener that removes any deleted
908/// nodes from the worklist.
909class WorklistRemover : public SelectionDAG::DAGUpdateListener {
910 DAGCombiner &DC;
911
912public:
913 explicit WorklistRemover(DAGCombiner &dc)
914 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
915
916 void NodeDeleted(SDNode *N, SDNode *E) override {
917 DC.removeFromWorklist(N);
918 }
919};
920
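/// This class is a DAGUpdateListener that adds newly-inserted nodes to the
/// pruning list so they can be deleted later if they end up unused.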
921class WorklistInserter : public SelectionDAG::DAGUpdateListener {
922 DAGCombiner &DC;
923
924public:
925 explicit WorklistInserter(DAGCombiner &dc)
926 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
927
928 // FIXME: Ideally we could add N to the worklist, but this causes exponential
929 // compile time costs in large DAGs, e.g. Halide.
930 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
931};
932
933} // end anonymous namespace
934
935//===----------------------------------------------------------------------===//
936// TargetLowering::DAGCombinerInfo implementation
937//===----------------------------------------------------------------------===//
938
939void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
940 ((DAGCombiner*)DC)->AddToWorklist(N);
941}
942
943SDValue TargetLowering::DAGCombinerInfo::
944CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
945 return ((DAGCombiner*)DC)->CombineTo(N, To: &To[0], NumTo: To.size(), AddTo);
946}
947
948SDValue TargetLowering::DAGCombinerInfo::
949CombineTo(SDNode *N, SDValue Res, bool AddTo) {
950 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
951}
952
953SDValue TargetLowering::DAGCombinerInfo::
954CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
955 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
956}
957
958bool TargetLowering::DAGCombinerInfo::
959recursivelyDeleteUnusedNodes(SDNode *N) {
960 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
961}
962
963void TargetLowering::DAGCombinerInfo::
964CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
965 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
966}
967
968//===----------------------------------------------------------------------===//
969// Helper Functions
970//===----------------------------------------------------------------------===//
971
972void DAGCombiner::deleteAndRecombine(SDNode *N) {
973 removeFromWorklist(N);
974
975 // If the operands of this node are only used by the node, they will now be
976 // dead. Make sure to re-visit them and recursively delete dead nodes.
977 for (const SDValue &Op : N->ops())
978 // For an operand generating multiple values, one of the values may
979 // become dead allowing further simplification (e.g. split index
980 // arithmetic from an indexed load).
981 if (Op->hasOneUse() || Op->getNumValues() > 1)
982 AddToWorklist(N: Op.getNode());
983
984 DAG.DeleteNode(N);
985}
986
// APInts must be the same size for most operations; this helper
// function zero extends the shorter of the pair so that they match.
// We provide an Offset so that we can create bitwidths that won't overflow.
990static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
991 unsigned Bits = Offset + std::max(a: LHS.getBitWidth(), b: RHS.getBitWidth());
992 LHS = LHS.zext(width: Bits);
993 RHS = RHS.zext(width: Bits);
994}
995
996// Return true if this node is a setcc, or is a select_cc
997// that selects between the target values used for true and false, making it
998// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
999// the appropriate nodes based on the type of node we are checking. This
1000// simplifies life a bit for the callers.
1001bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
1002 SDValue &CC, bool MatchStrict) const {
1003 if (N.getOpcode() == ISD::SETCC) {
1004 LHS = N.getOperand(i: 0);
1005 RHS = N.getOperand(i: 1);
1006 CC = N.getOperand(i: 2);
1007 return true;
1008 }
1009
1010 if (MatchStrict &&
1011 (N.getOpcode() == ISD::STRICT_FSETCC ||
1012 N.getOpcode() == ISD::STRICT_FSETCCS)) {
1013 LHS = N.getOperand(i: 1);
1014 RHS = N.getOperand(i: 2);
1015 CC = N.getOperand(i: 3);
1016 return true;
1017 }
1018
1019 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N: N.getOperand(i: 2)) ||
1020 !TLI.isConstFalseVal(N: N.getOperand(i: 3)))
1021 return false;
1022
1023 if (TLI.getBooleanContents(Type: N.getValueType()) ==
1024 TargetLowering::UndefinedBooleanContent)
1025 return false;
1026
1027 LHS = N.getOperand(i: 0);
1028 RHS = N.getOperand(i: 1);
1029 CC = N.getOperand(i: 4);
1030 return true;
1031}
1032
1033/// Return true if this is a SetCC-equivalent operation with only one use.
1034/// If this is true, it allows the users to invert the operation for free when
1035/// it is profitable to do so.
1036bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1037 SDValue N0, N1, N2;
1038 if (isSetCCEquivalent(N, LHS&: N0, RHS&: N1, CC&: N2) && N->hasOneUse())
1039 return true;
1040 return false;
1041}
1042
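// Return true if N is a constant splat vector whose splat value is the
// all-ones mask for the width of ScalarTy (i8, i16, or i32), i.e. the mask
// that a zero-extension from ScalarTy would produce.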
1043static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
1044 if (!ScalarTy.isSimple())
1045 return false;
1046
1047 uint64_t MaskForTy = 0ULL;
1048 switch (ScalarTy.getSimpleVT().SimpleTy) {
1049 case MVT::i8:
1050 MaskForTy = 0xFFULL;
1051 break;
1052 case MVT::i16:
1053 MaskForTy = 0xFFFFULL;
1054 break;
1055 case MVT::i32:
1056 MaskForTy = 0xFFFFFFFFULL;
1057 break;
1058 default:
1059 return false;
1060 break;
1061 }
1062
1063 APInt Val;
1064 if (ISD::isConstantSplatVector(N, SplatValue&: Val))
1065 return Val.getLimitedValue() == MaskForTy;
1066
1067 return false;
1068}
1069
1070// Determines if it is a constant integer or a splat/build vector of constant
1071// integers (and undefs).
1072// Do not permit build vector implicit truncation unless AllowTruncation is set.
1073static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false,
1074 bool AllowTruncation = false) {
1075 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val&: N))
1076 return !(Const->isOpaque() && NoOpaques);
1077 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
1078 return false;
1079 unsigned BitWidth = N.getScalarValueSizeInBits();
1080 for (const SDValue &Op : N->op_values()) {
1081 if (Op.isUndef())
1082 continue;
1083 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val: Op);
1084 if (!Const || (Const->isOpaque() && NoOpaques))
1085 return false;
1086 // When AllowTruncation is true, allow constants that have been promoted
1087 // during type legalization as long as the value fits in the target type.
1088 if ((AllowTruncation &&
1089 Const->getAPIntValue().getActiveBits() > BitWidth) ||
1090 (!AllowTruncation && Const->getAPIntValue().getBitWidth() != BitWidth))
1091 return false;
1092 }
1093 return true;
1094}
1095
// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
// undefs.
1098static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1099 if (V.getOpcode() != ISD::BUILD_VECTOR)
1100 return false;
1101 return isConstantOrConstantVector(N: V, NoOpaques) ||
1102 ISD::isBuildVectorOfConstantFPSDNodes(N: V.getNode());
1103}
1104
// Determine if this is an indexed load with an opaque target constant index.
1106static bool canSplitIdx(LoadSDNode *LD) {
1107 return MaySplitLoadIndex &&
1108 (LD->getOperand(Num: 2).getOpcode() != ISD::TargetConstant ||
1109 !cast<ConstantSDNode>(Val: LD->getOperand(Num: 2))->isOpaque());
1110}
1111
1112bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1113 const SDLoc &DL,
1114 SDNode *N,
1115 SDValue N0,
1116 SDValue N1) {
1117 // Currently this only tries to ensure we don't undo the GEP splits done by
1118 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1119 // we check if the following transformation would be problematic:
1120 // (load/store (add, (add, x, offset1), offset2)) ->
1121 // (load/store (add, x, offset1+offset2)).
1122
1123 // (load/store (add, (add, x, y), offset2)) ->
1124 // (load/store (add, (add, x, offset2), y)).
1125
1126 if (!N0.isAnyAdd())
1127 return false;
1128
1129 // Check for vscale addressing modes.
1130 // (load/store (add/sub (add x, y), vscale))
1131 // (load/store (add/sub (add x, y), (lsl vscale, C)))
1132 // (load/store (add/sub (add x, y), (mul vscale, C)))
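  // Compute the constant multiple of vscale: a bare VSCALE contributes its
  // operand C, a left shift contributes a factor of (1 << C), and a multiply
  // contributes its constant operand.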
1133 if ((N1.getOpcode() == ISD::VSCALE ||
1134 ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::MUL) &&
1135 N1.getOperand(i: 0).getOpcode() == ISD::VSCALE &&
1136 isa<ConstantSDNode>(Val: N1.getOperand(i: 1)))) &&
1137 N1.getValueType().getFixedSizeInBits() <= 64) {
1138 int64_t ScalableOffset = N1.getOpcode() == ISD::VSCALE
1139 ? N1.getConstantOperandVal(i: 0)
1140 : (N1.getOperand(i: 0).getConstantOperandVal(i: 0) *
1141 (N1.getOpcode() == ISD::SHL
1142 ? (1LL << N1.getConstantOperandVal(i: 1))
1143 : N1.getConstantOperandVal(i: 1)));
1144 if (Opc == ISD::SUB)
1145 ScalableOffset = -ScalableOffset;
1146 if (all_of(Range: N->users(), P: [&](SDNode *Node) {
1147 if (auto *LoadStore = dyn_cast<MemSDNode>(Val: Node);
1148 LoadStore && LoadStore->getBasePtr().getNode() == N) {
1149 TargetLoweringBase::AddrMode AM;
1150 AM.HasBaseReg = true;
1151 AM.ScalableOffset = ScalableOffset;
1152 EVT VT = LoadStore->getMemoryVT();
1153 unsigned AS = LoadStore->getAddressSpace();
1154 Type *AccessTy = VT.getTypeForEVT(Context&: *DAG.getContext());
1155 return TLI.isLegalAddressingMode(DL: DAG.getDataLayout(), AM, Ty: AccessTy,
1156 AddrSpace: AS);
1157 }
1158 return false;
1159 }))
1160 return true;
1161 }
1162
1163 if (Opc != ISD::ADD && Opc != ISD::PTRADD)
1164 return false;
1165
1166 auto *C2 = dyn_cast<ConstantSDNode>(Val&: N1);
1167 if (!C2)
1168 return false;
1169
1170 const APInt &C2APIntVal = C2->getAPIntValue();
1171 if (C2APIntVal.getSignificantBits() > 64)
1172 return false;
1173
1174 if (auto *C1 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
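    // If the inner add has no other uses, folding the constants together
    // cannot break an addressing computation that is shared with other
    // memory operations.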
1175 if (N0.hasOneUse())
1176 return false;
1177
1178 const APInt &C1APIntVal = C1->getAPIntValue();
1179 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1180 if (CombinedValueIntVal.getSignificantBits() > 64)
1181 return false;
1182 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1183
1184 for (SDNode *Node : N->users()) {
1185 if (auto *LoadStore = dyn_cast<MemSDNode>(Val: Node)) {
1186 // Is x[offset2] already not a legal addressing mode? If so then
1187 // reassociating the constants breaks nothing (we test offset2 because
1188 // that's the one we hope to fold into the load or store).
1189 TargetLoweringBase::AddrMode AM;
1190 AM.HasBaseReg = true;
1191 AM.BaseOffs = C2APIntVal.getSExtValue();
1192 EVT VT = LoadStore->getMemoryVT();
1193 unsigned AS = LoadStore->getAddressSpace();
1194 Type *AccessTy = VT.getTypeForEVT(Context&: *DAG.getContext());
1195 if (!TLI.isLegalAddressingMode(DL: DAG.getDataLayout(), AM, Ty: AccessTy, AddrSpace: AS))
1196 continue;
1197
1198 // Would x[offset1+offset2] still be a legal addressing mode?
1199 AM.BaseOffs = CombinedValue;
1200 if (!TLI.isLegalAddressingMode(DL: DAG.getDataLayout(), AM, Ty: AccessTy, AddrSpace: AS))
1201 return true;
1202 }
1203 }
1204 } else {
1205 if (auto *GA = dyn_cast<GlobalAddressSDNode>(Val: N0.getOperand(i: 1)))
1206 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1207 return false;
1208
1209 for (SDNode *Node : N->users()) {
1210 auto *LoadStore = dyn_cast<MemSDNode>(Val: Node);
1211 if (!LoadStore || !LoadStore->hasUniqueMemOperand())
1212 return false;
1213
1214 // Is x[offset2] a legal addressing mode? If so then
1215 // reassociating the constants breaks address pattern
1216 TargetLoweringBase::AddrMode AM;
1217 AM.HasBaseReg = true;
1218 AM.BaseOffs = C2APIntVal.getSExtValue();
1219 EVT VT = LoadStore->getMemoryVT();
1220 unsigned AS = LoadStore->getAddressSpace();
1221 Type *AccessTy = VT.getTypeForEVT(Context&: *DAG.getContext());
1222 if (!TLI.isLegalAddressingMode(DL: DAG.getDataLayout(), AM, Ty: AccessTy, AddrSpace: AS))
1223 return false;
1224 }
1225 return true;
1226 }
1227
1228 return false;
1229}
1230
1231/// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if
1232/// \p N0 is the same kind of operation as \p Opc.
1233SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1234 SDValue N0, SDValue N1,
1235 SDNodeFlags Flags) {
1236 EVT VT = N0.getValueType();
1237
1238 if (N0.getOpcode() != Opc)
1239 return SDValue();
1240
1241 SDValue N00 = N0.getOperand(i: 0);
1242 SDValue N01 = N0.getOperand(i: 1);
1243
1244 if (DAG.isConstantIntBuildVectorOrConstantInt(N: N01)) {
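    // nuw is only preserved when both the inner add and the outer operation
    // are known not to wrap unsigned.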
1245 SDNodeFlags NewFlags;
1246 if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1247 Flags.hasNoUnsignedWrap())
1248 NewFlags |= SDNodeFlags::NoUnsignedWrap;
1249
1250 if (DAG.isConstantIntBuildVectorOrConstantInt(N: N1)) {
1251 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1252 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opcode: Opc, DL, VT, Ops: {N01, N1})) {
1253 NewFlags.setDisjoint(Flags.hasDisjoint() &&
1254 N0->getFlags().hasDisjoint());
1255 return DAG.getNode(Opcode: Opc, DL, VT, N1: N00, N2: OpNode, Flags: NewFlags);
1256 }
1257 return SDValue();
1258 }
1259 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1260 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1261 // iff (op x, c1) has one use
1262 SDValue OpNode = DAG.getNode(Opcode: Opc, DL: SDLoc(N0), VT, N1: N00, N2: N1, Flags: NewFlags);
1263 return DAG.getNode(Opcode: Opc, DL, VT, N1: OpNode, N2: N01, Flags: NewFlags);
1264 }
1265 }
1266
1267 // Check for repeated operand logic simplifications.
1268 if (Opc == ISD::AND || Opc == ISD::OR) {
1269 // (N00 & N01) & N00 --> N00 & N01
1270 // (N00 & N01) & N01 --> N00 & N01
1271 // (N00 | N01) | N00 --> N00 | N01
1272 // (N00 | N01) | N01 --> N00 | N01
1273 if (N1 == N00 || N1 == N01)
1274 return N0;
1275 }
1276 if (Opc == ISD::XOR) {
1277 // (N00 ^ N01) ^ N00 --> N01
1278 if (N1 == N00)
1279 return N01;
1280 // (N00 ^ N01) ^ N01 --> N00
1281 if (N1 == N01)
1282 return N00;
1283 }
1284
1285 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1286 if (N1 != N01) {
      // Reassociate if (op N00, N1) already exists.
      if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
        // If (Op (Op N00, N1), N01) already exists, we need to stop
        // reassociating to avoid an infinite loop.
        if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
          return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
      }
1294 }
1295
1296 if (N1 != N00) {
      // Reassociate if (op N01, N1) already exists.
      if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
        // If (Op (Op N01, N1), N00) already exists, we need to stop
        // reassociating to avoid an infinite loop.
        if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
          return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
      }
1304 }
1305
    // Reassociate the operands from (OR/AND (OR/AND(N00, N01)), N1) to (OR/AND
    // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
    // predicate or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
1309 // comparisons with the same predicate. This enables optimizations as the
1310 // following one:
1311 // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
1312 // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
1313 if (Opc == ISD::AND || Opc == ISD::OR) {
1314 if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1315 N01->getOpcode() == ISD::SETCC) {
1316 ISD::CondCode CC1 = cast<CondCodeSDNode>(Val: N1.getOperand(i: 2))->get();
1317 ISD::CondCode CC00 = cast<CondCodeSDNode>(Val: N00.getOperand(i: 2))->get();
1318 ISD::CondCode CC01 = cast<CondCodeSDNode>(Val: N01.getOperand(i: 2))->get();
1319 if (CC1 == CC00 && CC1 != CC01) {
1320 SDValue OpNode = DAG.getNode(Opcode: Opc, DL: SDLoc(N0), VT, N1: N00, N2: N1, Flags);
1321 return DAG.getNode(Opcode: Opc, DL, VT, N1: OpNode, N2: N01, Flags);
1322 }
1323 if (CC1 == CC01 && CC1 != CC00) {
1324 SDValue OpNode = DAG.getNode(Opcode: Opc, DL: SDLoc(N0), VT, N1: N01, N2: N1, Flags);
1325 return DAG.getNode(Opcode: Opc, DL, VT, N1: OpNode, N2: N00, Flags);
1326 }
1327 }
1328 }
1329 }
1330
1331 return SDValue();
1332}
1333
1334/// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the
1335/// same kind of operation as \p Opc.
1336SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1337 SDValue N1, SDNodeFlags Flags) {
1338 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1339
1340 // Floating-point reassociation is not allowed without loose FP math.
1341 if (N0.getValueType().isFloatingPoint() ||
1342 N1.getValueType().isFloatingPoint())
1343 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1344 return SDValue();
1345
1346 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
1347 return Combined;
1348 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0: N1, N1: N0, Flags))
1349 return Combined;
1350 return SDValue();
1351}
1352
1353// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1354// Note that we only expect Flags to be passed from FP operations. For integer
1355// operations they need to be dropped.
1356SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
1357 const SDLoc &DL, EVT VT, SDValue N0,
1358 SDValue N1, SDNodeFlags Flags) {
1359 if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
1360 N0.getOperand(i: 0).getValueType() == N1.getOperand(i: 0).getValueType() &&
1361 N0->hasOneUse() && N1->hasOneUse() &&
1362 TLI.isOperationLegalOrCustom(Op: Opc, VT: N0.getOperand(i: 0).getValueType()) &&
1363 TLI.shouldReassociateReduction(RedOpc, VT: N0.getOperand(i: 0).getValueType())) {
1364 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
1365 return DAG.getNode(Opcode: RedOpc, DL, VT,
1366 Operand: DAG.getNode(Opcode: Opc, DL, VT: N0.getOperand(i: 0).getValueType(),
1367 N1: N0.getOperand(i: 0), N2: N1.getOperand(i: 0)));
1368 }
1369
1370 // Reassociate op(op(vecreduce(a), b), op(vecreduce(c), d)) into
1371 // op(vecreduce(op(a, c)), op(b, d)), to combine the reductions into a
1372 // single node.
1373 SDValue A, B, C, D, RedA, RedB;
1374 if (sd_match(N: N0, P: m_OneUse(P: m_c_BinOp(
1375 Opc,
1376 L: m_AllOf(preds: m_OneUse(P: m_UnaryOp(Opc: RedOpc, Op: m_Value(N&: A))),
1377 preds: m_Value(N&: RedA)),
1378 R: m_Value(N&: B)))) &&
1379 sd_match(N: N1, P: m_OneUse(P: m_c_BinOp(
1380 Opc,
1381 L: m_AllOf(preds: m_OneUse(P: m_UnaryOp(Opc: RedOpc, Op: m_Value(N&: C))),
1382 preds: m_Value(N&: RedB)),
1383 R: m_Value(N&: D)))) &&
1384 !sd_match(N: B, P: m_UnaryOp(Opc: RedOpc, Op: m_Value())) &&
1385 !sd_match(N: D, P: m_UnaryOp(Opc: RedOpc, Op: m_Value())) &&
1386 A.getValueType() == C.getValueType() &&
1387 hasOperation(Opcode: Opc, VT: A.getValueType()) &&
1388 TLI.shouldReassociateReduction(RedOpc, VT)) {
1389 if ((Opc == ISD::FADD || Opc == ISD::FMUL) &&
1390 (!N0->getFlags().hasAllowReassociation() ||
1391 !N1->getFlags().hasAllowReassociation() ||
1392 !RedA->getFlags().hasAllowReassociation() ||
1393 !RedB->getFlags().hasAllowReassociation()))
1394 return SDValue();
1395 SelectionDAG::FlagInserter FlagsInserter(
1396 DAG, Flags & N0->getFlags() & N1->getFlags() & RedA->getFlags() &
1397 RedB->getFlags());
1398 SDValue Op = DAG.getNode(Opcode: Opc, DL, VT: A.getValueType(), N1: A, N2: C);
1399 SDValue Red = DAG.getNode(Opcode: RedOpc, DL, VT, Operand: Op);
1400 SDValue Op2 = DAG.getNode(Opcode: Opc, DL, VT, N1: B, N2: D);
1401 return DAG.getNode(Opcode: Opc, DL, VT, N1: Red, N2: Op2);
1402 }
1403 return SDValue();
1404}
1405
1406SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1407 bool AddTo) {
1408 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1409 ++NodesCombined;
1410 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1411 To[0].dump(&DAG);
1412 dbgs() << " and " << NumTo - 1 << " other values\n");
1413 for (unsigned i = 0, e = NumTo; i != e; ++i)
1414 assert((!To[i].getNode() ||
1415 N->getValueType(i) == To[i].getValueType()) &&
1416 "Cannot combine value to value of different type!");
1417
1418 WorklistRemover DeadNodes(*this);
1419 DAG.ReplaceAllUsesWith(From: N, To);
1420 if (AddTo) {
1421 // Push the new nodes and any users onto the worklist
1422 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1423 if (To[i].getNode())
1424 AddToWorklistWithUsers(N: To[i].getNode());
1425 }
1426 }
1427
1428 // Finally, if the node is now dead, remove it from the graph. The node
1429 // may not be dead if the replacement process recursively simplified to
1430 // something else needing this node.
1431 if (N->use_empty())
1432 deleteAndRecombine(N);
1433 return SDValue(N, 0);
1434}
1435
1436void DAGCombiner::
1437CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1438 // Replace the old value with the new one.
1439 ++NodesCombined;
1440 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1441 dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1442
1443 // Replace all uses.
1444 DAG.ReplaceAllUsesOfValueWith(From: TLO.Old, To: TLO.New);
1445
1446 // Push the new node and any (possibly new) users onto the worklist.
1447 AddToWorklistWithUsers(N: TLO.New.getNode());
1448
1449 // Finally, if the node is now dead, remove it from the graph.
1450 recursivelyDeleteUnusedNodes(N: TLO.Old.getNode());
1451}
1452
1453/// Check the specified integer node value to see if it can be simplified or if
1454/// things it uses can be simplified by bit propagation. If so, return true.
1455bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1456 const APInt &DemandedElts,
1457 bool AssumeSingleUse) {
1458 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1459 KnownBits Known;
1460 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth: 0,
1461 AssumeSingleUse))
1462 return false;
1463
1464 // Revisit the node.
1465 AddToWorklist(N: Op.getNode());
1466
1467 CommitTargetLoweringOpt(TLO);
1468 return true;
1469}
1470
1471/// Check the specified vector node value to see if it can be simplified or
1472/// if things it uses can be simplified as it only uses some of the elements.
1473/// If so, return true.
1474bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1475 const APInt &DemandedElts,
1476 bool AssumeSingleUse) {
1477 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1478 APInt KnownUndef, KnownZero;
1479 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedEltMask: DemandedElts, KnownUndef, KnownZero,
1480 TLO, Depth: 0, AssumeSingleUse))
1481 return false;
1482
1483 // Revisit the node.
1484 AddToWorklist(N: Op.getNode());
1485
1486 CommitTargetLoweringOpt(TLO);
1487 return true;
1488}
1489
1490void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1491 SDLoc DL(Load);
1492 EVT VT = Load->getValueType(ResNo: 0);
1493 SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: SDValue(ExtLoad, 0));
1494
1495 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1496 Trunc.dump(&DAG); dbgs() << '\n');
1497
1498 DAG.ReplaceAllUsesOfValueWith(From: SDValue(Load, 0), To: Trunc);
1499 DAG.ReplaceAllUsesOfValueWith(From: SDValue(Load, 1), To: SDValue(ExtLoad, 1));
1500
1501 AddToWorklist(N: Trunc.getNode());
1502 recursivelyDeleteUnusedNodes(N: Load);
1503}
1504
1505SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1506 Replace = false;
1507 SDLoc DL(Op);
1508 if (ISD::isUNINDEXEDLoad(N: Op.getNode())) {
1509 LoadSDNode *LD = cast<LoadSDNode>(Val&: Op);
1510 EVT MemVT = LD->getMemoryVT();
1511 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(N: LD) ? ISD::EXTLOAD
1512 : LD->getExtensionType();
1513 Replace = true;
1514 return DAG.getExtLoad(ExtType, dl: DL, VT: PVT,
1515 Chain: LD->getChain(), Ptr: LD->getBasePtr(),
1516 MemVT, MMO: LD->getMemOperand());
1517 }
1518
1519 unsigned Opc = Op.getOpcode();
1520 switch (Opc) {
1521 default: break;
1522 case ISD::AssertSext:
1523 if (SDValue Op0 = SExtPromoteOperand(Op: Op.getOperand(i: 0), PVT))
1524 return DAG.getNode(Opcode: ISD::AssertSext, DL, VT: PVT, N1: Op0, N2: Op.getOperand(i: 1));
1525 break;
1526 case ISD::AssertZext:
1527 if (SDValue Op0 = ZExtPromoteOperand(Op: Op.getOperand(i: 0), PVT))
1528 return DAG.getNode(Opcode: ISD::AssertZext, DL, VT: PVT, N1: Op0, N2: Op.getOperand(i: 1));
1529 break;
1530 case ISD::Constant: {
1531 unsigned ExtOpc =
1532 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1533 return DAG.getNode(Opcode: ExtOpc, DL, VT: PVT, Operand: Op);
1534 }
1535 }
1536
1537 if (!TLI.isOperationLegal(Op: ISD::ANY_EXTEND, VT: PVT))
1538 return SDValue();
1539 return DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: PVT, Operand: Op);
1540}
1541
1542SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1543 if (!TLI.isOperationLegal(Op: ISD::SIGN_EXTEND_INREG, VT: PVT))
1544 return SDValue();
1545 EVT OldVT = Op.getValueType();
1546 SDLoc DL(Op);
1547 bool Replace = false;
1548 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1549 if (!NewOp.getNode())
1550 return SDValue();
1551 AddToWorklist(N: NewOp.getNode());
1552
1553 if (Replace)
1554 ReplaceLoadWithPromotedLoad(Load: Op.getNode(), ExtLoad: NewOp.getNode());
1555 return DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: NewOp.getValueType(), N1: NewOp,
1556 N2: DAG.getValueType(OldVT));
1557}
1558
1559SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1560 EVT OldVT = Op.getValueType();
1561 SDLoc DL(Op);
1562 bool Replace = false;
1563 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1564 if (!NewOp.getNode())
1565 return SDValue();
1566 AddToWorklist(N: NewOp.getNode());
1567
1568 if (Replace)
1569 ReplaceLoadWithPromotedLoad(Load: Op.getNode(), ExtLoad: NewOp.getNode());
1570 return DAG.getZeroExtendInReg(Op: NewOp, DL, VT: OldVT);
1571}
1572
1573/// Promote the specified integer binary operation if the target indicates it is
1574/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1575/// i32 since i16 instructions are longer.
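/// For example, on such a target an i16 add is rewritten roughly as
///   (i16 add x, y) -> (i16 trunc (i32 add (i32 anyext x), (i32 anyext y)))
/// (an illustrative sketch; loads are promoted via extending loads instead).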
1576SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1577 if (!LegalOperations)
1578 return SDValue();
1579
1580 EVT VT = Op.getValueType();
1581 if (VT.isVector() || !VT.isInteger())
1582 return SDValue();
1583
1584 // If operation type is 'undesirable', e.g. i16 on x86, consider
1585 // promoting it.
1586 unsigned Opc = Op.getOpcode();
1587 if (TLI.isTypeDesirableForOp(Opc, VT))
1588 return SDValue();
1589
1590 EVT PVT = VT;
1591 // Consult target whether it is a good idea to promote this operation and
1592 // what's the right type to promote it to.
1593 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1594 assert(PVT != VT && "Don't know what type to promote to!");
1595
1596 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1597
1598 bool Replace0 = false;
1599 SDValue N0 = Op.getOperand(i: 0);
1600 SDValue NN0 = PromoteOperand(Op: N0, PVT, Replace&: Replace0);
1601
1602 bool Replace1 = false;
1603 SDValue N1 = Op.getOperand(i: 1);
1604 SDValue NN1 = PromoteOperand(Op: N1, PVT, Replace&: Replace1);
1605 SDLoc DL(Op);
1606
1607 SDValue RV =
1608 DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: DAG.getNode(Opcode: Opc, DL, VT: PVT, N1: NN0, N2: NN1));
1609
1610 // We are always replacing N0/N1's use in N and only need additional
1611 // replacements if there are additional uses.
1612 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1613 // (SDValue) here because the node may reference multiple values
1614 // (for example, the chain value of a load node).
1615 Replace0 &= !N0->hasOneUse();
1616 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1617
1618 // Combine Op here so it is preserved past replacements.
1619 CombineTo(N: Op.getNode(), Res: RV);
1620
1621 // If operands have a use ordering, make sure we deal with
1622 // predecessor first.
1623 if (Replace0 && Replace1 && N0->isPredecessorOf(N: N1.getNode())) {
1624 std::swap(a&: N0, b&: N1);
1625 std::swap(a&: NN0, b&: NN1);
1626 }
1627
1628 if (Replace0) {
1629 AddToWorklist(N: NN0.getNode());
1630 ReplaceLoadWithPromotedLoad(Load: N0.getNode(), ExtLoad: NN0.getNode());
1631 }
1632 if (Replace1) {
1633 AddToWorklist(N: NN1.getNode());
1634 ReplaceLoadWithPromotedLoad(Load: N1.getNode(), ExtLoad: NN1.getNode());
1635 }
1636 return Op;
1637 }
1638 return SDValue();
1639}
1640
1641/// Promote the specified integer shift operation if the target indicates it is
1642/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1643/// i32 since i16 instructions are longer.
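/// For example, a logical right shift is promoted roughly as
///   (i16 srl x, c) -> (i16 trunc (i32 srl (i32 zext x), c))
/// while an arithmetic right shift sign-extends the shifted value instead
/// (illustrative sketch; the shift amount is left unchanged).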
1644SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1645 if (!LegalOperations)
1646 return SDValue();
1647
1648 EVT VT = Op.getValueType();
1649 if (VT.isVector() || !VT.isInteger())
1650 return SDValue();
1651
1652 // If operation type is 'undesirable', e.g. i16 on x86, consider
1653 // promoting it.
1654 unsigned Opc = Op.getOpcode();
1655 if (TLI.isTypeDesirableForOp(Opc, VT))
1656 return SDValue();
1657
1658 EVT PVT = VT;
1659 // Consult target whether it is a good idea to promote this operation and
1660 // what's the right type to promote it to.
1661 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1662 assert(PVT != VT && "Don't know what type to promote to!");
1663
1664 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1665
1666 SDNodeFlags TruncFlags;
1667 bool Replace = false;
1668 SDValue N0 = Op.getOperand(i: 0);
1669 if (Opc == ISD::SRA) {
1670 N0 = SExtPromoteOperand(Op: N0, PVT);
1671 } else if (Opc == ISD::SRL) {
1672 N0 = ZExtPromoteOperand(Op: N0, PVT);
1673 } else {
1674 if (Op->getFlags().hasNoUnsignedWrap()) {
1675 N0 = ZExtPromoteOperand(Op: N0, PVT);
1676 TruncFlags = SDNodeFlags::NoUnsignedWrap;
1677 } else if (Op->getFlags().hasNoSignedWrap()) {
1678 N0 = SExtPromoteOperand(Op: N0, PVT);
1679 TruncFlags = SDNodeFlags::NoSignedWrap;
1680 } else {
1681 N0 = PromoteOperand(Op: N0, PVT, Replace);
1682 }
1683 }
1684
1685 if (!N0.getNode())
1686 return SDValue();
1687
1688 SDLoc DL(Op);
1689 SDValue N1 = Op.getOperand(i: 1);
1690 SDValue RV = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT,
1691 Operand: DAG.getNode(Opcode: Opc, DL, VT: PVT, N1: N0, N2: N1), Flags: TruncFlags);
1692
1693 if (Replace)
1694 ReplaceLoadWithPromotedLoad(Load: Op.getOperand(i: 0).getNode(), ExtLoad: N0.getNode());
1695
1696 // Deal with Op being deleted.
1697 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1698 return RV;
1699 }
1700 return SDValue();
1701}
1702
1703SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1704 if (!LegalOperations)
1705 return SDValue();
1706
1707 EVT VT = Op.getValueType();
1708 if (VT.isVector() || !VT.isInteger())
1709 return SDValue();
1710
1711 // If operation type is 'undesirable', e.g. i16 on x86, consider
1712 // promoting it.
1713 unsigned Opc = Op.getOpcode();
1714 if (TLI.isTypeDesirableForOp(Opc, VT))
1715 return SDValue();
1716
1717 EVT PVT = VT;
1718 // Consult target whether it is a good idea to promote this operation and
1719 // what's the right type to promote it to.
1720 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1721 assert(PVT != VT && "Don't know what type to promote to!");
1722 // fold (aext (aext x)) -> (aext x)
1723 // fold (aext (zext x)) -> (zext x)
1724 // fold (aext (sext x)) -> (sext x)
1725 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1726 return DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc(Op), VT, Operand: Op.getOperand(i: 0));
1727 }
1728 return SDValue();
1729}
1730
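/// Promote the specified integer load if the target indicates it is
/// beneficial, roughly (i16 load p) -> (i16 trunc (i32 extload p)), replacing
/// both the value and the chain results of the original load (illustrative
/// sketch; see the body below for the exact replacement).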
1731bool DAGCombiner::PromoteLoad(SDValue Op) {
1732 if (!LegalOperations)
1733 return false;
1734
1735 if (!ISD::isUNINDEXEDLoad(N: Op.getNode()))
1736 return false;
1737
1738 EVT VT = Op.getValueType();
1739 if (VT.isVector() || !VT.isInteger())
1740 return false;
1741
1742 // If operation type is 'undesirable', e.g. i16 on x86, consider
1743 // promoting it.
1744 unsigned Opc = Op.getOpcode();
1745 if (TLI.isTypeDesirableForOp(Opc, VT))
1746 return false;
1747
1748 EVT PVT = VT;
1749 // Consult target whether it is a good idea to promote this operation and
1750 // what's the right type to promote it to.
1751 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1752 assert(PVT != VT && "Don't know what type to promote to!");
1753
1754 SDLoc DL(Op);
1755 SDNode *N = Op.getNode();
1756 LoadSDNode *LD = cast<LoadSDNode>(Val: N);
1757 EVT MemVT = LD->getMemoryVT();
1758 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(N: LD) ? ISD::EXTLOAD
1759 : LD->getExtensionType();
1760 SDValue NewLD = DAG.getExtLoad(ExtType, dl: DL, VT: PVT,
1761 Chain: LD->getChain(), Ptr: LD->getBasePtr(),
1762 MemVT, MMO: LD->getMemOperand());
1763 SDValue Result = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: NewLD);
1764
1765 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1766 Result.dump(&DAG); dbgs() << '\n');
1767
1768 DAG.ReplaceAllUsesOfValueWith(From: SDValue(N, 0), To: Result);
1769 DAG.ReplaceAllUsesOfValueWith(From: SDValue(N, 1), To: NewLD.getValue(R: 1));
1770
1771 AddToWorklist(N: Result.getNode());
1772 recursivelyDeleteUnusedNodes(N);
1773 return true;
1774 }
1775
1776 return false;
1777}
1778
1779/// Recursively delete a node which has no uses and any operands for
1780/// which it is the only use.
1781///
1782/// Note that this both deletes the nodes and removes them from the worklist.
1783 /// It also adds any nodes that have had a user deleted to the worklist, as
1784 /// they may now have only one use and be subject to other combines.
1785bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1786 if (!N->use_empty())
1787 return false;
1788
1789 SmallSetVector<SDNode *, 16> Nodes;
1790 Nodes.insert(X: N);
1791 do {
1792 N = Nodes.pop_back_val();
1793 if (!N)
1794 continue;
1795
1796 if (N->use_empty()) {
1797 for (const SDValue &ChildN : N->op_values())
1798 Nodes.insert(X: ChildN.getNode());
1799
1800 removeFromWorklist(N);
1801 DAG.DeleteNode(N);
1802 } else {
1803 AddToWorklist(N);
1804 }
1805 } while (!Nodes.empty());
1806 return true;
1807}
1808
1809//===----------------------------------------------------------------------===//
1810// Main DAG Combiner implementation
1811//===----------------------------------------------------------------------===//
1812
1813void DAGCombiner::Run(CombineLevel AtLevel) {
1814 // Set the instance variables so that the various visit routines may use them.
1815 Level = AtLevel;
1816 LegalDAG = Level >= AfterLegalizeDAG;
1817 LegalOperations = Level >= AfterLegalizeVectorOps;
1818 LegalTypes = Level >= AfterLegalizeTypes;
1819
1820 WorklistInserter AddNodes(*this);
1821
1822 // Add all the dag nodes to the worklist.
1823 //
1824 // Note: Not all nodes are added to the PruningList here; this is because the
1825 // only nodes which can be deleted are those which have no uses, and all other
1826 // nodes which would otherwise be added to the worklist by the first call to
1827 // getNextWorklistEntry are already present in it.
1828 for (SDNode &Node : DAG.allnodes())
1829 AddToWorklist(N: &Node, /* IsCandidateForPruning */ Node.use_empty());
1830
1831 // Create a dummy node (which is not added to allnodes) that adds a reference
1832 // to the root node, preventing it from being deleted and tracking any
1833 // changes to the root.
1834 HandleSDNode Dummy(DAG.getRoot());
1835
1836 // While we have a valid worklist entry node, try to combine it.
1837 while (SDNode *N = getNextWorklistEntry()) {
1838 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1839 // N is deleted from the DAG, since they too may now be dead or may have a
1840 // reduced number of uses, allowing other xforms.
1841 if (recursivelyDeleteUnusedNodes(N))
1842 continue;
1843
1844 WorklistRemover DeadNodes(*this);
1845
1846 // If this combine is running after legalizing the DAG, re-legalize any
1847 // nodes pulled off the worklist.
1848 if (LegalDAG) {
1849 SmallSetVector<SDNode *, 16> UpdatedNodes;
1850 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1851
1852 for (SDNode *LN : UpdatedNodes)
1853 AddToWorklistWithUsers(N: LN);
1854
1855 if (!NIsValid)
1856 continue;
1857 }
1858
1859 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1860
1861 // Add any operands of the new node which have not yet been combined to the
1862 // worklist as well. getNextWorklistEntry flags nodes that have been
1863 // combined before. Because the worklist uniques things already, this won't
1864 // repeatedly process the same operand.
1865 for (const SDValue &ChildN : N->op_values())
1866 AddToWorklist(N: ChildN.getNode(), /*IsCandidateForPruning=*/true,
1867 /*SkipIfCombinedBefore=*/true);
1868
1869 SDValue RV = combine(N);
1870
1871 if (!RV.getNode())
1872 continue;
1873
1874 ++NodesCombined;
1875
1876 // Invalidate cached info.
1877 ChainsWithoutMergeableStores.clear();
1878
1879 // If we get back the same node we passed in, rather than a new node or
1880 // zero, we know that the node must have defined multiple values and
1881 // CombineTo was used. Since CombineTo takes care of the worklist
1882 // mechanics for us, we have no work to do in this case.
1883 if (RV.getNode() == N)
1884 continue;
1885
1886 assert(N->getOpcode() != ISD::DELETED_NODE &&
1887 RV.getOpcode() != ISD::DELETED_NODE &&
1888 "Node was deleted but visit returned new node!");
1889
1890 LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1891
1892 if (N->getNumValues() == RV->getNumValues())
1893 DAG.ReplaceAllUsesWith(From: N, To: RV.getNode());
1894 else {
1895 assert(N->getValueType(0) == RV.getValueType() &&
1896 N->getNumValues() == 1 && "Type mismatch");
1897 DAG.ReplaceAllUsesWith(From: N, To: &RV);
1898 }
1899
1900 // Push the new node and any users onto the worklist. Omit this if the
1901 // new node is the EntryToken (e.g. if a store managed to get optimized
1902 // out), because re-visiting the EntryToken and its users will not uncover
1903 // any additional opportunities, but there may be a large number of such
1904 // users, potentially causing compile time explosion.
1905 if (RV.getOpcode() != ISD::EntryToken)
1906 AddToWorklistWithUsers(N: RV.getNode());
1907
1908 // Finally, if the node is now dead, remove it from the graph. The node
1909 // may not be dead if the replacement process recursively simplified to
1910 // something else needing this node. This will also take care of adding any
1911 // operands which have lost a user to the worklist.
1912 recursivelyDeleteUnusedNodes(N);
1913 }
1914
1915 // If the root changed (e.g. it was a dead load), update the root.
1916 DAG.setRoot(Dummy.getValue());
1917 DAG.RemoveDeadNodes();
1918}
1919
1920SDValue DAGCombiner::visit(SDNode *N) {
1921 // clang-format off
1922 switch (N->getOpcode()) {
1923 default: break;
1924 case ISD::TokenFactor: return visitTokenFactor(N);
1925 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1926 case ISD::ADD: return visitADD(N);
1927 case ISD::PTRADD: return visitPTRADD(N);
1928 case ISD::SUB: return visitSUB(N);
1929 case ISD::SADDSAT:
1930 case ISD::UADDSAT: return visitADDSAT(N);
1931 case ISD::SSUBSAT:
1932 case ISD::USUBSAT: return visitSUBSAT(N);
1933 case ISD::ADDC: return visitADDC(N);
1934 case ISD::SADDO:
1935 case ISD::UADDO: return visitADDO(N);
1936 case ISD::SUBC: return visitSUBC(N);
1937 case ISD::SSUBO:
1938 case ISD::USUBO: return visitSUBO(N);
1939 case ISD::ADDE: return visitADDE(N);
1940 case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
1941 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1942 case ISD::SUBE: return visitSUBE(N);
1943 case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
1944 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1945 case ISD::SMULFIX:
1946 case ISD::SMULFIXSAT:
1947 case ISD::UMULFIX:
1948 case ISD::UMULFIXSAT: return visitMULFIX(N);
1949 case ISD::MUL: return visitMUL<EmptyMatchContext>(N);
1950 case ISD::SDIV: return visitSDIV(N);
1951 case ISD::UDIV: return visitUDIV(N);
1952 case ISD::SREM:
1953 case ISD::UREM: return visitREM(N);
1954 case ISD::MULHU: return visitMULHU(N);
1955 case ISD::MULHS: return visitMULHS(N);
1956 case ISD::AVGFLOORS:
1957 case ISD::AVGFLOORU:
1958 case ISD::AVGCEILS:
1959 case ISD::AVGCEILU: return visitAVG(N);
1960 case ISD::ABDS:
1961 case ISD::ABDU: return visitABD(N);
1962 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1963 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1964 case ISD::SMULO:
1965 case ISD::UMULO: return visitMULO(N);
1966 case ISD::SMIN:
1967 case ISD::SMAX:
1968 case ISD::UMIN:
1969 case ISD::UMAX: return visitIMINMAX(N);
1970 case ISD::AND: return visitAND(N);
1971 case ISD::OR: return visitOR(N);
1972 case ISD::XOR: return visitXOR(N);
1973 case ISD::SHL: return visitSHL(N);
1974 case ISD::SRA: return visitSRA(N);
1975 case ISD::SRL: return visitSRL(N);
1976 case ISD::ROTR:
1977 case ISD::ROTL: return visitRotate(N);
1978 case ISD::FSHL:
1979 case ISD::FSHR: return visitFunnelShift(N);
1980 case ISD::SSHLSAT:
1981 case ISD::USHLSAT: return visitSHLSAT(N);
1982 case ISD::ABS: return visitABS(N);
1983 case ISD::CLMUL:
1984 case ISD::CLMULR:
1985 case ISD::CLMULH: return visitCLMUL(N);
1986 case ISD::BSWAP: return visitBSWAP(N);
1987 case ISD::BITREVERSE: return visitBITREVERSE(N);
1988 case ISD::CTLZ: return visitCTLZ(N);
1989 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1990 case ISD::CTTZ: return visitCTTZ(N);
1991 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1992 case ISD::CTPOP: return visitCTPOP(N);
1993 case ISD::SELECT: return visitSELECT(N);
1994 case ISD::VSELECT: return visitVSELECT(N);
1995 case ISD::SELECT_CC: return visitSELECT_CC(N);
1996 case ISD::SETCC: return visitSETCC(N);
1997 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1998 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1999 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
2000 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
2001 case ISD::AssertSext:
2002 case ISD::AssertZext: return visitAssertExt(N);
2003 case ISD::AssertAlign: return visitAssertAlign(N);
2004 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
2005 case ISD::SIGN_EXTEND_VECTOR_INREG:
2006 case ISD::ZERO_EXTEND_VECTOR_INREG:
2007 case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
2008 case ISD::TRUNCATE: return visitTRUNCATE(N);
2009 case ISD::TRUNCATE_USAT_U: return visitTRUNCATE_USAT_U(N);
2010 case ISD::BITCAST: return visitBITCAST(N);
2011 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
2012 case ISD::FADD: return visitFADD(N);
2013 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
2014 case ISD::FSUB: return visitFSUB(N);
2015 case ISD::FMUL: return visitFMUL(N);
2016 case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
2017 case ISD::FMAD: return visitFMAD(N);
2018 case ISD::FMULADD: return visitFMULADD(N);
2019 case ISD::FDIV: return visitFDIV(N);
2020 case ISD::FREM: return visitFREM(N);
2021 case ISD::FSQRT: return visitFSQRT(N);
2022 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
2023 case ISD::FPOW: return visitFPOW(N);
2024 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
2025 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
2026 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
2027 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
2028 case ISD::LROUND:
2029 case ISD::LLROUND:
2030 case ISD::LRINT:
2031 case ISD::LLRINT: return visitXROUND(N);
2032 case ISD::FP_ROUND: return visitFP_ROUND(N);
2033 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
2034 case ISD::FNEG: return visitFNEG(N);
2035 case ISD::FABS: return visitFABS(N);
2036 case ISD::FFLOOR: return visitFFLOOR(N);
2037 case ISD::FMINNUM:
2038 case ISD::FMAXNUM:
2039 case ISD::FMINIMUM:
2040 case ISD::FMAXIMUM:
2041 case ISD::FMINIMUMNUM:
2042 case ISD::FMAXIMUMNUM: return visitFMinMax(N);
2043 case ISD::FCEIL: return visitFCEIL(N);
2044 case ISD::FTRUNC: return visitFTRUNC(N);
2045 case ISD::FFREXP: return visitFFREXP(N);
2046 case ISD::BRCOND: return visitBRCOND(N);
2047 case ISD::BR_CC: return visitBR_CC(N);
2048 case ISD::LOAD: return visitLOAD(N);
2049 case ISD::STORE: return visitSTORE(N);
2050 case ISD::ATOMIC_STORE: return visitATOMIC_STORE(N);
2051 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
2052 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
2053 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
2054 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
2055 case ISD::VECTOR_INTERLEAVE: return visitVECTOR_INTERLEAVE(N);
2056 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
2057 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
2058 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
2059 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
2060 case ISD::MGATHER: return visitMGATHER(N);
2061 case ISD::MLOAD: return visitMLOAD(N);
2062 case ISD::MSCATTER: return visitMSCATTER(N);
2063 case ISD::MSTORE: return visitMSTORE(N);
2064 case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM: return visitMHISTOGRAM(N);
2065 case ISD::PARTIAL_REDUCE_SMLA:
2066 case ISD::PARTIAL_REDUCE_UMLA:
2067 case ISD::PARTIAL_REDUCE_SUMLA:
2068 case ISD::PARTIAL_REDUCE_FMLA:
2069 return visitPARTIAL_REDUCE_MLA(N);
2070 case ISD::VECTOR_COMPRESS: return visitVECTOR_COMPRESS(N);
2071 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
2072 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
2073 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
2074 case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
2075 case ISD::BF16_TO_FP: return visitBF16_TO_FP(N);
2076 case ISD::FREEZE: return visitFREEZE(N);
2077 case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
2078 case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
2079 case ISD::FCANONICALIZE: return visitFCANONICALIZE(N);
2080 case ISD::VECREDUCE_FADD:
2081 case ISD::VECREDUCE_FMUL:
2082 case ISD::VECREDUCE_ADD:
2083 case ISD::VECREDUCE_MUL:
2084 case ISD::VECREDUCE_AND:
2085 case ISD::VECREDUCE_OR:
2086 case ISD::VECREDUCE_XOR:
2087 case ISD::VECREDUCE_SMAX:
2088 case ISD::VECREDUCE_SMIN:
2089 case ISD::VECREDUCE_UMAX:
2090 case ISD::VECREDUCE_UMIN:
2091 case ISD::VECREDUCE_FMAX:
2092 case ISD::VECREDUCE_FMIN:
2093 case ISD::VECREDUCE_FMAXIMUM:
2094 case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
2095#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
2096#include "llvm/IR/VPIntrinsics.def"
2097 return visitVPOp(N);
2098 }
2099 // clang-format on
2100 return SDValue();
2101}
2102
2103SDValue DAGCombiner::combine(SDNode *N) {
2104 if (!DebugCounter::shouldExecute(Counter&: DAGCombineCounter))
2105 return SDValue();
2106
2107 SDValue RV;
2108 if (!DisableGenericCombines)
2109 RV = visit(N);
2110
2111 // If nothing happened, try a target-specific DAG combine.
2112 if (!RV.getNode()) {
2113 assert(N->getOpcode() != ISD::DELETED_NODE &&
2114 "Node was deleted but visit returned NULL!");
2115
2116 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
2117 TLI.hasTargetDAGCombine(NT: (ISD::NodeType)N->getOpcode())) {
2118
2119 // Expose the DAG combiner to the target combiner impls.
2120 TargetLowering::DAGCombinerInfo
2121 DagCombineInfo(DAG, Level, false, this);
2122
2123 RV = TLI.PerformDAGCombine(N, DCI&: DagCombineInfo);
2124 }
2125 }
2126
2127 // If nothing happened still, try promoting the operation.
2128 if (!RV.getNode()) {
2129 switch (N->getOpcode()) {
2130 default: break;
2131 case ISD::ADD:
2132 case ISD::SUB:
2133 case ISD::MUL:
2134 case ISD::AND:
2135 case ISD::OR:
2136 case ISD::XOR:
2137 RV = PromoteIntBinOp(Op: SDValue(N, 0));
2138 break;
2139 case ISD::SHL:
2140 case ISD::SRA:
2141 case ISD::SRL:
2142 RV = PromoteIntShiftOp(Op: SDValue(N, 0));
2143 break;
2144 case ISD::SIGN_EXTEND:
2145 case ISD::ZERO_EXTEND:
2146 case ISD::ANY_EXTEND:
2147 RV = PromoteExtend(Op: SDValue(N, 0));
2148 break;
2149 case ISD::LOAD:
2150 if (PromoteLoad(Op: SDValue(N, 0)))
2151 RV = SDValue(N, 0);
2152 break;
2153 }
2154 }
2155
2156 // If N is a commutative binary node, try to eliminate it if the commuted
2157 // version is already present in the DAG.
2158 if (!RV.getNode() && TLI.isCommutativeBinOp(Opcode: N->getOpcode())) {
2159 SDValue N0 = N->getOperand(Num: 0);
2160 SDValue N1 = N->getOperand(Num: 1);
2161
2162 // Constant operands are canonicalized to RHS.
2163 if (N0 != N1 && (isa<ConstantSDNode>(Val: N0) || !isa<ConstantSDNode>(Val: N1))) {
2164 SDValue Ops[] = {N1, N0};
2165 SDNode *CSENode = DAG.getNodeIfExists(Opcode: N->getOpcode(), VTList: N->getVTList(), Ops,
2166 Flags: N->getFlags());
2167 if (CSENode)
2168 return SDValue(CSENode, 0);
2169 }
2170 }
2171
2172 return RV;
2173}
2174
2175/// Given a node, return its input chain if it has one, otherwise return a null
2176/// sd operand.
2177static SDValue getInputChainForNode(SDNode *N) {
2178 if (unsigned NumOps = N->getNumOperands()) {
2179 if (N->getOperand(Num: 0).getValueType() == MVT::Other)
2180 return N->getOperand(Num: 0);
2181 if (N->getOperand(Num: NumOps-1).getValueType() == MVT::Other)
2182 return N->getOperand(Num: NumOps-1);
2183 for (unsigned i = 1; i < NumOps-1; ++i)
2184 if (N->getOperand(Num: i).getValueType() == MVT::Other)
2185 return N->getOperand(Num: i);
2186 }
2187 return SDValue();
2188}
2189
2190SDValue DAGCombiner::visitFCANONICALIZE(SDNode *N) {
2191 SDValue Operand = N->getOperand(Num: 0);
2192 EVT VT = Operand.getValueType();
2193 SDLoc dl(N);
2194
2195 // Canonicalize undef to quiet NaN.
2196 if (Operand.isUndef()) {
2197 APFloat CanonicalQNaN = APFloat::getQNaN(Sem: VT.getFltSemantics());
2198 return DAG.getConstantFP(Val: CanonicalQNaN, DL: dl, VT);
2199 }
2200 return SDValue();
2201}
2202
2203SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
2204 // If N has two operands and one of them already has the other as its input
2205 // chain, this token factor is redundant; return the dependent operand.
2206 if (N->getNumOperands() == 2) {
2207 if (getInputChainForNode(N: N->getOperand(Num: 0).getNode()) == N->getOperand(Num: 1))
2208 return N->getOperand(Num: 0);
2209 if (getInputChainForNode(N: N->getOperand(Num: 1).getNode()) == N->getOperand(Num: 0))
2210 return N->getOperand(Num: 1);
2211 }
2212
2213 // Don't simplify token factors if optnone.
2214 if (OptLevel == CodeGenOptLevel::None)
2215 return SDValue();
2216
2217 // Don't simplify the token factor if the node itself has too many operands.
2218 if (N->getNumOperands() > TokenFactorInlineLimit)
2219 return SDValue();
2220
2221 // If the sole user is a token factor, we should make sure we have a
2222 // chance to merge them together. This prevents TF chains from inhibiting
2223 // optimizations.
2224 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::TokenFactor)
2225 AddToWorklist(N: *(N->user_begin()));
2226
2227 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
2228 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
2229 SmallPtrSet<SDNode*, 16> SeenOps;
2230 bool Changed = false; // If we should replace this token factor.
2231
2232 // Start out with this token factor.
2233 TFs.push_back(Elt: N);
2234
2235 // Iterate through the token factors. The TFs list grows as new token factors
2236 // are encountered.
2237 for (unsigned i = 0; i < TFs.size(); ++i) {
2238 // Limit number of nodes to inline, to avoid quadratic compile times.
2239 // We have to add the outstanding Token Factors to Ops, otherwise we might
2240 // drop Ops from the resulting Token Factors.
2241 if (Ops.size() > TokenFactorInlineLimit) {
2242 for (unsigned j = i; j < TFs.size(); j++)
2243 Ops.emplace_back(Args&: TFs[j], Args: 0);
2244 // Drop unprocessed Token Factors from TFs, so we do not add them to the
2245 // combiner worklist later.
2246 TFs.resize(N: i);
2247 break;
2248 }
2249
2250 SDNode *TF = TFs[i];
2251 // Check each of the operands.
2252 for (const SDValue &Op : TF->op_values()) {
2253 switch (Op.getOpcode()) {
2254 case ISD::EntryToken:
2255 // Entry tokens don't need to be added to the list. They are
2256 // redundant.
2257 Changed = true;
2258 break;
2259
2260 case ISD::TokenFactor:
2261 if (Op.hasOneUse() && !is_contained(Range&: TFs, Element: Op.getNode())) {
2262 // Queue up for processing.
2263 TFs.push_back(Elt: Op.getNode());
2264 Changed = true;
2265 break;
2266 }
2267 [[fallthrough]];
2268
2269 default:
2270 // Only add if it isn't already in the list.
2271 if (SeenOps.insert(Ptr: Op.getNode()).second)
2272 Ops.push_back(Elt: Op);
2273 else
2274 Changed = true;
2275 break;
2276 }
2277 }
2278 }
2279
2280 // Re-visit inlined Token Factors, to clean them up in case they have been
2281 // removed. Skip the first Token Factor, as this is the current node.
2282 for (unsigned i = 1, e = TFs.size(); i < e; i++)
2283 AddToWorklist(N: TFs[i]);
2284
2285 // Remove nodes that are chained to another node in the list. Do so by
2286 // walking up chains breadth-first, stopping when we've seen another operand.
2287 // In general we must climb to the EntryNode, but we can exit early if we
2288 // find that all remaining work is associated with just one operand, as no
2289 // further pruning is possible.
2290
2291 // List of nodes to search through and original Ops from which they originate.
2292 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
2293 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2294 SmallPtrSet<SDNode *, 16> SeenChains;
2295 bool DidPruneOps = false;
2296
2297 unsigned NumLeftToConsider = 0;
2298 for (const SDValue &Op : Ops) {
2299 Worklist.push_back(Elt: std::make_pair(x: Op.getNode(), y: NumLeftToConsider++));
2300 OpWorkCount.push_back(Elt: 1);
2301 }
2302
2303 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2304 // If this is an Op, we can remove the op from the list. Re-mark any
2305 // search associated with it as coming from the current OpNumber.
2306 if (SeenOps.contains(Ptr: Op)) {
2307 Changed = true;
2308 DidPruneOps = true;
2309 unsigned OrigOpNumber = 0;
2310 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2311 OrigOpNumber++;
2312 assert((OrigOpNumber != Ops.size()) &&
2313 "expected to find TokenFactor Operand");
2314 // Re-mark worklist from OrigOpNumber to OpNumber
2315 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2316 if (Worklist[i].second == OrigOpNumber) {
2317 Worklist[i].second = OpNumber;
2318 }
2319 }
2320 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2321 OpWorkCount[OrigOpNumber] = 0;
2322 NumLeftToConsider--;
2323 }
2324 // Add if it's a new chain
2325 if (SeenChains.insert(Ptr: Op).second) {
2326 OpWorkCount[OpNumber]++;
2327 Worklist.push_back(Elt: std::make_pair(x&: Op, y&: OpNumber));
2328 }
2329 };
2330
2331 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2332 // We need to consider at least 2 Ops to prune.
2333 if (NumLeftToConsider <= 1)
2334 break;
2335 auto CurNode = Worklist[i].first;
2336 auto CurOpNumber = Worklist[i].second;
2337 assert((OpWorkCount[CurOpNumber] > 0) &&
2338 "Node should not appear in worklist");
2339 switch (CurNode->getOpcode()) {
2340 case ISD::EntryToken:
2341 // Hitting EntryToken is the only way for the search to terminate without
2342 // hitting another operand's search. Prevent us from marking this operand
2343 // as considered.
2345 NumLeftToConsider++;
2346 break;
2347 case ISD::TokenFactor:
2348 for (const SDValue &Op : CurNode->op_values())
2349 AddToWorklist(i, Op.getNode(), CurOpNumber);
2350 break;
2351 case ISD::LIFETIME_START:
2352 case ISD::LIFETIME_END:
2353 case ISD::CopyFromReg:
2354 case ISD::CopyToReg:
2355 AddToWorklist(i, CurNode->getOperand(Num: 0).getNode(), CurOpNumber);
2356 break;
2357 default:
2358 if (auto *MemNode = dyn_cast<MemSDNode>(Val: CurNode))
2359 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2360 break;
2361 }
2362 OpWorkCount[CurOpNumber]--;
2363 if (OpWorkCount[CurOpNumber] == 0)
2364 NumLeftToConsider--;
2365 }
2366
2367 // If we've changed things around then replace token factor.
2368 if (Changed) {
2369 SDValue Result;
2370 if (Ops.empty()) {
2371 // The entry token is the only possible outcome.
2372 Result = DAG.getEntryNode();
2373 } else {
2374 if (DidPruneOps) {
2375 SmallVector<SDValue, 8> PrunedOps;
2376
2377 for (const SDValue &Op : Ops) {
2378 if (SeenChains.count(Ptr: Op.getNode()) == 0)
2379 PrunedOps.push_back(Elt: Op);
2380 }
2381 Result = DAG.getTokenFactor(DL: SDLoc(N), Vals&: PrunedOps);
2382 } else {
2383 Result = DAG.getTokenFactor(DL: SDLoc(N), Vals&: Ops);
2384 }
2385 }
2386 return Result;
2387 }
2388 return SDValue();
2389}
2390
2391/// MERGE_VALUES can always be eliminated.
2392SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2393 WorklistRemover DeadNodes(*this);
2394 // Replacing results may cause a different MERGE_VALUES to suddenly
2395 // be CSE'd with N, and carry its uses with it. Iterate until no
2396 // uses remain, to ensure that the node can be safely deleted.
2397 // First add the users of this node to the work list so that they
2398 // can be tried again once they have new operands.
2399 AddUsersToWorklist(N);
2400 do {
2401 // Do as a single replacement to avoid rewalking use lists.
2402 SmallVector<SDValue, 8> Ops(N->ops());
2403 DAG.ReplaceAllUsesWith(From: N, To: Ops.data());
2404 } while (!N->use_empty());
2405 deleteAndRecombine(N);
2406 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2407}
2408
2409 /// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
2410 /// ConstantSDNode pointer; otherwise return nullptr.
2411static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2412 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val&: N);
2413 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2414}
2415
2416// isTruncateOf - If N is a truncate of some other value, return true, record
2417// the value being truncated in Op and which of Op's bits are zero/one in Known.
2418// This function computes KnownBits to avoid a duplicated call to
2419// computeKnownBits in the caller.
2420static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
2421 KnownBits &Known) {
2422 if (N->getOpcode() == ISD::TRUNCATE) {
2423 Op = N->getOperand(Num: 0);
2424 Known = DAG.computeKnownBits(Op);
2425 if (N->getFlags().hasNoUnsignedWrap())
2426 Known.Zero.setBitsFrom(N.getScalarValueSizeInBits());
2427 return true;
2428 }
2429
2430 if (N.getValueType().getScalarType() != MVT::i1 ||
2431 !sd_match(
2432 N, P: m_c_SetCC(LHS: m_Value(N&: Op), RHS: m_Zero(), CC: m_SpecificCondCode(CC: ISD::SETNE))))
2433 return false;
2434
2435 Known = DAG.computeKnownBits(Op);
2436 return (Known.Zero | 1).isAllOnes();
2437}
2438
2439/// Return true if 'Use' is a load or a store that uses N as its base pointer
2440/// and that N may be folded in the load / store addressing mode.
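/// For example, if N is (add basereg, 16) and 'Use' is a load with N as its
/// base pointer, folding is possible when the target reports a legal
/// [reg + imm] addressing mode for the loaded type (illustrative; the actual
/// query is TLI.isLegalAddressingMode below).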
2441static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2442 const TargetLowering &TLI) {
2443 EVT VT;
2444 unsigned AS;
2445
2446 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: Use)) {
2447 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2448 return false;
2449 VT = LD->getMemoryVT();
2450 AS = LD->getAddressSpace();
2451 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Val: Use)) {
2452 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2453 return false;
2454 VT = ST->getMemoryVT();
2455 AS = ST->getAddressSpace();
2456 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Val: Use)) {
2457 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2458 return false;
2459 VT = LD->getMemoryVT();
2460 AS = LD->getAddressSpace();
2461 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Val: Use)) {
2462 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2463 return false;
2464 VT = ST->getMemoryVT();
2465 AS = ST->getAddressSpace();
2466 } else {
2467 return false;
2468 }
2469
2470 TargetLowering::AddrMode AM;
2471 if (N->isAnyAdd()) {
2472 AM.HasBaseReg = true;
2473 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
2474 if (Offset)
2475 // [reg +/- imm]
2476 AM.BaseOffs = Offset->getSExtValue();
2477 else
2478 // [reg +/- reg]
2479 AM.Scale = 1;
2480 } else if (N->getOpcode() == ISD::SUB) {
2481 AM.HasBaseReg = true;
2482 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
2483 if (Offset)
2484 // [reg +/- imm]
2485 AM.BaseOffs = -Offset->getSExtValue();
2486 else
2487 // [reg +/- reg]
2488 AM.Scale = 1;
2489 } else {
2490 return false;
2491 }
2492
2493 return TLI.isLegalAddressingMode(DL: DAG.getDataLayout(), AM,
2494 Ty: VT.getTypeForEVT(Context&: *DAG.getContext()), AddrSpace: AS);
2495}
2496
2497/// This inverts a canonicalization in IR that replaces a variable select arm
2498/// with an identity constant. Codegen improves if we re-use the variable
2499/// operand rather than load a constant. This can also be converted into a
2500/// masked vector operation if the target supports it.
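/// For example, with 0 as the identity constant of add (illustrative):
///   add X, (vselect Cond, 0, Y) --> vselect Cond, X, (add X, Y)
/// so the select now chooses between the unmodified X and the binop result.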
2501static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2502 bool ShouldCommuteOperands) {
2503 SDValue N0 = N->getOperand(Num: 0);
2504 SDValue N1 = N->getOperand(Num: 1);
2505
2506 // Match a select as operand 1. The identity constant that we are looking for
2507 // is only valid as operand 1 of a non-commutative binop.
2508 if (ShouldCommuteOperands)
2509 std::swap(a&: N0, b&: N1);
2510
2511 SDValue Cond, TVal, FVal;
2512 if (!sd_match(N: N1, P: m_OneUse(P: m_SelectLike(Cond: m_Value(N&: Cond), T: m_Value(N&: TVal),
2513 F: m_Value(N&: FVal)))))
2514 return SDValue();
2515
2516 // We can't hoist all instructions because of immediate UB (not speculatable).
2517 // For example div/rem by zero.
2518 if (!DAG.isSafeToSpeculativelyExecuteNode(N))
2519 return SDValue();
2520
2521 unsigned SelOpcode = N1.getOpcode();
2522 unsigned Opcode = N->getOpcode();
2523 EVT VT = N->getValueType(ResNo: 0);
2524 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2525
2526 // This transform increases uses of N0, so freeze it to be safe.
2527 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2528 unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2529 if (isNeutralConstant(Opc: Opcode, Flags: N->getFlags(), V: TVal, OperandNo: OpNo) &&
2530 TLI.shouldFoldSelectWithIdentityConstant(BinOpcode: Opcode, VT, SelectOpcode: SelOpcode, X: N0,
2531 Y: FVal)) {
2532 SDValue F0 = DAG.getFreeze(V: N0);
2533 SDValue NewBO = DAG.getNode(Opcode, DL: SDLoc(N), VT, N1: F0, N2: FVal, Flags: N->getFlags());
2534 return DAG.getSelect(DL: SDLoc(N), VT, Cond, LHS: F0, RHS: NewBO);
2535 }
2536 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2537 if (isNeutralConstant(Opc: Opcode, Flags: N->getFlags(), V: FVal, OperandNo: OpNo) &&
2538 TLI.shouldFoldSelectWithIdentityConstant(BinOpcode: Opcode, VT, SelectOpcode: SelOpcode, X: N0,
2539 Y: TVal)) {
2540 SDValue F0 = DAG.getFreeze(V: N0);
2541 SDValue NewBO = DAG.getNode(Opcode, DL: SDLoc(N), VT, N1: F0, N2: TVal, Flags: N->getFlags());
2542 return DAG.getSelect(DL: SDLoc(N), VT, Cond, LHS: NewBO, RHS: F0);
2543 }
2544
2545 return SDValue();
2546}
2547
2548SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2549 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2550 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2551 "Unexpected binary operator");
2552
2553 if (SDValue Sel = foldSelectWithIdentityConstant(N: BO, DAG, ShouldCommuteOperands: false))
2554 return Sel;
2555
2556 if (TLI.isCommutativeBinOp(Opcode: BO->getOpcode()))
2557 if (SDValue Sel = foldSelectWithIdentityConstant(N: BO, DAG, ShouldCommuteOperands: true))
2558 return Sel;
2559
2560 // Don't do this unless the old select is going away. We want to eliminate the
2561 // binary operator, not replace a binop with a select.
2562 // TODO: Handle ISD::SELECT_CC.
2563 unsigned SelOpNo = 0;
2564 SDValue Sel = BO->getOperand(Num: 0);
2565 auto BinOpcode = BO->getOpcode();
2566 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2567 SelOpNo = 1;
2568 Sel = BO->getOperand(Num: 1);
2569
2570 // Peek through trunc to shift amount type.
2571 if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
2572 BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
2573 // This is valid when the bits that are truncated away are already known zero.
2574 SDValue Op;
2575 KnownBits Known;
2576 if (isTruncateOf(DAG, N: Sel, Op, Known) &&
2577 Known.countMaxActiveBits() < Sel.getScalarValueSizeInBits())
2578 Sel = Op;
2579 }
2580 }
2581
2582 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2583 return SDValue();
2584
2585 SDValue CT = Sel.getOperand(i: 1);
2586 if (!isConstantOrConstantVector(N: CT, NoOpaques: true) &&
2587 !DAG.isConstantFPBuildVectorOrConstantFP(N: CT))
2588 return SDValue();
2589
2590 SDValue CF = Sel.getOperand(i: 2);
2591 if (!isConstantOrConstantVector(N: CF, NoOpaques: true) &&
2592 !DAG.isConstantFPBuildVectorOrConstantFP(N: CF))
2593 return SDValue();
2594
2595 // Bail out if any constants are opaque because we can't constant fold those.
2596 // The exception is "and" and "or" with either 0 or -1 in which case we can
2597 // propagate non constant operands into select. I.e.:
2598 // and (select Cond, 0, -1), X --> select Cond, 0, X
2599 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2600 bool CanFoldNonConst =
2601 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2602 ((isNullOrNullSplat(V: CT) && isAllOnesOrAllOnesSplat(V: CF)) ||
2603 (isNullOrNullSplat(V: CF) && isAllOnesOrAllOnesSplat(V: CT)));
2604
2605 SDValue CBO = BO->getOperand(Num: SelOpNo ^ 1);
2606 if (!CanFoldNonConst &&
2607 !isConstantOrConstantVector(N: CBO, NoOpaques: true) &&
2608 !DAG.isConstantFPBuildVectorOrConstantFP(N: CBO))
2609 return SDValue();
2610
2611 SDLoc DL(Sel);
2612 SDValue NewCT, NewCF;
2613 EVT VT = BO->getValueType(ResNo: 0);
2614
2615 if (CanFoldNonConst) {
2616 // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2617 if ((BinOpcode == ISD::AND && isNullOrNullSplat(V: CT)) ||
2618 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(V: CT)))
2619 NewCT = CT;
2620 else
2621 NewCT = CBO;
2622
2623 if ((BinOpcode == ISD::AND && isNullOrNullSplat(V: CF)) ||
2624 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(V: CF)))
2625 NewCF = CF;
2626 else
2627 NewCF = CBO;
2628 } else {
2629 // We have a select-of-constants followed by a binary operator with a
2630 // constant. Eliminate the binop by pulling the constant math into the
2631 // select. Example:
2632 //   add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2633 NewCT = SelOpNo ? DAG.FoldConstantArithmetic(Opcode: BinOpcode, DL, VT, Ops: {CBO, CT})
2634 : DAG.FoldConstantArithmetic(Opcode: BinOpcode, DL, VT, Ops: {CT, CBO});
2635 if (!NewCT)
2636 return SDValue();
2637
2638 NewCF = SelOpNo ? DAG.FoldConstantArithmetic(Opcode: BinOpcode, DL, VT, Ops: {CBO, CF})
2639 : DAG.FoldConstantArithmetic(Opcode: BinOpcode, DL, VT, Ops: {CF, CBO});
2640 if (!NewCF)
2641 return SDValue();
2642 }
2643
2644 return DAG.getSelect(DL, VT, Cond: Sel.getOperand(i: 0), LHS: NewCT, RHS: NewCF, Flags: BO->getFlags());
2645}
2646
2647static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL,
2648 SelectionDAG &DAG) {
2649 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2650 "Expecting add or sub");
2651
2652 // Match a constant operand and a zext operand for the math instruction:
2653 // add Z, C
2654 // sub C, Z
2655 bool IsAdd = N->getOpcode() == ISD::ADD;
2656 SDValue C = IsAdd ? N->getOperand(Num: 1) : N->getOperand(Num: 0);
2657 SDValue Z = IsAdd ? N->getOperand(Num: 0) : N->getOperand(Num: 1);
2658 auto *CN = dyn_cast<ConstantSDNode>(Val&: C);
2659 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2660 return SDValue();
2661
2662 // Match the zext operand as a setcc of a boolean.
2663 if (Z.getOperand(i: 0).getValueType() != MVT::i1)
2664 return SDValue();
2665
2666 // Match the compare as: setcc (X & 1), 0, eq.
2667 if (!sd_match(N: Z.getOperand(i: 0), P: m_SetCC(LHS: m_And(L: m_Value(), R: m_One()), RHS: m_Zero(),
2668 CC: m_SpecificCondCode(CC: ISD::SETEQ))))
2669 return SDValue();
2670
2671 // We are adding/subtracting a constant and an inverted low bit. Turn that
2672 // into a subtract/add of the low bit with incremented/decremented constant:
2673 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2674 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2675 EVT VT = C.getValueType();
2676 SDValue LowBit = DAG.getZExtOrTrunc(Op: Z.getOperand(i: 0).getOperand(i: 0), DL, VT);
2677 SDValue C1 = IsAdd ? DAG.getConstant(Val: CN->getAPIntValue() + 1, DL, VT)
2678 : DAG.getConstant(Val: CN->getAPIntValue() - 1, DL, VT);
2679 return DAG.getNode(Opcode: IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N1: C1, N2: LowBit);
2680}
2681
2682// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
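// A quick sanity check of the identity (illustrative): for A = 5, B = 6,
// (5 | 6) - ((5 ^ 6) >> 1) = 7 - 1 = 6 = ceil((5 + 6) / 2).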
2683SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
2684 SDValue N0 = N->getOperand(Num: 0);
2685 EVT VT = N0.getValueType();
2686 SDValue A, B;
2687
2688 if ((!LegalOperations || hasOperation(Opcode: ISD::AVGCEILU, VT)) &&
2689 sd_match(N, P: m_Sub(L: m_Or(L: m_Value(N&: A), R: m_Value(N&: B)),
2690 R: m_Srl(L: m_Xor(L: m_Deferred(V&: A), R: m_Deferred(V&: B)), R: m_One())))) {
2691 return DAG.getNode(Opcode: ISD::AVGCEILU, DL, VT, N1: A, N2: B);
2692 }
2693 if ((!LegalOperations || hasOperation(Opcode: ISD::AVGCEILS, VT)) &&
2694 sd_match(N, P: m_Sub(L: m_Or(L: m_Value(N&: A), R: m_Value(N&: B)),
2695 R: m_Sra(L: m_Xor(L: m_Deferred(V&: A), R: m_Deferred(V&: B)), R: m_One())))) {
2696 return DAG.getNode(Opcode: ISD::AVGCEILS, DL, VT, N1: A, N2: B);
2697 }
2698 return SDValue();
2699}
2700
2701/// Try to fold a pointer arithmetic node.
2702/// This needs to be done separately from normal addition, because pointer
2703/// addition is not commutative.
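/// For example, (ptradd p, x) carries the provenance of its pointer operand p,
/// so unlike a plain ISD::ADD the operands cannot simply be swapped.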
2704SDValue DAGCombiner::visitPTRADD(SDNode *N) {
2705 SDValue N0 = N->getOperand(Num: 0);
2706 SDValue N1 = N->getOperand(Num: 1);
2707 EVT PtrVT = N0.getValueType();
2708 EVT IntVT = N1.getValueType();
2709 SDLoc DL(N);
2710
2711 // This is already ensured by an assert in SelectionDAG::getNode(). Several
2712 // combines here depend on this assumption.
2713 assert(PtrVT == IntVT &&
2714 "PTRADD with different operand types is not supported");
2715
2716 // fold (ptradd x, 0) -> x
2717 if (isNullConstant(V: N1))
2718 return N0;
2719
2720 // fold (ptradd 0, x) -> x
2721 if (PtrVT == IntVT && isNullConstant(V: N0))
2722 return N1;
2723
2724 if (N0.getOpcode() == ISD::PTRADD &&
2725 !reassociationCanBreakAddressingModePattern(Opc: ISD::PTRADD, DL, N, N0, N1)) {
2726 SDValue X = N0.getOperand(i: 0);
2727 SDValue Y = N0.getOperand(i: 1);
2728 SDValue Z = N1;
2729 bool N0OneUse = N0.hasOneUse();
2730 bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(N: Y);
2731 bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(N: Z);
2732
2733 // (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
2734 // * y is a constant and (ptradd x, y) has one use; or
2735 // * y and z are both constants.
2736 if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
2737 // If both additions in the original were NUW, the new ones are as well.
2738 SDNodeFlags Flags =
2739 (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2740 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT: IntVT, Ops: {Y, Z}, Flags);
2741 AddToWorklist(N: Add.getNode());
2742 // We can't set InBounds even if both original ptradds were InBounds and
2743 // NUW: SDAG usually represents pointers as integers, therefore, the
2744 // matched pattern behaves as if it had implicit casts:
2745 // (ptradd inbounds (inttoptr (ptrtoint (ptradd inbounds x, y))), z)
2746 // The outer inbounds ptradd might therefore rely on a provenance that x
2747 // does not have.
2748 return DAG.getMemBasePlusOffset(Base: X, Offset: Add, DL, Flags);
2749 }
2750 }
2751
2752 // The following combines can turn in-bounds pointer arithmetic out of bounds.
2753 // That is problematic for settings like AArch64's CPA, which checks that
2754 // intermediate results of pointer arithmetic remain in bounds. The target
2755 // therefore needs to opt-in to enable them.
2756 if (!TLI.canTransformPtrArithOutOfBounds(
2757 F: DAG.getMachineFunction().getFunction(), PtrVT))
2758 return SDValue();
2759
2760 if (N0.getOpcode() == ISD::PTRADD && isa<ConstantSDNode>(Val: N1)) {
2761 // Fold (ptradd (ptradd GA, v), c) -> (ptradd (ptradd GA, c) v) with
2762 // global address GA and constant c, such that c can be folded into GA.
2763 // TODO: Support constant vector splats.
2764 SDValue GAValue = N0.getOperand(i: 0);
2765 if (const GlobalAddressSDNode *GA =
2766 dyn_cast<GlobalAddressSDNode>(Val&: GAValue)) {
2767 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2768 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2769 // If both additions in the original were NUW, reassociation preserves
2770 // that.
2771 SDNodeFlags Flags =
2772 (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2773 // We can't set InBounds even if both original ptradds were InBounds and
2774 // NUW: SDAG usually represents pointers as integers, therefore, the
2775 // matched pattern behaves as if it had implicit casts:
2776 // (ptradd inbounds (inttoptr (ptrtoint (ptradd inbounds GA, v))), c)
2777 // The outer inbounds ptradd might therefore rely on a provenance that
2778 // GA does not have.
2779 SDValue Inner = DAG.getMemBasePlusOffset(Base: GAValue, Offset: N1, DL, Flags);
2780 AddToWorklist(N: Inner.getNode());
2781 return DAG.getMemBasePlusOffset(Base: Inner, Offset: N0.getOperand(i: 1), DL, Flags);
2782 }
2783 }
2784 }
2785
2786 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse()) {
2787 // (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
2788 // y is not, and (add y, z) is used only once.
2789 // (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
2790 // z is not, and (add y, z) is used only once.
2791 // The goal is to move constant offsets to the outermost ptradd, to create
2792 // more opportunities to fold offsets into memory instructions.
2793 // Together with another combine above, this also implements
2794 // (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
2795 SDValue X = N0;
2796 SDValue Y = N1.getOperand(i: 0);
2797 SDValue Z = N1.getOperand(i: 1);
2798 bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(N: Y);
2799 bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(N: Z);
2800
2801 // If both additions in the original were NUW, reassociation preserves that.
2802 SDNodeFlags CommonFlags = N->getFlags() & N1->getFlags();
2803 SDNodeFlags ReassocFlags = CommonFlags & SDNodeFlags::NoUnsignedWrap;
2804 if (CommonFlags.hasNoUnsignedWrap()) {
2805 // If both operations are NUW and the PTRADD is inbounds, the offsets are
2806 // both non-negative, so the reassociated PTRADDs are also inbounds.
2807 ReassocFlags |= N->getFlags() & SDNodeFlags::InBounds;
2808 }
2809
2810 if (ZIsConstant != YIsConstant) {
2811 if (YIsConstant)
2812 std::swap(a&: Y, b&: Z);
2813 SDValue Inner = DAG.getMemBasePlusOffset(Base: X, Offset: Y, DL, Flags: ReassocFlags);
2814 AddToWorklist(N: Inner.getNode());
2815 return DAG.getMemBasePlusOffset(Base: Inner, Offset: Z, DL, Flags: ReassocFlags);
2816 }
2817 }
2818
2819 // Transform (ptradd a, b) -> (or disjoint a, b) if it is equivalent and if
2820 // that transformation can't block an offset folding at any use of the ptradd.
2821 // This should be done late, after legalization, so that it doesn't block
2822 // other ptradd combines that could enable more offset folding.
2823 if (LegalOperations && DAG.haveNoCommonBitsSet(A: N0, B: N1)) {
2824 bool TransformCannotBreakAddrMode = none_of(Range: N->users(), P: [&](SDNode *User) {
2825 return canFoldInAddressingMode(N, Use: User, DAG, TLI);
2826 });
2827
2828 if (TransformCannotBreakAddrMode)
2829 return DAG.getNode(Opcode: ISD::OR, DL, VT: PtrVT, N1: N0, N2: N1, Flags: SDNodeFlags::Disjoint);
2830 }
2831
2832 return SDValue();
2833}
2834
2835/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2836/// a shift and add with a different constant.
2837static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL,
2838 SelectionDAG &DAG) {
2839 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2840 "Expecting add or sub");
2841
2842 // We need a constant operand for the add/sub, and the other operand is a
2843 // logical shift right: add (srl), C or sub C, (srl).
2844 bool IsAdd = N->getOpcode() == ISD::ADD;
2845 SDValue ConstantOp = IsAdd ? N->getOperand(Num: 1) : N->getOperand(Num: 0);
2846 SDValue ShiftOp = IsAdd ? N->getOperand(Num: 0) : N->getOperand(Num: 1);
2847 if (!DAG.isConstantIntBuildVectorOrConstantInt(N: ConstantOp) ||
2848 ShiftOp.getOpcode() != ISD::SRL)
2849 return SDValue();
2850
2851 // The shift must be of a 'not' value.
2852 SDValue Not = ShiftOp.getOperand(i: 0);
2853 if (!Not.hasOneUse() || !isBitwiseNot(V: Not))
2854 return SDValue();
2855
2856 // The shift must be moving the sign bit to the least-significant-bit.
2857 EVT VT = ShiftOp.getValueType();
2858 SDValue ShAmt = ShiftOp.getOperand(i: 1);
2859 ConstantSDNode *ShAmtC = isConstOrConstSplat(N: ShAmt);
2860 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2861 return SDValue();
2862
2863 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2864 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2865 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
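      // This works because (srl (not X), W-1) == 1 - (srl X, W-1) and
      // (sra X, W-1) == 0 - (srl X, W-1) for bit width W, so the 'not' is
      // absorbed by adjusting the constant and the shift kind accordingly.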
2866 if (SDValue NewC = DAG.FoldConstantArithmetic(
2867 Opcode: IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2868 Ops: {ConstantOp, DAG.getConstant(Val: 1, DL, VT)})) {
2869 SDValue NewShift = DAG.getNode(Opcode: IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2870 N1: Not.getOperand(i: 0), N2: ShAmt);
2871 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: NewShift, N2: NewC);
2872 }
2873
2874 return SDValue();
2875}
2876
2877static bool
2878areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {
2879 return (isBitwiseNot(V: Op0) && Op0.getOperand(i: 0) == Op1) ||
2880 (isBitwiseNot(V: Op1) && Op1.getOperand(i: 0) == Op0);
2881}
2882
2883/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2884/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2885/// are no common bits set in the operands).
2886SDValue DAGCombiner::visitADDLike(SDNode *N) {
2887 SDValue N0 = N->getOperand(Num: 0);
2888 SDValue N1 = N->getOperand(Num: 1);
2889 EVT VT = N0.getValueType();
2890 SDLoc DL(N);
2891
2892 // fold (add x, undef) -> undef
2893 if (N0.isUndef())
2894 return N0;
2895 if (N1.isUndef())
2896 return N1;
2897
2898 // fold (add c1, c2) -> c1+c2
2899 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::ADD, DL, VT, Ops: {N0, N1}))
2900 return C;
2901
2902 // canonicalize constant to RHS
2903 if (DAG.isConstantIntBuildVectorOrConstantInt(N: N0) &&
2904 !DAG.isConstantIntBuildVectorOrConstantInt(N: N1))
2905 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1, N2: N0);
2906
2907 if (areBitwiseNotOfEachother(Op0: N0, Op1: N1))
2908 return DAG.getConstant(Val: APInt::getAllOnes(numBits: VT.getScalarSizeInBits()), DL, VT);
2909
2910 // fold vector ops
2911 if (VT.isVector()) {
2912 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2913 return FoldedVOp;
2914
2915 // fold (add x, 0) -> x, vector edition
2916 if (ISD::isConstantSplatVectorAllZeros(N: N1.getNode()))
2917 return N0;
2918 }
2919
2920 // fold (add x, 0) -> x
2921 if (isNullConstant(V: N1))
2922 return N0;
2923
2924 if (N0.getOpcode() == ISD::SUB) {
2925 SDValue N00 = N0.getOperand(i: 0);
2926 SDValue N01 = N0.getOperand(i: 1);
2927
2928 // fold ((A-c1)+c2) -> (A+(c2-c1))
2929 if (SDValue Sub = DAG.FoldConstantArithmetic(Opcode: ISD::SUB, DL, VT, Ops: {N1, N01}))
2930 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0.getOperand(i: 0), N2: Sub);
2931
2932 // fold ((c1-A)+c2) -> (c1+c2)-A
2933 if (SDValue Add = DAG.FoldConstantArithmetic(Opcode: ISD::ADD, DL, VT, Ops: {N1, N00}))
2934 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Add, N2: N0.getOperand(i: 1));
2935 }
2936
2937 // add (sext i1 X), 1 -> zext (not i1 X)
2938 // We don't transform this pattern:
2939 // add (zext i1 X), -1 -> sext (not i1 X)
2940 // because most (?) targets generate better code for the zext form.
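      // Both i1 values check out: X=0 gives 0 + 1 == 1 == zext(not 0), and
      // X=1 gives -1 + 1 == 0 == zext(not 1).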
2941 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2942 isOneOrOneSplat(V: N1)) {
2943 SDValue X = N0.getOperand(i: 0);
2944 if ((!LegalOperations ||
2945 (TLI.isOperationLegal(Op: ISD::XOR, VT: X.getValueType()) &&
2946 TLI.isOperationLegal(Op: ISD::ZERO_EXTEND, VT))) &&
2947 X.getScalarValueSizeInBits() == 1) {
2948 SDValue Not = DAG.getNOT(DL, Val: X, VT: X.getValueType());
2949 return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT, Operand: Not);
2950 }
2951 }
2952
2953 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2954 // iff (or x, c0) is equivalent to (add x, c0).
2955 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2956 // iff (xor x, c0) is equivalent to (add x, c0).
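      // For example, if x is known to be a multiple of 4, then (or x, 3) adds
      // 3 without carries, so (add (or x, 3), 8) becomes (add x, 11).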
2957 if (DAG.isADDLike(Op: N0)) {
2958 SDValue N01 = N0.getOperand(i: 1);
2959 if (SDValue Add = DAG.FoldConstantArithmetic(Opcode: ISD::ADD, DL, VT, Ops: {N1, N01}))
2960 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0.getOperand(i: 0), N2: Add);
2961 }
2962
2963 if (SDValue NewSel = foldBinOpIntoSelect(BO: N))
2964 return NewSel;
2965
2966 // reassociate add
2967 if (!reassociationCanBreakAddressingModePattern(Opc: ISD::ADD, DL, N, N0, N1)) {
2968 if (SDValue RADD = reassociateOps(Opc: ISD::ADD, DL, N0, N1, Flags: N->getFlags()))
2969 return RADD;
2970
2971 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2972 // equivalent to (add x, c).
2973 // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
2974 // equivalent to (add x, c).
2975 // Do this optimization only when adding c does not introduce instructions
2976 // for adding carries.
2977 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2978 if (DAG.isADDLike(Op: N0) && N0.hasOneUse() &&
2979 isConstantOrConstantVector(N: N0.getOperand(i: 1), /* NoOpaque */ NoOpaques: true)) {
2980        // If N0's type is not split by legalization, or the constant is a sign
2981        // mask, this does not introduce an add carry.
2982 auto TyActn = TLI.getTypeAction(Context&: *DAG.getContext(), VT: N0.getValueType());
2983 bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
2984 TyActn == TargetLoweringBase::TypePromoteInteger ||
2985 isMinSignedConstant(V: N0.getOperand(i: 1));
2986 if (NoAddCarry)
2987 return DAG.getNode(
2988 Opcode: ISD::ADD, DL, VT,
2989 N1: DAG.getNode(Opcode: ISD::ADD, DL, VT, N1, N2: N0.getOperand(i: 0)),
2990 N2: N0.getOperand(i: 1));
2991 }
2992 return SDValue();
2993 };
2994 if (SDValue Add = ReassociateAddOr(N0, N1))
2995 return Add;
2996 if (SDValue Add = ReassociateAddOr(N1, N0))
2997 return Add;
2998
2999 // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
3000 if (SDValue SD =
3001 reassociateReduction(RedOpc: ISD::VECREDUCE_ADD, Opc: ISD::ADD, DL, VT, N0, N1))
3002 return SD;
3003 }
3004
3005 SDValue A, B, C, D;
3006
3007 // fold ((0-A) + B) -> B-A
3008 if (sd_match(N: N0, P: m_Neg(V: m_Value(N&: A))))
3009 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1, N2: A);
3010
3011 // fold (A + (0-B)) -> A-B
3012 if (sd_match(N: N1, P: m_Neg(V: m_Value(N&: B))))
3013 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: N0, N2: B);
3014
3015 // fold (A+(B-A)) -> B
3016 if (sd_match(N: N1, P: m_Sub(L: m_Value(N&: B), R: m_Specific(N: N0))))
3017 return B;
3018
3019 // fold ((B-A)+A) -> B
3020 if (sd_match(N: N0, P: m_Sub(L: m_Value(N&: B), R: m_Specific(N: N1))))
3021 return B;
3022
3023 // fold ((A-B)+(C-A)) -> (C-B)
3024 if (sd_match(N: N0, P: m_Sub(L: m_Value(N&: A), R: m_Value(N&: B))) &&
3025 sd_match(N: N1, P: m_Sub(L: m_Value(N&: C), R: m_Specific(N: A))))
3026 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: C, N2: B);
3027
3028 // fold ((A-B)+(B-C)) -> (A-C)
3029 if (sd_match(N: N0, P: m_Sub(L: m_Value(N&: A), R: m_Value(N&: B))) &&
3030 sd_match(N: N1, P: m_Sub(L: m_Specific(N: B), R: m_Value(N&: C))))
3031 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: A, N2: C);
3032
3033 // fold (A+(B-(A+C))) to (B-C)
3034 // fold (A+(B-(C+A))) to (B-C)
3035 if (sd_match(N: N1, P: m_Sub(L: m_Value(N&: B), R: m_Add(L: m_Specific(N: N0), R: m_Value(N&: C)))))
3036 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: B, N2: C);
3037
3038 // fold (A+((B-A)+or-C)) to (B+or-C)
3039 if (sd_match(N: N1,
3040 P: m_AnyOf(preds: m_Add(L: m_Sub(L: m_Value(N&: B), R: m_Specific(N: N0)), R: m_Value(N&: C)),
3041 preds: m_Sub(L: m_Sub(L: m_Value(N&: B), R: m_Specific(N: N0)), R: m_Value(N&: C)))))
3042 return DAG.getNode(Opcode: N1.getOpcode(), DL, VT, N1: B, N2: C);
3043
3044 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
3045 if (sd_match(N: N0, P: m_OneUse(P: m_Sub(L: m_Value(N&: A), R: m_Value(N&: B)))) &&
3046 sd_match(N: N1, P: m_OneUse(P: m_Sub(L: m_Value(N&: C), R: m_Value(N&: D)))) &&
3047 (isConstantOrConstantVector(N: A) || isConstantOrConstantVector(N: C)))
3048 return DAG.getNode(Opcode: ISD::SUB, DL, VT,
3049 N1: DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(N0), VT, N1: A, N2: C),
3050 N2: DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(N1), VT, N1: B, N2: D));
3051
3052 // fold (add (umax X, C), -C) --> (usubsat X, C)
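      // umax(X, C) - C is X - C when X > C and 0 otherwise, which is exactly
      // the unsigned saturating subtraction usubsat(X, C).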
3053 if (N0.getOpcode() == ISD::UMAX && hasOperation(Opcode: ISD::USUBSAT, VT)) {
3054 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
3055 return (!Max && !Op) ||
3056 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
3057 };
3058 if (ISD::matchBinaryPredicate(LHS: N0.getOperand(i: 1), RHS: N1, Match: MatchUSUBSAT,
3059 /*AllowUndefs*/ true))
3060 return DAG.getNode(Opcode: ISD::USUBSAT, DL, VT, N1: N0.getOperand(i: 0),
3061 N2: N0.getOperand(i: 1));
3062 }
3063
3064 if (SimplifyDemandedBits(Op: SDValue(N, 0)))
3065 return SDValue(N, 0);
3066
3067 if (isOneOrOneSplat(V: N1)) {
3068 // fold (add (xor a, -1), 1) -> (sub 0, a)
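        // (xor a, -1) is ~a, and ~a + 1 == -a in two's complement.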
3069 if (isBitwiseNot(V: N0))
3070 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT),
3071 N2: N0.getOperand(i: 0));
3072
3073 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
3074 if (N0.getOpcode() == ISD::ADD) {
3075 SDValue A, Xor;
3076
3077 if (isBitwiseNot(V: N0.getOperand(i: 0))) {
3078 A = N0.getOperand(i: 1);
3079 Xor = N0.getOperand(i: 0);
3080 } else if (isBitwiseNot(V: N0.getOperand(i: 1))) {
3081 A = N0.getOperand(i: 0);
3082 Xor = N0.getOperand(i: 1);
3083 }
3084
3085 if (Xor)
3086 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: A, N2: Xor.getOperand(i: 0));
3087 }
3088
3089 // Look for:
3090 // add (add x, y), 1
3091 // And if the target does not like this form then turn into:
3092 // sub y, (xor x, -1)
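        // This uses (xor x, -1) == -x - 1, so y - (xor x, -1) == x + y + 1,
        // which matches the original (add (add x, y), 1).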
3093 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3094 N0.hasOneUse() &&
3095 // Limit this to after legalization if the add has wrap flags
3096 (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
3097 !N->getFlags().hasNoSignedWrap()))) {
3098 SDValue Not = DAG.getNOT(DL, Val: N0.getOperand(i: 0), VT);
3099 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: N0.getOperand(i: 1), N2: Not);
3100 }
3101 }
3102
3103 // (x - y) + -1 -> add (xor y, -1), x
3104 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3105 isAllOnesOrAllOnesSplat(V: N1, /*AllowUndefs=*/true)) {
3106 SDValue Not = DAG.getNOT(DL, Val: N0.getOperand(i: 1), VT);
3107 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Not, N2: N0.getOperand(i: 0));
3108 }
3109
3110 // Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB).
3111 // This can help if the inner add has multiple uses.
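      // This is plain distribution: (A + CA) * CM + CB == A * CM + (CA * CM + CB).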
3112 APInt CM, CA;
3113 if (ConstantSDNode *CB = dyn_cast<ConstantSDNode>(Val&: N1)) {
3114 if (VT.getScalarSizeInBits() <= 64) {
3115 if (sd_match(N: N0, P: m_OneUse(P: m_Mul(L: m_Add(L: m_Value(N&: A), R: m_ConstInt(V&: CA)),
3116 R: m_ConstInt(V&: CM)))) &&
3117 TLI.isLegalAddImmediate(
3118 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
3119 SDNodeFlags Flags;
3120        // If all the inputs are nuw, the outputs can be nuw. If all the inputs
3121        // are _also_ nsw, the outputs can be too.
3122 if (N->getFlags().hasNoUnsignedWrap() &&
3123 N0->getFlags().hasNoUnsignedWrap() &&
3124 N0.getOperand(i: 0)->getFlags().hasNoUnsignedWrap()) {
3125 Flags |= SDNodeFlags::NoUnsignedWrap;
3126 if (N->getFlags().hasNoSignedWrap() &&
3127 N0->getFlags().hasNoSignedWrap() &&
3128 N0.getOperand(i: 0)->getFlags().hasNoSignedWrap())
3129 Flags |= SDNodeFlags::NoSignedWrap;
3130 }
3131 SDValue Mul = DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(N1), VT, N1: A,
3132 N2: DAG.getConstant(Val: CM, DL, VT), Flags);
3133 return DAG.getNode(
3134 Opcode: ISD::ADD, DL, VT, N1: Mul,
3135 N2: DAG.getConstant(Val: CA * CM + CB->getAPIntValue(), DL, VT), Flags);
3136 }
3137 // Also look in case there is an intermediate add.
3138 if (sd_match(N: N0, P: m_OneUse(P: m_Add(
3139 L: m_OneUse(P: m_Mul(L: m_Add(L: m_Value(N&: A), R: m_ConstInt(V&: CA)),
3140 R: m_ConstInt(V&: CM))),
3141 R: m_Value(N&: B)))) &&
3142 TLI.isLegalAddImmediate(
3143 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
3144 SDNodeFlags Flags;
3145        // If all the inputs are nuw, the outputs can be nuw. If all the inputs
3146        // are _also_ nsw, the outputs can be too.
3147 SDValue OMul =
3148 N0.getOperand(i: 0) == B ? N0.getOperand(i: 1) : N0.getOperand(i: 0);
3149 if (N->getFlags().hasNoUnsignedWrap() &&
3150 N0->getFlags().hasNoUnsignedWrap() &&
3151 OMul->getFlags().hasNoUnsignedWrap() &&
3152 OMul.getOperand(i: 0)->getFlags().hasNoUnsignedWrap()) {
3153 Flags |= SDNodeFlags::NoUnsignedWrap;
3154 if (N->getFlags().hasNoSignedWrap() &&
3155 N0->getFlags().hasNoSignedWrap() &&
3156 OMul->getFlags().hasNoSignedWrap() &&
3157 OMul.getOperand(i: 0)->getFlags().hasNoSignedWrap())
3158 Flags |= SDNodeFlags::NoSignedWrap;
3159 }
3160 SDValue Mul = DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(N1), VT, N1: A,
3161 N2: DAG.getConstant(Val: CM, DL, VT), Flags);
3162 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(N1), VT, N1: Mul, N2: B, Flags);
3163 return DAG.getNode(
3164 Opcode: ISD::ADD, DL, VT, N1: Add,
3165 N2: DAG.getConstant(Val: CA * CM + CB->getAPIntValue(), DL, VT), Flags);
3166 }
3167 }
3168 }
3169
3170 if (SDValue Combined = visitADDLikeCommutative(N0, N1, LocReference: N))
3171 return Combined;
3172
3173 if (SDValue Combined = visitADDLikeCommutative(N0: N1, N1: N0, LocReference: N))
3174 return Combined;
3175
3176 return SDValue();
3177}
3178
3179// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
3180// Attempt to form avgfloor(A, B) from ((A >> 1) + (B >> 1)) + (A & B & 1)
3181// Attempt to form avgceil(A, B) from ((A >> 1) + (B >> 1)) + ((A | B) & 1)
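    // These rely on the overflow-free average identities:
    //   (A & B) + ((A ^ B) >> 1)              == floor((A + B) / 2)
    //   ((A >> 1) + (B >> 1)) + (A & B & 1)   == floor((A + B) / 2)
    //   ((A >> 1) + (B >> 1)) + ((A | B) & 1) == ceil((A + B) / 2)
    // where the shifts are logical for the unsigned forms and arithmetic for
    // the signed forms.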
3182SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
3183 SDValue N0 = N->getOperand(Num: 0);
3184 EVT VT = N0.getValueType();
3185 SDValue A, B;
3186
3187 if ((!LegalOperations || hasOperation(Opcode: ISD::AVGFLOORU, VT)) &&
3188 (sd_match(N,
3189 P: m_Add(L: m_And(L: m_Value(N&: A), R: m_Value(N&: B)),
3190 R: m_Srl(L: m_Xor(L: m_Deferred(V&: A), R: m_Deferred(V&: B)), R: m_One()))) ||
3191 sd_match(N, P: m_ReassociatableAdd(
3192 Patterns: m_ReassociatableAnd(Patterns: m_Value(N&: A), Patterns: m_Value(N&: B), Patterns: m_One()),
3193 Patterns: m_Srl(L: m_Deferred(V&: A), R: m_One()),
3194 Patterns: m_Srl(L: m_Deferred(V&: B), R: m_One()))))) {
3195 return DAG.getNode(Opcode: ISD::AVGFLOORU, DL, VT, N1: A, N2: B);
3196 }
3197 if ((!LegalOperations || hasOperation(Opcode: ISD::AVGFLOORS, VT)) &&
3198 (sd_match(N,
3199 P: m_Add(L: m_And(L: m_Value(N&: A), R: m_Value(N&: B)),
3200 R: m_Sra(L: m_Xor(L: m_Deferred(V&: A), R: m_Deferred(V&: B)), R: m_One()))) ||
3201 sd_match(N, P: m_ReassociatableAdd(
3202 Patterns: m_ReassociatableAnd(Patterns: m_Value(N&: A), Patterns: m_Value(N&: B), Patterns: m_One()),
3203 Patterns: m_Sra(L: m_Deferred(V&: A), R: m_One()),
3204 Patterns: m_Sra(L: m_Deferred(V&: B), R: m_One()))))) {
3205 return DAG.getNode(Opcode: ISD::AVGFLOORS, DL, VT, N1: A, N2: B);
3206 }
3207
3208 if ((!LegalOperations || hasOperation(Opcode: ISD::AVGCEILU, VT)) &&
3209 sd_match(N,
3210 P: m_ReassociatableAdd(Patterns: m_And(L: m_Or(L: m_Value(N&: A), R: m_Value(N&: B)), R: m_One()),
3211 Patterns: m_Srl(L: m_Deferred(V&: A), R: m_One()),
3212 Patterns: m_Srl(L: m_Deferred(V&: B), R: m_One())))) {
3213 return DAG.getNode(Opcode: ISD::AVGCEILU, DL, VT, N1: A, N2: B);
3214 }
3215 if ((!LegalOperations || hasOperation(Opcode: ISD::AVGCEILS, VT)) &&
3216 sd_match(N,
3217 P: m_ReassociatableAdd(Patterns: m_And(L: m_Or(L: m_Value(N&: A), R: m_Value(N&: B)), R: m_One()),
3218 Patterns: m_Sra(L: m_Deferred(V&: A), R: m_One()),
3219 Patterns: m_Sra(L: m_Deferred(V&: B), R: m_One())))) {
3220 return DAG.getNode(Opcode: ISD::AVGCEILS, DL, VT, N1: A, N2: B);
3221 }
3222
3223 return SDValue();
3224}
3225
3226SDValue DAGCombiner::visitADD(SDNode *N) {
3227 SDValue N0 = N->getOperand(Num: 0);
3228 SDValue N1 = N->getOperand(Num: 1);
3229 EVT VT = N0.getValueType();
3230 SDLoc DL(N);
3231
3232 if (SDValue Combined = visitADDLike(N))
3233 return Combined;
3234
3235 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
3236 return V;
3237
3238 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
3239 return V;
3240
3241 if (SDValue V = MatchRotate(LHS: N0, RHS: N1, DL: SDLoc(N), /*FromAdd=*/true))
3242 return V;
3243
3244 // Try to match AVGFLOOR fixedwidth pattern
3245 if (SDValue V = foldAddToAvg(N, DL))
3246 return V;
3247
3248 // fold (a+b) -> (a|b) iff a and b share no bits.
3249 if ((!LegalOperations || TLI.isOperationLegal(Op: ISD::OR, VT)) &&
3250 DAG.haveNoCommonBitsSet(A: N0, B: N1))
3251 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: N0, N2: N1, Flags: SDNodeFlags::Disjoint);
3252
3253 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
3254 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
3255 const APInt &C0 = N0->getConstantOperandAPInt(Num: 0);
3256 const APInt &C1 = N1->getConstantOperandAPInt(Num: 0);
3257 return DAG.getVScale(DL, VT, MulImm: C0 + C1);
3258 }
3259
3260 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
3261 if (N0.getOpcode() == ISD::ADD &&
3262 N0.getOperand(i: 1).getOpcode() == ISD::VSCALE &&
3263 N1.getOpcode() == ISD::VSCALE) {
3264 const APInt &VS0 = N0.getOperand(i: 1)->getConstantOperandAPInt(Num: 0);
3265 const APInt &VS1 = N1->getConstantOperandAPInt(Num: 0);
3266 SDValue VS = DAG.getVScale(DL, VT, MulImm: VS0 + VS1);
3267 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0.getOperand(i: 0), N2: VS);
3268 }
3269
3270  // Fold (add step_vector(c1), step_vector(c2)) to step_vector(c1+c2)
3271 if (N0.getOpcode() == ISD::STEP_VECTOR &&
3272 N1.getOpcode() == ISD::STEP_VECTOR) {
3273 const APInt &C0 = N0->getConstantOperandAPInt(Num: 0);
3274 const APInt &C1 = N1->getConstantOperandAPInt(Num: 0);
3275 APInt NewStep = C0 + C1;
3276 return DAG.getStepVector(DL, ResVT: VT, StepVal: NewStep);
3277 }
3278
3279 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
3280 if (N0.getOpcode() == ISD::ADD &&
3281 N0.getOperand(i: 1).getOpcode() == ISD::STEP_VECTOR &&
3282 N1.getOpcode() == ISD::STEP_VECTOR) {
3283 const APInt &SV0 = N0.getOperand(i: 1)->getConstantOperandAPInt(Num: 0);
3284 const APInt &SV1 = N1->getConstantOperandAPInt(Num: 0);
3285 APInt NewStep = SV0 + SV1;
3286 SDValue SV = DAG.getStepVector(DL, ResVT: VT, StepVal: NewStep);
3287 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0.getOperand(i: 0), N2: SV);
3288 }
3289
3290 return SDValue();
3291}
3292
3293SDValue DAGCombiner::visitADDSAT(SDNode *N) {
3294 unsigned Opcode = N->getOpcode();
3295 SDValue N0 = N->getOperand(Num: 0);
3296 SDValue N1 = N->getOperand(Num: 1);
3297 EVT VT = N0.getValueType();
3298 bool IsSigned = Opcode == ISD::SADDSAT;
3299 SDLoc DL(N);
3300
3301 // fold (add_sat x, undef) -> -1
3302 if (N0.isUndef() || N1.isUndef())
3303 return DAG.getAllOnesConstant(DL, VT);
3304
3305 // fold (add_sat c1, c2) -> c3
3306 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, Ops: {N0, N1}))
3307 return C;
3308
3309 // canonicalize constant to RHS
3310 if (DAG.isConstantIntBuildVectorOrConstantInt(N: N0) &&
3311 !DAG.isConstantIntBuildVectorOrConstantInt(N: N1))
3312 return DAG.getNode(Opcode, DL, VT, N1, N2: N0);
3313
3314 // fold vector ops
3315 if (VT.isVector()) {
3316 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3317 return FoldedVOp;
3318
3319 // fold (add_sat x, 0) -> x, vector edition
3320 if (ISD::isConstantSplatVectorAllZeros(N: N1.getNode()))
3321 return N0;
3322 }
3323
3324 // fold (add_sat x, 0) -> x
3325 if (isNullConstant(V: N1))
3326 return N0;
3327
3328 // If it cannot overflow, transform into an add.
3329 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3330 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0, N2: N1);
3331
3332 return SDValue();
3333}
3334
3335static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
3336 bool ForceCarryReconstruction = false) {
3337 bool Masked = false;
3338
3339 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
3340 while (true) {
3341 if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3342 return V;
3343
3344 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
3345 V = V.getOperand(i: 0);
3346 continue;
3347 }
3348
3349 if (V.getOpcode() == ISD::AND && isOneConstant(V: V.getOperand(i: 1))) {
3350 if (ForceCarryReconstruction)
3351 return V;
3352
3353 Masked = true;
3354 V = V.getOperand(i: 0);
3355 continue;
3356 }
3357
3358 break;
3359 }
3360
3361 // If this is not a carry, return.
3362 if (V.getResNo() != 1)
3363 return SDValue();
3364
3365 if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3366 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3367 return SDValue();
3368
3369 EVT VT = V->getValueType(ResNo: 0);
3370 if (!TLI.isOperationLegalOrCustom(Op: V.getOpcode(), VT))
3371 return SDValue();
3372
3373 // If the result is masked, then no matter what kind of bool it is we can
3374  // return. If it isn't, then we need to make sure the boolean is represented
3375  // as either 0 or 1 and not some other value.
3376 if (Masked ||
3377 TLI.getBooleanContents(Type: V.getValueType()) ==
3378 TargetLoweringBase::ZeroOrOneBooleanContent)
3379 return V;
3380
3381 return SDValue();
3382}
3383
3384/// Given the operands of an add/sub operation, see if the 2nd operand is a
3385/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3386/// the opcode and bypass the mask operation.
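    /// For example, if X is known to be all sign bits (0 or -1), then (X & 1)
    /// is 0 or 1 respectively, i.e. (X & 1) == -X, so N0 + (X & 1) == N0 - X.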
3387static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3388 SelectionDAG &DAG, const SDLoc &DL) {
3389 if (N1.getOpcode() == ISD::ZERO_EXTEND)
3390 N1 = N1.getOperand(i: 0);
3391
3392 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(V: N1->getOperand(Num: 1)))
3393 return SDValue();
3394
3395 EVT VT = N0.getValueType();
3396 SDValue N10 = N1.getOperand(i: 0);
3397 if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3398 N10 = N10.getOperand(i: 0);
3399
3400 if (N10.getValueType() != VT)
3401 return SDValue();
3402
3403 if (DAG.ComputeNumSignBits(Op: N10) != VT.getScalarSizeInBits())
3404 return SDValue();
3405
3406 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3407 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3408 return DAG.getNode(Opcode: IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N1: N0, N2: N10);
3409}
3410
3411/// Helper for doing combines based on N0 and N1 being added to each other.
3412SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
3413 SDNode *LocReference) {
3414 EVT VT = N0.getValueType();
3415 SDLoc DL(LocReference);
3416
3417 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3418 SDValue Y, N;
3419 if (sd_match(N: N1, P: m_Shl(L: m_Neg(V: m_Value(N&: Y)), R: m_Value(N))))
3420 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: N0,
3421 N2: DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Y, N2: N));
3422
3423 if (SDValue V = foldAddSubMasked1(IsAdd: true, N0, N1, DAG, DL))
3424 return V;
3425
3426 // Look for:
3427 // add (add x, 1), y
3428 // And if the target does not like this form then turn into:
3429 // sub y, (xor x, -1)
3430 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3431 N0.hasOneUse() && isOneOrOneSplat(V: N0.getOperand(i: 1)) &&
3432 // Limit this to after legalization if the add has wrap flags
3433 (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3434 !N0->getFlags().hasNoSignedWrap()))) {
3435 SDValue Not = DAG.getNOT(DL, Val: N0.getOperand(i: 0), VT);
3436 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1, N2: Not);
3437 }
3438
3439 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3440 // Hoist one-use subtraction by non-opaque constant:
3441 // (x - C) + y -> (x + y) - C
3442 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3443 if (isConstantOrConstantVector(N: N0.getOperand(i: 1), /*NoOpaques=*/true)) {
3444 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0.getOperand(i: 0), N2: N1);
3445 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Add, N2: N0.getOperand(i: 1));
3446 }
3447 // Hoist one-use subtraction from non-opaque constant:
3448 // (C - x) + y -> (y - x) + C
3449 if (isConstantOrConstantVector(N: N0.getOperand(i: 0), /*NoOpaques=*/true)) {
3450 SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1, N2: N0.getOperand(i: 1));
3451 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Sub, N2: N0.getOperand(i: 0));
3452 }
3453 }
3454
3455 // add (mul x, C), x -> mul x, C+1
3456 if (N0.getOpcode() == ISD::MUL && N0.getOperand(i: 0) == N1 &&
3457 isConstantOrConstantVector(N: N0.getOperand(i: 1), /*NoOpaques=*/true) &&
3458 N0.hasOneUse()) {
3459 SDValue NewC = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0.getOperand(i: 1),
3460 N2: DAG.getConstant(Val: 1, DL, VT));
3461 return DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N0.getOperand(i: 0), N2: NewC);
3462 }
3463
3464 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3465 // rather than 'add 0/-1' (the zext should get folded).
3466 // add (sext i1 Y), X --> sub X, (zext i1 Y)
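      // This holds because (sext i1 Y) is 0 or -1 while (zext i1 Y) is 0 or 1,
      // so adding the former is the same as subtracting the latter.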
3467 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3468 N0.getOperand(i: 0).getScalarValueSizeInBits() == 1 &&
3469 TLI.getBooleanContents(Type: VT) == TargetLowering::ZeroOrOneBooleanContent) {
3470 SDValue ZExt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT, Operand: N0.getOperand(i: 0));
3471 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1, N2: ZExt);
3472 }
3473
3474 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3475 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3476 VTSDNode *TN = cast<VTSDNode>(Val: N1.getOperand(i: 1));
3477 if (TN->getVT() == MVT::i1) {
3478 SDValue ZExt = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: N1.getOperand(i: 0),
3479 N2: DAG.getConstant(Val: 1, DL, VT));
3480 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: N0, N2: ZExt);
3481 }
3482 }
3483
3484 // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3485 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(V: N1.getOperand(i: 1)) &&
3486 N1.getResNo() == 0)
3487 return DAG.getNode(Opcode: ISD::UADDO_CARRY, DL, VTList: N1->getVTList(),
3488 N1: N0, N2: N1.getOperand(i: 0), N3: N1.getOperand(i: 2));
3489
3490 // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3491 if (TLI.isOperationLegalOrCustom(Op: ISD::UADDO_CARRY, VT))
3492 if (SDValue Carry = getAsCarry(TLI, V: N1))
3493 return DAG.getNode(Opcode: ISD::UADDO_CARRY, DL,
3494 VTList: DAG.getVTList(VT1: VT, VT2: Carry.getValueType()), N1: N0,
3495 N2: DAG.getConstant(Val: 0, DL, VT), N3: Carry);
3496
3497 return SDValue();
3498}
3499
3500SDValue DAGCombiner::visitADDC(SDNode *N) {
3501 SDValue N0 = N->getOperand(Num: 0);
3502 SDValue N1 = N->getOperand(Num: 1);
3503 EVT VT = N0.getValueType();
3504 SDLoc DL(N);
3505
3506 // If the flag result is dead, turn this into an ADD.
3507 if (!N->hasAnyUseOfValue(Value: 1))
3508 return CombineTo(N, Res0: DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0, N2: N1),
3509 Res1: DAG.getNode(Opcode: ISD::CARRY_FALSE, DL, VT: MVT::Glue));
3510
3511 // canonicalize constant to RHS.
3512 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(Val&: N0);
3513 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(Val&: N1);
3514 if (N0C && !N1C)
3515 return DAG.getNode(Opcode: ISD::ADDC, DL, VTList: N->getVTList(), N1, N2: N0);
3516
3517 // fold (addc x, 0) -> x + no carry out
3518 if (isNullConstant(V: N1))
3519 return CombineTo(N, Res0: N0, Res1: DAG.getNode(Opcode: ISD::CARRY_FALSE,
3520 DL, VT: MVT::Glue));
3521
3522 // If it cannot overflow, transform into an add.
3523 if (DAG.computeOverflowForUnsignedAdd(N0, N1) == SelectionDAG::OFK_Never)
3524 return CombineTo(N, Res0: DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0, N2: N1),
3525 Res1: DAG.getNode(Opcode: ISD::CARRY_FALSE, DL, VT: MVT::Glue));
3526
3527 return SDValue();
3528}
3529
3530/**
3531 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
3532 * then the flip also occurs if computing the inverse is the same cost.
3533 * This function returns an empty SDValue in case it cannot flip the boolean
3534 * without increasing the cost of the computation. If you want to flip a boolean
3535 * no matter what, use DAG.getLogicalNOT.
3536 */
3537static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
3538 const TargetLowering &TLI,
3539 bool Force) {
3540 if (Force && isa<ConstantSDNode>(Val: V))
3541 return DAG.getLogicalNOT(DL: SDLoc(V), Val: V, VT: V.getValueType());
3542
3543 if (V.getOpcode() != ISD::XOR)
3544 return SDValue();
3545
3546 if (DAG.isBoolConstant(N: V.getOperand(i: 1)) == true)
3547 return V.getOperand(i: 0);
3548 if (Force && isConstOrConstSplat(N: V.getOperand(i: 1), AllowUndefs: false))
3549 return DAG.getLogicalNOT(DL: SDLoc(V), Val: V, VT: V.getValueType());
3550 return SDValue();
3551}
3552
3553SDValue DAGCombiner::visitADDO(SDNode *N) {
3554 SDValue N0 = N->getOperand(Num: 0);
3555 SDValue N1 = N->getOperand(Num: 1);
3556 EVT VT = N0.getValueType();
3557 bool IsSigned = (ISD::SADDO == N->getOpcode());
3558
3559 EVT CarryVT = N->getValueType(ResNo: 1);
3560 SDLoc DL(N);
3561
3562 // If the flag result is dead, turn this into an ADD.
3563 if (!N->hasAnyUseOfValue(Value: 1))
3564 return CombineTo(N, Res0: DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0, N2: N1),
3565 Res1: DAG.getUNDEF(VT: CarryVT));
3566
3567 // canonicalize constant to RHS.
3568 if (DAG.isConstantIntBuildVectorOrConstantInt(N: N0) &&
3569 !DAG.isConstantIntBuildVectorOrConstantInt(N: N1))
3570 return DAG.getNode(Opcode: N->getOpcode(), DL, VTList: N->getVTList(), N1, N2: N0);
3571
3572 // fold (addo x, 0) -> x + no carry out
3573 if (isNullOrNullSplat(V: N1))
3574 return CombineTo(N, Res0: N0, Res1: DAG.getConstant(Val: 0, DL, VT: CarryVT));
3575
3576 // If it cannot overflow, transform into an add.
3577 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3578 return CombineTo(N, Res0: DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0, N2: N1),
3579 Res1: DAG.getConstant(Val: 0, DL, VT: CarryVT));
3580
3581 if (IsSigned) {
3582 // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
3583 if (isBitwiseNot(V: N0) && isOneOrOneSplat(V: N1))
3584 return DAG.getNode(Opcode: ISD::SSUBO, DL, VTList: N->getVTList(),
3585 N1: DAG.getConstant(Val: 0, DL, VT), N2: N0.getOperand(i: 0));
3586 } else {
3587 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
3588 if (isBitwiseNot(V: N0) && isOneOrOneSplat(V: N1)) {
3589 SDValue Sub = DAG.getNode(Opcode: ISD::USUBO, DL, VTList: N->getVTList(),
3590 N1: DAG.getConstant(Val: 0, DL, VT), N2: N0.getOperand(i: 0));
3591 return CombineTo(
3592 N, Res0: Sub, Res1: DAG.getLogicalNOT(DL, Val: Sub.getValue(R: 1), VT: Sub->getValueType(ResNo: 1)));
3593 }
3594
3595 if (SDValue Combined = visitUADDOLike(N0, N1, N))
3596 return Combined;
3597
3598 if (SDValue Combined = visitUADDOLike(N0: N1, N1: N0, N))
3599 return Combined;
3600 }
3601
3602 return SDValue();
3603}
3604
3605SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3606 EVT VT = N0.getValueType();
3607 if (VT.isVector())
3608 return SDValue();
3609
3610 // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3611 // If Y + 1 cannot overflow.
3612 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(V: N1.getOperand(i: 1))) {
3613 SDValue Y = N1.getOperand(i: 0);
3614 SDValue One = DAG.getConstant(Val: 1, DL: SDLoc(N), VT: Y.getValueType());
3615 if (DAG.computeOverflowForUnsignedAdd(N0: Y, N1: One) == SelectionDAG::OFK_Never)
3616 return DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: SDLoc(N), VTList: N->getVTList(), N1: N0, N2: Y,
3617 N3: N1.getOperand(i: 2));
3618 }
3619
3620 // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3621 if (TLI.isOperationLegalOrCustom(Op: ISD::UADDO_CARRY, VT))
3622 if (SDValue Carry = getAsCarry(TLI, V: N1))
3623 return DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: SDLoc(N), VTList: N->getVTList(), N1: N0,
3624 N2: DAG.getConstant(Val: 0, DL: SDLoc(N), VT), N3: Carry);
3625
3626 return SDValue();
3627}
3628
3629SDValue DAGCombiner::visitADDE(SDNode *N) {
3630 SDValue N0 = N->getOperand(Num: 0);
3631 SDValue N1 = N->getOperand(Num: 1);
3632 SDValue CarryIn = N->getOperand(Num: 2);
3633
3634 // canonicalize constant to RHS
3635 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(Val&: N0);
3636 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(Val&: N1);
3637 if (N0C && !N1C)
3638 return DAG.getNode(Opcode: ISD::ADDE, DL: SDLoc(N), VTList: N->getVTList(),
3639 N1, N2: N0, N3: CarryIn);
3640
3641 // fold (adde x, y, false) -> (addc x, y)
3642 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3643 return DAG.getNode(Opcode: ISD::ADDC, DL: SDLoc(N), VTList: N->getVTList(), N1: N0, N2: N1);
3644
3645 return SDValue();
3646}
3647
3648SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3649 SDValue N0 = N->getOperand(Num: 0);
3650 SDValue N1 = N->getOperand(Num: 1);
3651 SDValue CarryIn = N->getOperand(Num: 2);
3652 SDLoc DL(N);
3653
3654 // canonicalize constant to RHS
3655 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(Val&: N0);
3656 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(Val&: N1);
3657 if (N0C && !N1C)
3658 return DAG.getNode(Opcode: ISD::UADDO_CARRY, DL, VTList: N->getVTList(), N1, N2: N0, N3: CarryIn);
3659
3660 // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3661 if (isNullConstant(V: CarryIn)) {
3662 if (!LegalOperations ||
3663 TLI.isOperationLegalOrCustom(Op: ISD::UADDO, VT: N->getValueType(ResNo: 0)))
3664 return DAG.getNode(Opcode: ISD::UADDO, DL, VTList: N->getVTList(), N1: N0, N2: N1);
3665 }
3666
3667 // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3668 if (isNullConstant(V: N0) && isNullConstant(V: N1)) {
3669 EVT VT = N0.getValueType();
3670 EVT CarryVT = CarryIn.getValueType();
3671 SDValue CarryExt = DAG.getBoolExtOrTrunc(Op: CarryIn, SL: DL, VT, OpVT: CarryVT);
3672 AddToWorklist(N: CarryExt.getNode());
3673 return CombineTo(N, Res0: DAG.getNode(Opcode: ISD::AND, DL, VT, N1: CarryExt,
3674 N2: DAG.getConstant(Val: 1, DL, VT)),
3675 Res1: DAG.getConstant(Val: 0, DL, VT: CarryVT));
3676 }
3677
3678 if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3679 return Combined;
3680
3681 if (SDValue Combined = visitUADDO_CARRYLike(N0: N1, N1: N0, CarryIn, N))
3682 return Combined;
3683
3684 // We want to avoid useless duplication.
3685 // TODO: This is done automatically for binary operations. As UADDO_CARRY is
3686  // not a binary operation, it is not really possible to leverage this
3687  // existing mechanism for it. However, if more operations require the same
3688  // deduplication logic, then it may be worth generalizing.
3689 SDValue Ops[] = {N1, N0, CarryIn};
3690 SDNode *CSENode =
3691 DAG.getNodeIfExists(Opcode: ISD::UADDO_CARRY, VTList: N->getVTList(), Ops, Flags: N->getFlags());
3692 if (CSENode)
3693 return SDValue(CSENode, 0);
3694
3695 return SDValue();
3696}
3697
3698/**
3699 * If we are facing some sort of diamond carry propagation pattern, try to
3700 * break it up to generate something like:
3701 * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
3702 *
3703 * The end result is usually an increase in operations required, but because the
3704 * carry is now linearized, other transforms can kick in and optimize the DAG.
3705 *
3706 * Patterns typically look something like
3707 * (uaddo A, B)
3708 * / \
3709 * Carry Sum
3710 * | \
3711 * | (uaddo_carry *, 0, Z)
3712 * | /
3713 * \ Carry
3714 * | /
3715 * (uaddo_carry X, *, *)
3716 *
3717 * But numerous variations exist. Our goal is to identify A, B, X and Z and
3718 * produce a combine with a single path for carry propagation.
3719 */
3720static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
3721 SelectionDAG &DAG, SDValue X,
3722 SDValue Carry0, SDValue Carry1,
3723 SDNode *N) {
3724 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3725 return SDValue();
3726 if (Carry1.getOpcode() != ISD::UADDO)
3727 return SDValue();
3728
3729 SDValue Z;
3730
3731 /**
3732 * First look for a suitable Z. It will present itself in the form of
3733 * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3734 */
3735 if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
3736 isNullConstant(V: Carry0.getOperand(i: 1))) {
3737 Z = Carry0.getOperand(i: 2);
3738 } else if (Carry0.getOpcode() == ISD::UADDO &&
3739 isOneConstant(V: Carry0.getOperand(i: 1))) {
3740 EVT VT = Carry0->getValueType(ResNo: 1);
3741 Z = DAG.getConstant(Val: 1, DL: SDLoc(Carry0.getOperand(i: 1)), VT);
3742 } else {
3743 // We couldn't find a suitable Z.
3744 return SDValue();
3745 }
3746
3747
3748 auto cancelDiamond = [&](SDValue A,SDValue B) {
3749 SDLoc DL(N);
3750 SDValue NewY =
3751 DAG.getNode(Opcode: ISD::UADDO_CARRY, DL, VTList: Carry0->getVTList(), N1: A, N2: B, N3: Z);
3752 Combiner.AddToWorklist(N: NewY.getNode());
3753 return DAG.getNode(Opcode: ISD::UADDO_CARRY, DL, VTList: N->getVTList(), N1: X,
3754 N2: DAG.getConstant(Val: 0, DL, VT: X.getValueType()),
3755 N3: NewY.getValue(R: 1));
3756 };
3757
3758 /**
3759 * (uaddo A, B)
3760 * |
3761 * Sum
3762 * |
3763 * (uaddo_carry *, 0, Z)
3764 */
3765 if (Carry0.getOperand(i: 0) == Carry1.getValue(R: 0)) {
3766 return cancelDiamond(Carry1.getOperand(i: 0), Carry1.getOperand(i: 1));
3767 }
3768
3769 /**
3770 * (uaddo_carry A, 0, Z)
3771 * |
3772 * Sum
3773 * |
3774 * (uaddo *, B)
3775 */
3776 if (Carry1.getOperand(i: 0) == Carry0.getValue(R: 0)) {
3777 return cancelDiamond(Carry0.getOperand(i: 0), Carry1.getOperand(i: 1));
3778 }
3779
3780 if (Carry1.getOperand(i: 1) == Carry0.getValue(R: 0)) {
3781 return cancelDiamond(Carry1.getOperand(i: 0), Carry0.getOperand(i: 0));
3782 }
3783
3784 return SDValue();
3785}
3786
3787// If we are facing some sort of diamond carry/borrow in/out pattern, try to
3788// match patterns like:
3789//
3790// (uaddo A, B) CarryIn
3791// | \ |
3792// | \ |
3793// PartialSum PartialCarryOutX /
3794// | | /
3795// | ____|____________/
3796// | / |
3797// (uaddo *, *) \________
3798// | \ \
3799// | \ |
3800// | PartialCarryOutY |
3801// | \ |
3802// | \ /
3803// AddCarrySum | ______/
3804// | /
3805// CarryOut = (or *, *)
3806//
3807// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
3808//
3809// {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
3810//
3811// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
3812// with a single path for carry/borrow out propagation.
3813static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
3814 SDValue N0, SDValue N1, SDNode *N) {
3815 SDValue Carry0 = getAsCarry(TLI, V: N0);
3816 if (!Carry0)
3817 return SDValue();
3818 SDValue Carry1 = getAsCarry(TLI, V: N1);
3819 if (!Carry1)
3820 return SDValue();
3821
3822 unsigned Opcode = Carry0.getOpcode();
3823 if (Opcode != Carry1.getOpcode())
3824 return SDValue();
3825 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3826 return SDValue();
3827 // Guarantee identical type of CarryOut
3828 EVT CarryOutType = N->getValueType(ResNo: 0);
3829 if (CarryOutType != Carry0.getValue(R: 1).getValueType() ||
3830 CarryOutType != Carry1.getValue(R: 1).getValueType())
3831 return SDValue();
3832
3833 // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3834 // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3835 if (Carry1.getNode()->isOperandOf(N: Carry0.getNode()))
3836 std::swap(a&: Carry0, b&: Carry1);
3837
3838  // Check if the nodes are connected in the expected way.
3839 if (Carry1.getOperand(i: 0) != Carry0.getValue(R: 0) &&
3840 Carry1.getOperand(i: 1) != Carry0.getValue(R: 0))
3841 return SDValue();
3842
3843 // The carry in value must be on the righthand side for subtraction.
3844 unsigned CarryInOperandNum =
3845 Carry1.getOperand(i: 0) == Carry0.getValue(R: 0) ? 1 : 0;
3846 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3847 return SDValue();
3848 SDValue CarryIn = Carry1.getOperand(i: CarryInOperandNum);
3849
3850 unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
3851 if (!TLI.isOperationLegalOrCustom(Op: NewOp, VT: Carry0.getValue(R: 0).getValueType()))
3852 return SDValue();
3853
3854 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3855 CarryIn = getAsCarry(TLI, V: CarryIn, ForceCarryReconstruction: true);
3856 if (!CarryIn)
3857 return SDValue();
3858
3859 SDLoc DL(N);
3860 CarryIn = DAG.getBoolExtOrTrunc(Op: CarryIn, SL: DL, VT: Carry1->getValueType(ResNo: 1),
3861 OpVT: Carry1->getValueType(ResNo: 0));
3862 SDValue Merged =
3863 DAG.getNode(Opcode: NewOp, DL, VTList: Carry1->getVTList(), N1: Carry0.getOperand(i: 0),
3864 N2: Carry0.getOperand(i: 1), N3: CarryIn);
3865
3866 // Please note that because we have proven that the result of the UADDO/USUBO
3867 // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
3868 // therefore prove that if the first UADDO/USUBO overflows, the second
3869 // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3870 // maximum value.
3871 //
3872 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3873 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3874 //
3875 // This is important because it means that OR and XOR can be used to merge
3876 // carry flags; and that AND can return a constant zero.
3877 //
3878 // TODO: match other operations that can merge flags (ADD, etc)
3879 DAG.ReplaceAllUsesOfValueWith(From: Carry1.getValue(R: 0), To: Merged.getValue(R: 0));
3880 if (N->getOpcode() == ISD::AND)
3881 return DAG.getConstant(Val: 0, DL, VT: CarryOutType);
3882 return Merged.getValue(R: 1);
3883}
3884
3885SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
3886 SDValue CarryIn, SDNode *N) {
3887 // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3888 // carry.
3889 if (isBitwiseNot(V: N0))
3890 if (SDValue NotC = extractBooleanFlip(V: CarryIn, DAG, TLI, Force: true)) {
3891 SDLoc DL(N);
3892 SDValue Sub = DAG.getNode(Opcode: ISD::USUBO_CARRY, DL, VTList: N->getVTList(), N1,
3893 N2: N0.getOperand(i: 0), N3: NotC);
3894 return CombineTo(
3895 N, Res0: Sub, Res1: DAG.getLogicalNOT(DL, Val: Sub.getValue(R: 1), VT: Sub->getValueType(ResNo: 1)));
3896 }
3897
3898 // Iff the flag result is dead:
3899 // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3900 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3901 // or the dependency between the instructions.
3902 if ((N0.getOpcode() == ISD::ADD ||
3903 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3904 N0.getValue(R: 1) != CarryIn)) &&
3905 isNullConstant(V: N1) && !N->hasAnyUseOfValue(Value: 1))
3906 return DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: SDLoc(N), VTList: N->getVTList(),
3907 N1: N0.getOperand(i: 0), N2: N0.getOperand(i: 1), N3: CarryIn);
3908
3909 /**
3910 * When one of the uaddo_carry arguments is itself a carry, we may be facing
3911 * a diamond carry propagation, in which case we try to transform the DAG
3912 * to ensure linear carry propagation if that is possible.
3913 */
3914 if (auto Y = getAsCarry(TLI, V: N1)) {
3915 // Because both are carries, Y and Z can be swapped.
3916 if (auto R = combineUADDO_CARRYDiamond(Combiner&: *this, DAG, X: N0, Carry0: Y, Carry1: CarryIn, N))
3917 return R;
3918 if (auto R = combineUADDO_CARRYDiamond(Combiner&: *this, DAG, X: N0, Carry0: CarryIn, Carry1: Y, N))
3919 return R;
3920 }
3921
3922 return SDValue();
3923}
3924
3925SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3926 SDValue CarryIn, SDNode *N) {
3927 // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3928 if (isBitwiseNot(V: N0)) {
3929 if (SDValue NotC = extractBooleanFlip(V: CarryIn, DAG, TLI, Force: true))
3930 return DAG.getNode(Opcode: ISD::SSUBO_CARRY, DL: SDLoc(N), VTList: N->getVTList(), N1,
3931 N2: N0.getOperand(i: 0), N3: NotC);
3932 }
3933
3934 return SDValue();
3935}
3936
3937SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3938 SDValue N0 = N->getOperand(Num: 0);
3939 SDValue N1 = N->getOperand(Num: 1);
3940 SDValue CarryIn = N->getOperand(Num: 2);
3941 SDLoc DL(N);
3942
3943 // canonicalize constant to RHS
3944 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(Val&: N0);
3945 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(Val&: N1);
3946 if (N0C && !N1C)
3947 return DAG.getNode(Opcode: ISD::SADDO_CARRY, DL, VTList: N->getVTList(), N1, N2: N0, N3: CarryIn);
3948
3949 // fold (saddo_carry x, y, false) -> (saddo x, y)
3950 if (isNullConstant(V: CarryIn)) {
3951 if (!LegalOperations ||
3952 TLI.isOperationLegalOrCustom(Op: ISD::SADDO, VT: N->getValueType(ResNo: 0)))
3953 return DAG.getNode(Opcode: ISD::SADDO, DL, VTList: N->getVTList(), N1: N0, N2: N1);
3954 }
3955
3956 if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
3957 return Combined;
3958
3959 if (SDValue Combined = visitSADDO_CARRYLike(N0: N1, N1: N0, CarryIn, N))
3960 return Combined;
3961
3962 return SDValue();
3963}
3964
3965// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3966// clamp/truncation if necessary.
3967static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3968 SDValue RHS, SelectionDAG &DAG,
3969 const SDLoc &DL) {
3970 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3971 "Illegal truncation");
3972
3973 if (DstVT == SrcVT)
3974 return DAG.getNode(Opcode: ISD::USUBSAT, DL, VT: DstVT, N1: LHS, N2: RHS);
3975
3976 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3977 // clamping RHS.
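      // This is sound because any RHS above the DstVT maximum already saturates
      // the subtraction to zero for an LHS that fits in DstVT, and so does the
      // clamped limit.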
3978 APInt UpperBits = APInt::getBitsSetFrom(numBits: SrcVT.getScalarSizeInBits(),
3979 loBit: DstVT.getScalarSizeInBits());
3980 if (!DAG.MaskedValueIsZero(Op: LHS, Mask: UpperBits))
3981 return SDValue();
3982
3983 SDValue SatLimit =
3984 DAG.getConstant(Val: APInt::getLowBitsSet(numBits: SrcVT.getScalarSizeInBits(),
3985 loBitsSet: DstVT.getScalarSizeInBits()),
3986 DL, VT: SrcVT);
3987 RHS = DAG.getNode(Opcode: ISD::UMIN, DL, VT: SrcVT, N1: RHS, N2: SatLimit);
3988 RHS = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: DstVT, Operand: RHS);
3989 LHS = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: DstVT, Operand: LHS);
3990 return DAG.getNode(Opcode: ISD::USUBSAT, DL, VT: DstVT, N1: LHS, N2: RHS);
3991}
3992
3993// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3994// usubsat(a,b), optionally as a truncated type.
3995SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) {
3996 if (N->getOpcode() != ISD::SUB ||
3997 !(!LegalOperations || hasOperation(Opcode: ISD::USUBSAT, VT: DstVT)))
3998 return SDValue();
3999
4000 EVT SubVT = N->getValueType(ResNo: 0);
4001 SDValue Op0 = N->getOperand(Num: 0);
4002 SDValue Op1 = N->getOperand(Num: 1);
4003
4004 // Try to find umax(a,b) - b or a - umin(a,b) patterns
4005  // that may be converted to usubsat(a,b).
4006 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
4007 SDValue MaxLHS = Op0.getOperand(i: 0);
4008 SDValue MaxRHS = Op0.getOperand(i: 1);
4009 if (MaxLHS == Op1)
4010 return getTruncatedUSUBSAT(DstVT, SrcVT: SubVT, LHS: MaxRHS, RHS: Op1, DAG, DL);
4011 if (MaxRHS == Op1)
4012 return getTruncatedUSUBSAT(DstVT, SrcVT: SubVT, LHS: MaxLHS, RHS: Op1, DAG, DL);
4013 }
4014
4015 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
4016 SDValue MinLHS = Op1.getOperand(i: 0);
4017 SDValue MinRHS = Op1.getOperand(i: 1);
4018 if (MinLHS == Op0)
4019 return getTruncatedUSUBSAT(DstVT, SrcVT: SubVT, LHS: Op0, RHS: MinRHS, DAG, DL);
4020 if (MinRHS == Op0)
4021 return getTruncatedUSUBSAT(DstVT, SrcVT: SubVT, LHS: Op0, RHS: MinLHS, DAG, DL);
4022 }
4023
4024 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
4025 if (Op1.getOpcode() == ISD::TRUNCATE &&
4026 Op1.getOperand(i: 0).getOpcode() == ISD::UMIN &&
4027 Op1.getOperand(i: 0).hasOneUse()) {
4028 SDValue MinLHS = Op1.getOperand(i: 0).getOperand(i: 0);
4029 SDValue MinRHS = Op1.getOperand(i: 0).getOperand(i: 1);
4030 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(i: 0) == Op0)
4031 return getTruncatedUSUBSAT(DstVT, SrcVT: MinLHS.getValueType(), LHS: MinLHS, RHS: MinRHS,
4032 DAG, DL);
4033 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(i: 0) == Op0)
4034 return getTruncatedUSUBSAT(DstVT, SrcVT: MinLHS.getValueType(), LHS: MinRHS, RHS: MinLHS,
4035 DAG, DL);
4036 }
4037
4038 return SDValue();
4039}
4040
4041// Refinement of DAG/Type Legalisation (promotion) when CTLZ is used for
4042// counting leading ones. Broadly, it replaces the subtraction with a left
4043// shift.
4044//
4045// * DAG Legalisation Pattern:
4046//
4047// (sub (ctlz (zeroextend (not Src)))
4048// BitWidthDiff)
4049//
4050// if BitWidthDiff == BitWidth(Node) - BitWidth(Src)
4051// -->
4052//
4053// (ctlz_zero_undef (not (shl (anyextend Src)
4054// BitWidthDiff)))
4055//
4056// * Type Legalisation Pattern:
4057//
4058// (sub (ctlz (and (xor Src XorMask)
4059// AndMask))
4060// BitWidthDiff)
4061//
4062// if AndMask has only trailing ones
4063// and MaskBitWidth(AndMask) == BitWidth(Node) - BitWidthDiff
4064// and XorMask has more trailing ones than AndMask
4065// -->
4066//
4067// (ctlz_zero_undef (not (shl Src BitWidthDiff)))
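    //
    // For example, when counting the leading ones of an i8 Src promoted to i32,
    // (ctlz (zext (not Src))) - 24 equals
    // (ctlz_zero_undef (not (shl (anyext Src), 24))): the shift places Src in
    // the top 8 bits and the 'not' fills the low 24 bits with ones, so both
    // expressions count exactly Src's leading ones.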
4068template <class MatchContextClass>
4069static SDValue foldSubCtlzNot(SDNode *N, SelectionDAG &DAG) {
4070 const SDLoc DL(N);
4071 SDValue N0 = N->getOperand(Num: 0);
4072 EVT VT = N0.getValueType();
4073 unsigned BitWidth = VT.getScalarSizeInBits();
4074
4075 MatchContextClass Matcher(DAG, DAG.getTargetLoweringInfo(), N);
4076
4077 APInt AndMask;
4078 APInt XorMask;
4079 APInt BitWidthDiff;
4080
4081 SDValue CtlzOp;
4082 SDValue Src;
4083
4084 if (!sd_context_match(
4085 N, Matcher, m_Sub(L: m_Ctlz(Op: m_Value(N&: CtlzOp)), R: m_ConstInt(V&: BitWidthDiff))))
4086 return SDValue();
4087
4088 if (sd_context_match(CtlzOp, Matcher, m_ZExt(Op: m_Not(V: m_Value(N&: Src))))) {
4089 // DAG Legalisation Pattern:
4090    // (sub (ctlz (zero_extend (not Op))) BitWidthDiff)
4091 if ((BitWidth - Src.getValueType().getScalarSizeInBits()) != BitWidthDiff)
4092 return SDValue();
4093
4094 Src = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT, Operand: Src);
4095 } else if (sd_context_match(CtlzOp, Matcher,
4096 m_And(L: m_Xor(L: m_Value(N&: Src), R: m_ConstInt(V&: XorMask)),
4097 R: m_ConstInt(V&: AndMask)))) {
4098 // Type Legalisation Pattern:
4099 // (sub (ctlz (and (xor Op XorMask) AndMask)) BitWidthDiff)
4100 if (BitWidthDiff.getZExtValue() >= BitWidth)
4101 return SDValue();
4102 unsigned AndMaskWidth = BitWidth - BitWidthDiff.getZExtValue();
4103 if (!(AndMask.isMask(numBits: AndMaskWidth) && XorMask.countr_one() >= AndMaskWidth))
4104 return SDValue();
4105 } else
4106 return SDValue();
4107
4108 SDValue ShiftConst = DAG.getShiftAmountConstant(Val: BitWidthDiff, VT, DL);
4109 SDValue LShift = Matcher.getNode(ISD::SHL, DL, VT, Src, ShiftConst);
4110 SDValue Not =
4111 Matcher.getNode(ISD::XOR, DL, VT, LShift, DAG.getAllOnesConstant(DL, VT));
4112
4113 return Matcher.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, Not);
4114}
4115
4116// Fold sub(x, mul(divrem(x,y)[0], y)) to divrem(x, y)[1]
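    // This holds because x - (x / y) * y == x % y for both the signed and
    // unsigned forms, and divrem already produces that remainder as result 1.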
4117static SDValue foldRemainderIdiom(SDNode *N, SelectionDAG &DAG,
4118 const SDLoc &DL) {
4119 assert(N->getOpcode() == ISD::SUB && "Node must be a SUB");
4120 SDValue Sub0 = N->getOperand(Num: 0);
4121 SDValue Sub1 = N->getOperand(Num: 1);
4122
4123 auto CheckAndFoldMulCase = [&](SDValue DivRem, SDValue MaybeY) -> SDValue {
4124 if ((DivRem.getOpcode() == ISD::SDIVREM ||
4125 DivRem.getOpcode() == ISD::UDIVREM) &&
4126 DivRem.getResNo() == 0 && DivRem.getOperand(i: 0) == Sub0 &&
4127 DivRem.getOperand(i: 1) == MaybeY) {
4128 return SDValue(DivRem.getNode(), 1);
4129 }
4130 return SDValue();
4131 };
4132
4133 if (Sub1.getOpcode() == ISD::MUL) {
4134 // (sub x, (mul divrem(x,y)[0], y))
4135 SDValue Mul0 = Sub1.getOperand(i: 0);
4136 SDValue Mul1 = Sub1.getOperand(i: 1);
4137
4138 if (SDValue Res = CheckAndFoldMulCase(Mul0, Mul1))
4139 return Res;
4140
4141 if (SDValue Res = CheckAndFoldMulCase(Mul1, Mul0))
4142 return Res;
4143
4144 } else if (Sub1.getOpcode() == ISD::SHL) {
4145 // Handle (sub x, (shl divrem(x,y)[0], C)) where y = 1 << C
4146 SDValue Shl0 = Sub1.getOperand(i: 0);
4147 SDValue Shl1 = Sub1.getOperand(i: 1);
4148 // Check if Shl0 is divrem(x, Y)[0]
4149 if ((Shl0.getOpcode() == ISD::SDIVREM ||
4150 Shl0.getOpcode() == ISD::UDIVREM) &&
4151 Shl0.getResNo() == 0 && Shl0.getOperand(i: 0) == Sub0) {
4152
4153 SDValue Divisor = Shl0.getOperand(i: 1);
4154
4155 ConstantSDNode *DivC = isConstOrConstSplat(N: Divisor);
4156 ConstantSDNode *ShC = isConstOrConstSplat(N: Shl1);
4157 if (!DivC || !ShC)
4158 return SDValue();
4159
4160 if (DivC->getAPIntValue().isPowerOf2() &&
4161 DivC->getAPIntValue().logBase2() == ShC->getAPIntValue())
4162 return SDValue(Shl0.getNode(), 1);
4163 }
4164 }
4165 return SDValue();
4166}
4167
4168// Since it may not be valid to emit a fold to zero for vector initializers,
4169// check if we can before folding.
4170static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
4171 SelectionDAG &DAG, bool LegalOperations) {
4172 if (!VT.isVector())
4173 return DAG.getConstant(Val: 0, DL, VT);
4174 if (!LegalOperations || TLI.isOperationLegal(Op: ISD::BUILD_VECTOR, VT))
4175 return DAG.getConstant(Val: 0, DL, VT);
4176 return SDValue();
4177}
4178
4179SDValue DAGCombiner::visitSUB(SDNode *N) {
4180 SDValue N0 = N->getOperand(Num: 0);
4181 SDValue N1 = N->getOperand(Num: 1);
4182 EVT VT = N0.getValueType();
4183 unsigned BitWidth = VT.getScalarSizeInBits();
4184 SDLoc DL(N);
4185
4186 if (SDValue V = foldSubCtlzNot<EmptyMatchContext>(N, DAG))
4187 return V;
4188
4189 // fold (sub x, x) -> 0
4190 if (N0 == N1)
4191 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4192
4193 // fold (sub c1, c2) -> c3
4194 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::SUB, DL, VT, Ops: {N0, N1}))
4195 return C;
4196
4197 // fold vector ops
4198 if (VT.isVector()) {
4199 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4200 return FoldedVOp;
4201
4202 // fold (sub x, 0) -> x, vector edition
4203 if (ISD::isConstantSplatVectorAllZeros(N: N1.getNode()))
4204 return N0;
4205 }
4206
4207 // (sub x, ([v]select (ult x, y), 0, y)) -> (umin x, (sub x, y))
4208 // (sub x, ([v]select (uge x, y), y, 0)) -> (umin x, (sub x, y))
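      // When x < y the select yields 0, so the subtraction leaves x unchanged;
      // the wrapped x - y is then greater than x in the unsigned order, so
      // umin also picks x. Otherwise both forms compute x - y.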
4209 if (N1.hasOneUse() && hasUMin(VT)) {
4210 SDValue Y;
4211 auto MS0 = m_Specific(N: N0);
4212 auto MVY = m_Value(N&: Y);
4213 auto MZ = m_Zero();
4214 auto MCC1 = m_SpecificCondCode(CC: ISD::SETULT);
4215 auto MCC2 = m_SpecificCondCode(CC: ISD::SETUGE);
4216
4217 if (sd_match(N: N1, P: m_SelectCCLike(L: MS0, R: MVY, T: MZ, F: m_Deferred(V&: Y), CC: MCC1)) ||
4218 sd_match(N: N1, P: m_SelectCCLike(L: MS0, R: MVY, T: m_Deferred(V&: Y), F: MZ, CC: MCC2)) ||
4219 sd_match(N: N1, P: m_VSelect(Cond: m_SetCC(LHS: MS0, RHS: MVY, CC: MCC1), T: MZ, F: m_Deferred(V&: Y))) ||
4220 sd_match(N: N1, P: m_VSelect(Cond: m_SetCC(LHS: MS0, RHS: MVY, CC: MCC2), T: m_Deferred(V&: Y), F: MZ)))
4221
4222 return DAG.getNode(Opcode: ISD::UMIN, DL, VT, N1: N0,
4223 N2: DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: N0, N2: Y));
4224 }
4225
4226 if (SDValue NewSel = foldBinOpIntoSelect(BO: N))
4227 return NewSel;
4228
4229 // fold (sub x, c) -> (add x, -c)
4230 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N: N1))
4231 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0,
4232 N2: DAG.getConstant(Val: -N1C->getAPIntValue(), DL, VT));
4233
4234 if (isNullOrNullSplat(V: N0)) {
4235 // Right-shifting everything out but the sign bit followed by negation is
4236 // the same as flipping arithmetic/logical shift type without the negation:
4237 // -(X >>u 31) -> (X >>s 31)
4238 // -(X >>s 31) -> (X >>u 31)
4239 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
4240 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N: N1.getOperand(i: 1));
4241 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
4242 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
4243 if (!LegalOperations || TLI.isOperationLegal(Op: NewSh, VT))
4244 return DAG.getNode(Opcode: NewSh, DL, VT, N1: N1.getOperand(i: 0), N2: N1.getOperand(i: 1));
4245 }
4246 }
4247
4248 // 0 - X --> 0 if the sub is NUW.
4249 if (N->getFlags().hasNoUnsignedWrap())
4250 return N0;
4251
4252 if (DAG.MaskedValueIsZero(Op: N1, Mask: ~APInt::getSignMask(BitWidth))) {
4253 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
4254 // N1 must be 0 because negating the minimum signed value is undefined.
4255 if (N->getFlags().hasNoSignedWrap())
4256 return N0;
4257
4258 // 0 - X --> X if X is 0 or the minimum signed value.
4259 return N1;
4260 }
4261
4262 // Convert 0 - abs(x).
4263 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
4264 !TLI.isOperationLegalOrCustom(Op: ISD::ABS, VT))
4265 if (SDValue Result = TLI.expandABS(N: N1.getNode(), DAG, IsNegative: true))
4266 return Result;
4267
4268 // Similar to the previous rule, but this time targeting an expanded abs.
4269 // (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X))
4270 // as well as
4271 // (sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X))
4272 // Note that these two are applicable to both signed and unsigned min/max.
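// Negation reverses the (signed or unsigned) order and maps the pair
// {X, -X} to itself, so negating its max gives its min and vice versa.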
4273 SDValue X;
4274 SDValue S0;
4275 auto NegPat = m_AllOf(preds: m_Neg(V: m_Deferred(V&: X)), preds: m_Value(N&: S0));
4276 if (sd_match(N: N1, P: m_OneUse(P: m_AnyOf(preds: m_SMax(L: m_Value(N&: X), R: NegPat),
4277 preds: m_UMax(L: m_Value(N&: X), R: NegPat),
4278 preds: m_SMin(L: m_Value(N&: X), R: NegPat),
4279 preds: m_UMin(L: m_Value(N&: X), R: NegPat))))) {
4280 unsigned NewOpc = ISD::getInverseMinMaxOpcode(MinMaxOpc: N1->getOpcode());
4281 if (hasOperation(Opcode: NewOpc, VT))
4282 return DAG.getNode(Opcode: NewOpc, DL, VT, N1: X, N2: S0);
4283 }
4284
4285 // Fold neg(splat(neg(x))) -> splat(x)
4286 if (VT.isVector()) {
4287 SDValue N1S = DAG.getSplatValue(V: N1, LegalTypes: true);
4288 if (N1S && N1S.getOpcode() == ISD::SUB &&
4289 isNullConstant(V: N1S.getOperand(i: 0)))
4290 return DAG.getSplat(VT, DL, Op: N1S.getOperand(i: 1));
4291 }
4292
4293 // sub 0, (and x, 1) --> SIGN_EXTEND_INREG x, i1
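// (and x, 1) is the low bit of x, so 0 - (and x, 1) is 0 or -1, which is
// exactly x sign-extended from i1.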
4294 if (N1.getOpcode() == ISD::AND && N1.hasOneUse() &&
4295 isOneOrOneSplat(V: N1->getOperand(Num: 1))) {
4296 EVT ExtVT = VT.changeElementType(Context&: *DAG.getContext(), EltVT: MVT::i1);
4297 if (TLI.getOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: ExtVT) ==
4298 TargetLowering::Legal) {
4299 return DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT, N1: N1->getOperand(Num: 0),
4300 N2: DAG.getValueType(ExtVT));
4301 }
4302 }
4303 }
4304
4305 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
4306 if (isAllOnesOrAllOnesSplat(V: N0))
4307 return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1, N2: N0);
4308
4309 // fold (A - (0-B)) -> A+B
4310 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(V: N1.getOperand(i: 0)))
4311 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0, N2: N1.getOperand(i: 1));
4312
4313 // fold A-(A-B) -> B
4314 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(i: 0))
4315 return N1.getOperand(i: 1);
4316
4317 // fold (A+B)-A -> B
4318 if (N0.getOpcode() == ISD::ADD && N0.getOperand(i: 0) == N1)
4319 return N0.getOperand(i: 1);
4320
4321 // fold (A+B)-B -> A
4322 if (N0.getOpcode() == ISD::ADD && N0.getOperand(i: 1) == N1)
4323 return N0.getOperand(i: 0);
4324
4325 // fold (A+C1)-C2 -> A+(C1-C2)
4326 if (N0.getOpcode() == ISD::ADD) {
4327 SDValue N01 = N0.getOperand(i: 1);
4328 if (SDValue NewC = DAG.FoldConstantArithmetic(Opcode: ISD::SUB, DL, VT, Ops: {N01, N1}))
4329 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0.getOperand(i: 0), N2: NewC);
4330 }
4331
4332 // fold C2-(A+C1) -> (C2-C1)-A
4333 if (N1.getOpcode() == ISD::ADD) {
4334 SDValue N11 = N1.getOperand(i: 1);
4335 if (SDValue NewC = DAG.FoldConstantArithmetic(Opcode: ISD::SUB, DL, VT, Ops: {N0, N11}))
4336 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: NewC, N2: N1.getOperand(i: 0));
4337 }
4338
4339 // fold (A-C1)-C2 -> A-(C1+C2)
4340 if (N0.getOpcode() == ISD::SUB) {
4341 SDValue N01 = N0.getOperand(i: 1);
4342 if (SDValue NewC = DAG.FoldConstantArithmetic(Opcode: ISD::ADD, DL, VT, Ops: {N01, N1}))
4343 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: N0.getOperand(i: 0), N2: NewC);
4344 }
4345
4346 // fold (c1-A)-c2 -> (c1-c2)-A
4347 if (N0.getOpcode() == ISD::SUB) {
4348 SDValue N00 = N0.getOperand(i: 0);
4349 if (SDValue NewC = DAG.FoldConstantArithmetic(Opcode: ISD::SUB, DL, VT, Ops: {N00, N1}))
4350 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: NewC, N2: N0.getOperand(i: 1));
4351 }
4352
4353 SDValue A, B, C;
4354
4355 // fold ((A+(B+C))-B) -> A+C
4356 if (sd_match(N: N0, P: m_Add(L: m_Value(N&: A), R: m_Add(L: m_Specific(N: N1), R: m_Value(N&: C)))))
4357 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: A, N2: C);
4358
4359 // fold ((A+(B-C))-B) -> A-C
4360 if (sd_match(N: N0, P: m_Add(L: m_Value(N&: A), R: m_Sub(L: m_Specific(N: N1), R: m_Value(N&: C)))))
4361 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: A, N2: C);
4362
4363 // fold ((A-(B-C))-C) -> A-B
4364 if (sd_match(N: N0, P: m_Sub(L: m_Value(N&: A), R: m_Sub(L: m_Value(N&: B), R: m_Specific(N: N1)))))
4365 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: A, N2: B);
4366
4367 // fold (A-(B-C)) -> A+(C-B)
4368 if (sd_match(N: N1, P: m_OneUse(P: m_Sub(L: m_Value(N&: B), R: m_Value(N&: C)))))
4369 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0,
4370 N2: DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: C, N2: B));
4371
4372 // A - (A & B) -> A & (~B)
4373 if (sd_match(N: N1, P: m_And(L: m_Specific(N: N0), R: m_Value(N&: B))) &&
4374 (N1.hasOneUse() || isConstantOrConstantVector(N: B, /*NoOpaques=*/true)))
4375 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: N0, N2: DAG.getNOT(DL, Val: B, VT));
4376
4377 // fold (A - (-B * C)) -> (A + (B * C))
4378 if (sd_match(N: N1, P: m_OneUse(P: m_Mul(L: m_Neg(V: m_Value(N&: B)), R: m_Value(N&: C)))))
4379 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0,
4380 N2: DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: B, N2: C));
4381
4382 // If either operand of a sub is undef, the result is undef
4383 if (N0.isUndef())
4384 return N0;
4385 if (N1.isUndef())
4386 return N1;
4387
4388 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
4389 return V;
4390
4391 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
4392 return V;
4393
4394 // Try to match the AVGCEIL fixed-width pattern
4395 if (SDValue V = foldSubToAvg(N, DL))
4396 return V;
4397
4398 if (SDValue V = foldAddSubMasked1(IsAdd: false, N0, N1, DAG, DL))
4399 return V;
4400
4401 if (SDValue V = foldSubToUSubSat(DstVT: VT, N, DL))
4402 return V;
4403
4404 if (SDValue V = foldRemainderIdiom(N, DAG, DL))
4405 return V;
4406
4407 // (A - B) - 1 -> add (xor B, -1), A
4408 if (sd_match(N, P: m_Sub(L: m_OneUse(P: m_Sub(L: m_Value(N&: A), R: m_Value(N&: B))),
4409 R: m_One(/*AllowUndefs=*/true))))
4410 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: A, N2: DAG.getNOT(DL, Val: B, VT));
4411
4412 // Look for:
4413 // sub y, (xor x, -1)
4414 // And if the target does not like this form then turn into:
4415 // add (add x, y), 1
4416 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(V: N1)) {
4417 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0, N2: N1.getOperand(i: 0));
4418 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Add, N2: DAG.getConstant(Val: 1, DL, VT));
4419 }
4420
4421 // Hoist one-use addition by non-opaque constant:
4422 // (x + C) - y -> (x - y) + C
4423 if (!reassociationCanBreakAddressingModePattern(Opc: ISD::SUB, DL, N, N0, N1) &&
4424 N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
4425 isConstantOrConstantVector(N: N0.getOperand(i: 1), /*NoOpaques=*/true)) {
4426 SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: N0.getOperand(i: 0), N2: N1);
4427 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Sub, N2: N0.getOperand(i: 1));
4428 }
4429 // y - (x + C) -> (y - x) - C
4430 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
4431 isConstantOrConstantVector(N: N1.getOperand(i: 1), /*NoOpaques=*/true)) {
4432 SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: N0, N2: N1.getOperand(i: 0));
4433 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Sub, N2: N1.getOperand(i: 1));
4434 }
4435 // (x - C) - y -> (x - y) - C
4436 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
4437 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4438 isConstantOrConstantVector(N: N0.getOperand(i: 1), /*NoOpaques=*/true)) {
4439 SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: N0.getOperand(i: 0), N2: N1);
4440 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Sub, N2: N0.getOperand(i: 1));
4441 }
4442 // (C - x) - y -> C - (x + y)
4443 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4444 isConstantOrConstantVector(N: N0.getOperand(i: 0), /*NoOpaques=*/true)) {
4445 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0.getOperand(i: 1), N2: N1);
4446 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: N0.getOperand(i: 0), N2: Add);
4447 }
4448
4449 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
4450 // rather than 'sub 0/1' (the sext should get folded).
4451 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
4452 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
4453 N1.getOperand(i: 0).getScalarValueSizeInBits() == 1 &&
4454 TLI.getBooleanContents(Type: VT) ==
4455 TargetLowering::ZeroOrNegativeOneBooleanContent) {
4456 SDValue SExt = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT, Operand: N1.getOperand(i: 0));
4457 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0, N2: SExt);
4458 }
4459
4460 // fold B = sra (A, size(A)-1); sub (xor (A, B), B) -> (abs A)
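// This is the branchless abs idiom: B is 0 or all-ones depending on the sign
// of A, so (A ^ B) - B conditionally inverts A and adds one.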
4461 if ((!LegalOperations || hasOperation(Opcode: ISD::ABS, VT)) &&
4462 sd_match(N: N1, P: m_Sra(L: m_Value(N&: A), R: m_SpecificInt(V: BitWidth - 1))) &&
4463 sd_match(N: N0, P: m_Xor(L: m_Specific(N: A), R: m_Specific(N: N1))))
4464 return DAG.getNode(Opcode: ISD::ABS, DL, VT, Operand: A);
4465
4466 // If the relocation model supports it, consider symbol offsets.
4467 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val&: N0))
4468 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
4469 // fold (sub Sym+c1, Sym+c2) -> c1-c2
4470 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(Val&: N1))
4471 if (GA->getGlobal() == GB->getGlobal())
4472 return DAG.getConstant(Val: (uint64_t)GA->getOffset() - GB->getOffset(),
4473 DL, VT);
4474 }
4475
4476 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
4477 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
4478 VTSDNode *TN = cast<VTSDNode>(Val: N1.getOperand(i: 1));
4479 if (TN->getVT() == MVT::i1) {
4480 SDValue ZExt = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: N1.getOperand(i: 0),
4481 N2: DAG.getConstant(Val: 1, DL, VT));
4482 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0, N2: ZExt);
4483 }
4484 }
4485
4486 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
4487 if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
4488 const APInt &IntVal = N1.getConstantOperandAPInt(i: 0);
4489 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0, N2: DAG.getVScale(DL, VT, MulImm: -IntVal));
4490 }
4491
4492 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
4493 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
4494 APInt NewStep = -N1.getConstantOperandAPInt(i: 0);
4495 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0,
4496 N2: DAG.getStepVector(DL, ResVT: VT, StepVal: NewStep));
4497 }
4498
4499 // Prefer an add for more folding potential and possibly better codegen:
4500 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
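// The logical shift yields 0 or 1 and the arithmetic shift yields 0 or -1,
// so subtracting the former equals adding the latter.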
4501 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
4502 SDValue ShAmt = N1.getOperand(i: 1);
4503 ConstantSDNode *ShAmtC = isConstOrConstSplat(N: ShAmt);
4504 if (ShAmtC && ShAmtC->getAPIntValue() == (BitWidth - 1)) {
4505 SDValue SRA = DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: N1.getOperand(i: 0), N2: ShAmt);
4506 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0, N2: SRA);
4507 }
4508 }
4509
4510 // As with the previous fold, prefer add for more folding potential.
4511 // Subtracting SMIN/0 is the same as adding SMIN/0:
4512 // N0 - (X << BW-1) --> N0 + (X << BW-1)
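// (X << BW-1) is either 0 or the sign-bit-only value 2^(BW-1), which is its
// own two's-complement negation, so subtracting it and adding it agree.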
4513 if (N1.getOpcode() == ISD::SHL) {
4514 ConstantSDNode *ShlC = isConstOrConstSplat(N: N1.getOperand(i: 1));
4515 if (ShlC && ShlC->getAPIntValue() == (BitWidth - 1))
4516 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1, N2: N0);
4517 }
4518
4519 // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4520 if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(V: N0.getOperand(i: 1)) &&
4521 N0.getResNo() == 0 && N0.hasOneUse())
4522 return DAG.getNode(Opcode: ISD::USUBO_CARRY, DL, VTList: N0->getVTList(),
4523 N1: N0.getOperand(i: 0), N2: N1, N3: N0.getOperand(i: 2));
4524
4525 if (TLI.isOperationLegalOrCustom(Op: ISD::UADDO_CARRY, VT)) {
4526 // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
4527 if (SDValue Carry = getAsCarry(TLI, V: N0)) {
4528 SDValue X = N1;
4529 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
4530 SDValue NegX = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Zero, N2: X);
4531 return DAG.getNode(Opcode: ISD::UADDO_CARRY, DL,
4532 VTList: DAG.getVTList(VT1: VT, VT2: Carry.getValueType()), N1: NegX, N2: Zero,
4533 N3: Carry);
4534 }
4535 }
4536
4537 // If there's no chance of borrowing from adjacent bits, then sub is xor:
4538 // sub C0, X --> xor X, C0
4539 if (ConstantSDNode *C0 = isConstOrConstSplat(N: N0)) {
4540 if (!C0->isOpaque()) {
4541 const APInt &C0Val = C0->getAPIntValue();
4542 const APInt &MaybeOnes = ~DAG.computeKnownBits(Op: N1).Zero;
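// MaybeOnes is the maximal value N1 can take. If even subtracting that from
// C0 borrows nothing (i.e. matches the xor), then no actual value of N1 can
// cause a borrow, so the sub behaves as an xor.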
4543 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4544 return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1, N2: N0);
4545 }
4546 }
4547
4548 // smax(a,b) - smin(a,b) --> abds(a,b)
4549 if ((!LegalOperations || hasOperation(Opcode: ISD::ABDS, VT)) &&
4550 sd_match(N: N0, DAG: &DAG, P: m_SMaxLike(L: m_Value(N&: A), R: m_Value(N&: B))) &&
4551 sd_match(N: N1, DAG: &DAG, P: m_SMinLike(L: m_Specific(N: A), R: m_Specific(N: B))))
4552 return DAG.getNode(Opcode: ISD::ABDS, DL, VT, N1: A, N2: B);
4553
4554 // smin(a,b) - smax(a,b) --> neg(abds(a,b))
4555 if (hasOperation(Opcode: ISD::ABDS, VT) &&
4556 sd_match(N: N0, DAG: &DAG, P: m_SMinLike(L: m_Value(N&: A), R: m_Value(N&: B))) &&
4557 sd_match(N: N1, DAG: &DAG, P: m_SMaxLike(L: m_Specific(N: A), R: m_Specific(N: B))))
4558 return DAG.getNegative(Val: DAG.getNode(Opcode: ISD::ABDS, DL, VT, N1: A, N2: B), DL, VT);
4559
4560 // umax(a,b) - umin(a,b) --> abdu(a,b)
4561 if ((!LegalOperations || hasOperation(Opcode: ISD::ABDU, VT)) &&
4562 sd_match(N: N0, DAG: &DAG, P: m_UMaxLike(L: m_Value(N&: A), R: m_Value(N&: B))) &&
4563 sd_match(N: N1, DAG: &DAG, P: m_UMinLike(L: m_Specific(N: A), R: m_Specific(N: B))))
4564 return DAG.getNode(Opcode: ISD::ABDU, DL, VT, N1: A, N2: B);
4565
4566 // umin(a,b) - umax(a,b) --> neg(abdu(a,b))
4567 if (hasOperation(Opcode: ISD::ABDU, VT) &&
4568 sd_match(N: N0, DAG: &DAG, P: m_UMinLike(L: m_Value(N&: A), R: m_Value(N&: B))) &&
4569 sd_match(N: N1, DAG: &DAG, P: m_UMaxLike(L: m_Specific(N: A), R: m_Specific(N: B))))
4570 return DAG.getNegative(Val: DAG.getNode(Opcode: ISD::ABDU, DL, VT, N1: A, N2: B), DL, VT);
4571
4572 return SDValue();
4573}
4574
4575SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
4576 unsigned Opcode = N->getOpcode();
4577 SDValue N0 = N->getOperand(Num: 0);
4578 SDValue N1 = N->getOperand(Num: 1);
4579 EVT VT = N0.getValueType();
4580 bool IsSigned = Opcode == ISD::SSUBSAT;
4581 SDLoc DL(N);
4582
4583 // fold (sub_sat x, undef) -> 0
4584 if (N0.isUndef() || N1.isUndef())
4585 return DAG.getConstant(Val: 0, DL, VT);
4586
4587 // fold (sub_sat x, x) -> 0
4588 if (N0 == N1)
4589 return DAG.getConstant(Val: 0, DL, VT);
4590
4591 // fold (sub_sat c1, c2) -> c3
4592 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, Ops: {N0, N1}))
4593 return C;
4594
4595 // fold vector ops
4596 if (VT.isVector()) {
4597 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4598 return FoldedVOp;
4599
4600 // fold (sub_sat x, 0) -> x, vector edition
4601 if (ISD::isConstantSplatVectorAllZeros(N: N1.getNode()))
4602 return N0;
4603 }
4604
4605 // fold (sub_sat x, 0) -> x
4606 if (isNullConstant(V: N1))
4607 return N0;
4608
4609 // If it cannot overflow, transform into a sub.
4610 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4611 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: N0, N2: N1);
4612
4613 return SDValue();
4614}
4615
4616SDValue DAGCombiner::visitSUBC(SDNode *N) {
4617 SDValue N0 = N->getOperand(Num: 0);
4618 SDValue N1 = N->getOperand(Num: 1);
4619 EVT VT = N0.getValueType();
4620 SDLoc DL(N);
4621
4622 // If the flag result is dead, turn this into an SUB.
4623 if (!N->hasAnyUseOfValue(Value: 1))
4624 return CombineTo(N, Res0: DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: N0, N2: N1),
4625 Res1: DAG.getNode(Opcode: ISD::CARRY_FALSE, DL, VT: MVT::Glue));
4626
4627 // fold (subc x, x) -> 0 + no borrow
4628 if (N0 == N1)
4629 return CombineTo(N, Res0: DAG.getConstant(Val: 0, DL, VT),
4630 Res1: DAG.getNode(Opcode: ISD::CARRY_FALSE, DL, VT: MVT::Glue));
4631
4632 // fold (subc x, 0) -> x + no borrow
4633 if (isNullConstant(V: N1))
4634 return CombineTo(N, Res0: N0, Res1: DAG.getNode(Opcode: ISD::CARRY_FALSE, DL, VT: MVT::Glue));
4635
4636 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4637 if (isAllOnesConstant(V: N0))
4638 return CombineTo(N, Res0: DAG.getNode(Opcode: ISD::XOR, DL, VT, N1, N2: N0),
4639 Res1: DAG.getNode(Opcode: ISD::CARRY_FALSE, DL, VT: MVT::Glue));
4640
4641 return SDValue();
4642}
4643
4644SDValue DAGCombiner::visitSUBO(SDNode *N) {
4645 SDValue N0 = N->getOperand(Num: 0);
4646 SDValue N1 = N->getOperand(Num: 1);
4647 EVT VT = N0.getValueType();
4648 bool IsSigned = (ISD::SSUBO == N->getOpcode());
4649
4650 EVT CarryVT = N->getValueType(ResNo: 1);
4651 SDLoc DL(N);
4652
4653 // If the flag result is dead, turn this into an SUB.
4654 if (!N->hasAnyUseOfValue(Value: 1))
4655 return CombineTo(N, Res0: DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: N0, N2: N1),
4656 Res1: DAG.getUNDEF(VT: CarryVT));
4657
4658 // fold (subo x, x) -> 0 + no borrow
4659 if (N0 == N1)
4660 return CombineTo(N, Res0: DAG.getConstant(Val: 0, DL, VT),
4661 Res1: DAG.getConstant(Val: 0, DL, VT: CarryVT));
4662
4663 // fold (subo x, c) -> (addo x, -c)
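// Signed only: negating the minimum signed value would itself overflow, and
// for unsigned the borrow from (x - c) is the complement of the carry from
// (x + -c), so the overflow flag would be wrong.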
4664 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N: N1))
4665 if (IsSigned && !N1C->isMinSignedValue())
4666 return DAG.getNode(Opcode: ISD::SADDO, DL, VTList: N->getVTList(), N1: N0,
4667 N2: DAG.getConstant(Val: -N1C->getAPIntValue(), DL, VT));
4668
4669 // fold (subo x, 0) -> x + no borrow
4670 if (isNullOrNullSplat(V: N1))
4671 return CombineTo(N, Res0: N0, Res1: DAG.getConstant(Val: 0, DL, VT: CarryVT));
4672
4673 // If it cannot overflow, transform into a sub.
4674 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4675 return CombineTo(N, Res0: DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: N0, N2: N1),
4676 Res1: DAG.getConstant(Val: 0, DL, VT: CarryVT));
4677
4678 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4679 if (!IsSigned && isAllOnesOrAllOnesSplat(V: N0))
4680 return CombineTo(N, Res0: DAG.getNode(Opcode: ISD::XOR, DL, VT, N1, N2: N0),
4681 Res1: DAG.getConstant(Val: 0, DL, VT: CarryVT));
4682
4683 return SDValue();
4684}
4685
4686SDValue DAGCombiner::visitSUBE(SDNode *N) {
4687 SDValue N0 = N->getOperand(Num: 0);
4688 SDValue N1 = N->getOperand(Num: 1);
4689 SDValue CarryIn = N->getOperand(Num: 2);
4690
4691 // fold (sube x, y, false) -> (subc x, y)
4692 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
4693 return DAG.getNode(Opcode: ISD::SUBC, DL: SDLoc(N), VTList: N->getVTList(), N1: N0, N2: N1);
4694
4695 return SDValue();
4696}
4697
4698SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
4699 SDValue N0 = N->getOperand(Num: 0);
4700 SDValue N1 = N->getOperand(Num: 1);
4701 SDValue CarryIn = N->getOperand(Num: 2);
4702
4703 // fold (usubo_carry x, y, false) -> (usubo x, y)
4704 if (isNullConstant(V: CarryIn)) {
4705 if (!LegalOperations ||
4706 TLI.isOperationLegalOrCustom(Op: ISD::USUBO, VT: N->getValueType(ResNo: 0)))
4707 return DAG.getNode(Opcode: ISD::USUBO, DL: SDLoc(N), VTList: N->getVTList(), N1: N0, N2: N1);
4708 }
4709
4710 return SDValue();
4711}
4712
4713SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
4714 SDValue N0 = N->getOperand(Num: 0);
4715 SDValue N1 = N->getOperand(Num: 1);
4716 SDValue CarryIn = N->getOperand(Num: 2);
4717
4718 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
4719 if (isNullConstant(V: CarryIn)) {
4720 if (!LegalOperations ||
4721 TLI.isOperationLegalOrCustom(Op: ISD::SSUBO, VT: N->getValueType(ResNo: 0)))
4722 return DAG.getNode(Opcode: ISD::SSUBO, DL: SDLoc(N), VTList: N->getVTList(), N1: N0, N2: N1);
4723 }
4724
4725 return SDValue();
4726}
4727
4728// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
4729// UMULFIXSAT here.
4730SDValue DAGCombiner::visitMULFIX(SDNode *N) {
4731 SDValue N0 = N->getOperand(Num: 0);
4732 SDValue N1 = N->getOperand(Num: 1);
4733 SDValue Scale = N->getOperand(Num: 2);
4734 EVT VT = N0.getValueType();
4735
4736 // fold (mulfix x, undef, scale) -> 0
4737 if (N0.isUndef() || N1.isUndef())
4738 return DAG.getConstant(Val: 0, DL: SDLoc(N), VT);
4739
4740 // Canonicalize constant to RHS (vector doesn't have to splat)
4741 if (DAG.isConstantIntBuildVectorOrConstantInt(N: N0) &&
4742 !DAG.isConstantIntBuildVectorOrConstantInt(N: N1))
4743 return DAG.getNode(Opcode: N->getOpcode(), DL: SDLoc(N), VT, N1, N2: N0, N3: Scale);
4744
4745 // fold (mulfix x, 0, scale) -> 0
4746 if (isNullConstant(V: N1))
4747 return DAG.getConstant(Val: 0, DL: SDLoc(N), VT);
4748
4749 return SDValue();
4750}
4751
4752template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
4753 SDValue N0 = N->getOperand(Num: 0);
4754 SDValue N1 = N->getOperand(Num: 1);
4755 EVT VT = N0.getValueType();
4756 unsigned BitWidth = VT.getScalarSizeInBits();
4757 SDLoc DL(N);
4758 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
4759 MatchContextClass Matcher(DAG, TLI, N);
4760
4761 // fold (mul x, undef) -> 0
4762 if (N0.isUndef() || N1.isUndef())
4763 return DAG.getConstant(Val: 0, DL, VT);
4764
4765 // fold (mul c1, c2) -> c1*c2
4766 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::MUL, DL, VT, Ops: {N0, N1}))
4767 return C;
4768
4769 // canonicalize constant to RHS (vector doesn't have to splat)
4770 if (DAG.isConstantIntBuildVectorOrConstantInt(N: N0) &&
4771 !DAG.isConstantIntBuildVectorOrConstantInt(N: N1))
4772 return Matcher.getNode(ISD::MUL, DL, VT, N1, N0);
4773
4774 bool N1IsConst = false;
4775 bool N1IsOpaqueConst = false;
4776 APInt ConstValue1;
4777
4778 // fold vector ops
4779 if (VT.isVector()) {
4780 // TODO: Change this to use SimplifyVBinOp when it supports VP op.
4781 if (!UseVP)
4782 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4783 return FoldedVOp;
4784
4785 N1IsConst = ISD::isConstantSplatVector(N: N1.getNode(), SplatValue&: ConstValue1);
4786 assert((!N1IsConst || ConstValue1.getBitWidth() == BitWidth) &&
4787 "Splat APInt should be element width");
4788 } else {
4789 N1IsConst = isa<ConstantSDNode>(Val: N1);
4790 if (N1IsConst) {
4791 ConstValue1 = N1->getAsAPIntVal();
4792 N1IsOpaqueConst = cast<ConstantSDNode>(Val&: N1)->isOpaque();
4793 }
4794 }
4795
4796 // fold (mul x, 0) -> 0
4797 if (N1IsConst && ConstValue1.isZero())
4798 return N1;
4799
4800 // fold (mul x, 1) -> x
4801 if (N1IsConst && ConstValue1.isOne())
4802 return N0;
4803
4804 if (!UseVP)
4805 if (SDValue NewSel = foldBinOpIntoSelect(BO: N))
4806 return NewSel;
4807
4808 // fold (mul x, -1) -> 0-x
4809 if (N1IsConst && ConstValue1.isAllOnes())
4810 return Matcher.getNode(ISD::SUB, DL, VT, DAG.getConstant(Val: 0, DL, VT), N0);
4811
4812 // fold (mul x, (1 << c)) -> x << c
4813 if (isConstantOrConstantVector(N: N1, /*NoOpaques*/ true) &&
4814 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4815 if (SDValue LogBase2 = BuildLogBase2(V: N1, DL)) {
4816 EVT ShiftVT = getShiftAmountTy(LHSTy: N0.getValueType());
4817 SDValue Trunc = DAG.getZExtOrTrunc(Op: LogBase2, DL, VT: ShiftVT);
4818 SDNodeFlags Flags;
4819 Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap());
4820 // TODO: Preserve setNoSignedWrap if LogBase2 isn't BitWidth - 1.
4821 return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc, Flags);
4822 }
4823 }
4824
4825 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4826 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4827 unsigned Log2Val = (-ConstValue1).logBase2();
4828
4829 // FIXME: If the input is something that is easily negated (e.g. a
4830 // single-use add), we should put the negate there.
4831 return Matcher.getNode(
4832 ISD::SUB, DL, VT, DAG.getConstant(Val: 0, DL, VT),
4833 Matcher.getNode(ISD::SHL, DL, VT, N0,
4834 DAG.getShiftAmountConstant(Val: Log2Val, VT, DL)));
4835 }
4836
4837 // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
4838 // hi result is in use, in case we hit this mid-legalization.
4839 if (!UseVP) {
4840 for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
4841 if (!LegalOperations || TLI.isOperationLegalOrCustom(Op: LoHiOpc, VT)) {
4842 SDVTList LoHiVT = DAG.getVTList(VT1: VT, VT2: VT);
4843 // TODO: Can we match commutable operands with getNodeIfExists?
4844 if (SDNode *LoHi = DAG.getNodeIfExists(Opcode: LoHiOpc, VTList: LoHiVT, Ops: {N0, N1}))
4845 if (LoHi->hasAnyUseOfValue(Value: 1))
4846 return SDValue(LoHi, 0);
4847 if (SDNode *LoHi = DAG.getNodeIfExists(Opcode: LoHiOpc, VTList: LoHiVT, Ops: {N1, N0}))
4848 if (LoHi->hasAnyUseOfValue(Value: 1))
4849 return SDValue(LoHi, 0);
4850 }
4851 }
4852 }
4853
4854 // Try to transform:
4855 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4856 // mul x, (2^N + 1) --> add (shl x, N), x
4857 // mul x, (2^N - 1) --> sub (shl x, N), x
4858 // Examples: x * 33 --> (x << 5) + x
4859 // x * 15 --> (x << 4) - x
4860 // x * -33 --> -((x << 5) + x)
4861 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4862 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4863 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4864 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4865 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4866 // x * 0xf800 --> (x << 16) - (x << 11)
4867 // x * -0x8800 --> -((x << 15) + (x << 11))
4868 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4869 if (!UseVP && N1IsConst &&
4870 TLI.decomposeMulByConstant(Context&: *DAG.getContext(), VT, C: N1)) {
4871 // TODO: We could handle more general decomposition of any constant by
4872 // having the target set a limit on number of ops and making a
4873 // callback to determine that sequence (similar to sqrt expansion).
4874 unsigned MathOp = ISD::DELETED_NODE;
4875 APInt MulC = ConstValue1.abs();
4876 // The constant `2` should be treated as (2^0 + 1).
4877 unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
4878 MulC.lshrInPlace(ShiftAmt: TZeros);
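// e.g. x * 0x8800: TZeros = 11 and MulC becomes 0x11 = 2^4 + 1, so MathOp is
// ADD and ShAmt = 4 + 11 = 15, matching the (x << 15) + (x << 11) example
// above.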
4879 if ((MulC - 1).isPowerOf2())
4880 MathOp = ISD::ADD;
4881 else if ((MulC + 1).isPowerOf2())
4882 MathOp = ISD::SUB;
4883
4884 if (MathOp != ISD::DELETED_NODE) {
4885 unsigned ShAmt =
4886 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4887 ShAmt += TZeros;
4888 assert(ShAmt < BitWidth &&
4889 "multiply-by-constant generated out of bounds shift");
4890 SDValue Shl =
4891 DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: N0, N2: DAG.getConstant(Val: ShAmt, DL, VT));
4892 SDValue R =
4893 TZeros ? DAG.getNode(Opcode: MathOp, DL, VT, N1: Shl,
4894 N2: DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: N0,
4895 N2: DAG.getConstant(Val: TZeros, DL, VT)))
4896 : DAG.getNode(Opcode: MathOp, DL, VT, N1: Shl, N2: N0);
4897 if (ConstValue1.isNegative())
4898 R = DAG.getNegative(Val: R, DL, VT);
4899 return R;
4900 }
4901 }
4902
4903 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4904 if (sd_context_match(N0, Matcher, m_Opc(Opcode: ISD::SHL))) {
4905 SDValue N01 = N0.getOperand(i: 1);
4906 if (SDValue C3 = DAG.FoldConstantArithmetic(Opcode: ISD::SHL, DL, VT, Ops: {N1, N01}))
4907 return DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N0.getOperand(i: 0), N2: C3);
4908 }
4909
4910 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4911 // use.
4912 {
4913 SDValue Sh, Y;
4914
4915 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
4916 if (sd_context_match(N0, Matcher, m_OneUse(P: m_Opc(Opcode: ISD::SHL))) &&
4917 isConstantOrConstantVector(N: N0.getOperand(i: 1))) {
4918 Sh = N0; Y = N1;
4919 } else if (sd_context_match(N1, Matcher, m_OneUse(P: m_Opc(Opcode: ISD::SHL))) &&
4920 isConstantOrConstantVector(N: N1.getOperand(i: 1))) {
4921 Sh = N1; Y = N0;
4922 }
4923
4924 if (Sh.getNode()) {
4925 SDValue Mul = Matcher.getNode(ISD::MUL, DL, VT, Sh.getOperand(i: 0), Y);
4926 return Matcher.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(i: 1));
4927 }
4928 }
4929
4930 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4931 if (sd_context_match(N0, Matcher, m_Opc(Opcode: ISD::ADD)) &&
4932 isConstantOrConstantVector(N: N1) &&
4933 isConstantOrConstantVector(N: N0.getOperand(i: 1)) &&
4934 isMulAddWithConstProfitable(MulNode: N, AddNode: N0, ConstNode: N1))
4935 return Matcher.getNode(
4936 ISD::ADD, DL, VT,
4937 Matcher.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(i: 0), N1),
4938 Matcher.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(i: 1), N1));
4939
4940 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4941 ConstantSDNode *NC1 = isConstOrConstSplat(N: N1);
4942 if (!UseVP && N0.getOpcode() == ISD::VSCALE && NC1) {
4943 const APInt &C0 = N0.getConstantOperandAPInt(i: 0);
4944 const APInt &C1 = NC1->getAPIntValue();
4945 return DAG.getVScale(DL, VT, MulImm: C0 * C1);
4946 }
4947
4948 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4949 APInt MulVal;
4950 if (!UseVP && N0.getOpcode() == ISD::STEP_VECTOR &&
4951 ISD::isConstantSplatVector(N: N1.getNode(), SplatValue&: MulVal)) {
4952 const APInt &C0 = N0.getConstantOperandAPInt(i: 0);
4953 APInt NewStep = C0 * MulVal;
4954 return DAG.getStepVector(DL, ResVT: VT, StepVal: NewStep);
4955 }
4956
4957 // Fold Y = sra (X, size(X)-1); mul (or (Y, 1), X) -> (abs X)
4958 SDValue X;
4959 if (!UseVP && (!LegalOperations || hasOperation(Opcode: ISD::ABS, VT)) &&
4960 sd_context_match(
4961 N, Matcher,
4962 m_Mul(L: m_Or(L: m_Sra(L: m_Value(N&: X), R: m_SpecificInt(V: BitWidth - 1)), R: m_One()),
4963 R: m_Deferred(V&: X)))) {
4964 return Matcher.getNode(ISD::ABS, DL, VT, X);
4965 }
4966
4967 // Fold (mul x, 0/undef) -> 0 and (mul x, 1) -> x
4968 // into and(x, mask):
4970 // We can replace vectors with '0' and '1' factors with a clearing mask.
4971 if (VT.isFixedLengthVector()) {
4972 unsigned NumElts = VT.getVectorNumElements();
4973 SmallBitVector ClearMask;
4974 ClearMask.reserve(N: NumElts);
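// IsClearMask: a zero or undef factor clears that lane (mask bit true), a
// factor of one keeps the lane (mask bit false), and any other constant
// aborts the transform.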
4975 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4976 if (!V || V->isZero()) {
4977 ClearMask.push_back(Val: true);
4978 return true;
4979 }
4980 ClearMask.push_back(Val: false);
4981 return V->isOne();
4982 };
4983 if ((!LegalOperations || TLI.isOperationLegalOrCustom(Op: ISD::AND, VT)) &&
4984 ISD::matchUnaryPredicate(Op: N1, Match: IsClearMask, /*AllowUndefs*/ true)) {
4985 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4986 EVT LegalSVT = N1.getOperand(i: 0).getValueType();
4987 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: LegalSVT);
4988 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT: LegalSVT);
4989 SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4990 for (unsigned I = 0; I != NumElts; ++I)
4991 if (ClearMask[I])
4992 Mask[I] = Zero;
4993 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: N0, N2: DAG.getBuildVector(VT, DL, Ops: Mask));
4994 }
4995 }
4996
4997 // reassociate mul
4998 // TODO: Change reassociateOps to support vp ops.
4999 if (!UseVP)
5000 if (SDValue RMUL = reassociateOps(Opc: ISD::MUL, DL, N0, N1, Flags: N->getFlags()))
5001 return RMUL;
5002
5003 // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
5004 // TODO: Change reassociateReduction to support vp ops.
5005 if (!UseVP)
5006 if (SDValue SD =
5007 reassociateReduction(RedOpc: ISD::VECREDUCE_MUL, Opc: ISD::MUL, DL, VT, N0, N1))
5008 return SD;
5009
5010 // Simplify the operands using demanded-bits information.
5011 if (SimplifyDemandedBits(Op: SDValue(N, 0)))
5012 return SDValue(N, 0);
5013
5014 return SDValue();
5015}
5016
5017 /// Return true if a divmod libcall is available.
5018static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
5019 const SelectionDAG &DAG) {
5020 RTLIB::Libcall LC;
5021 EVT NodeType = Node->getValueType(ResNo: 0);
5022 if (!NodeType.isSimple())
5023 return false;
5024 switch (NodeType.getSimpleVT().SimpleTy) {
5025 default: return false; // No libcall for vector types.
5026 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
5027 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
5028 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
5029 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
5030 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
5031 }
5032
5033 return DAG.getLibcalls().getLibcallImpl(Call: LC) != RTLIB::Unsupported;
5034}
5035
5036/// Issue divrem if both quotient and remainder are needed.
5037SDValue DAGCombiner::useDivRem(SDNode *Node) {
5038 if (Node->use_empty())
5039 return SDValue(); // This is a dead node, leave it alone.
5040
5041 unsigned Opcode = Node->getOpcode();
5042 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
5043 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
5044
5045 // DivMod libcalls can still work on non-legal types if we end up using libcalls.
5046 EVT VT = Node->getValueType(ResNo: 0);
5047 if (VT.isVector() || !VT.isInteger())
5048 return SDValue();
5049
5050 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(Op: DivRemOpc, VT))
5051 return SDValue();
5052
5053 // If DIVREM is going to get expanded into a libcall,
5054 // but there is no libcall available, then don't combine.
5055 if (!TLI.isOperationLegalOrCustom(Op: DivRemOpc, VT) &&
5056 !isDivRemLibcallAvailable(Node, isSigned, DAG))
5057 return SDValue();
5058
5059 // If div is legal, it's better to do the normal expansion
5060 unsigned OtherOpcode = 0;
5061 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
5062 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
5063 if (TLI.isOperationLegalOrCustom(Op: Opcode, VT))
5064 return SDValue();
5065 } else {
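// The node is a REM, so the 'div' to check for legality is OtherOpcode.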
5066 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
5067 if (TLI.isOperationLegalOrCustom(Op: OtherOpcode, VT))
5068 return SDValue();
5069 }
5070
5071 SDValue Op0 = Node->getOperand(Num: 0);
5072 SDValue Op1 = Node->getOperand(Num: 1);
5073 SDValue combined;
5074 for (SDNode *User : Op0->users()) {
5075 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
5076 User->use_empty())
5077 continue;
5078 // Convert the other matching node(s), too;
5079 // otherwise, the DIVREM may get target-legalized into something
5080 // target-specific that we won't be able to recognize.
5081 unsigned UserOpc = User->getOpcode();
5082 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
5083 User->getOperand(Num: 0) == Op0 &&
5084 User->getOperand(Num: 1) == Op1) {
5085 if (!combined) {
5086 if (UserOpc == OtherOpcode) {
5087 SDVTList VTs = DAG.getVTList(VT1: VT, VT2: VT);
5088 combined = DAG.getNode(Opcode: DivRemOpc, DL: SDLoc(Node), VTList: VTs, N1: Op0, N2: Op1);
5089 } else if (UserOpc == DivRemOpc) {
5090 combined = SDValue(User, 0);
5091 } else {
5092 assert(UserOpc == Opcode);
5093 continue;
5094 }
5095 }
5096 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
5097 CombineTo(N: User, Res: combined);
5098 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
5099 CombineTo(N: User, Res: combined.getValue(R: 1));
5100 }
5101 }
5102 return combined;
5103}
5104
5105static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
5106 SDValue N0 = N->getOperand(Num: 0);
5107 SDValue N1 = N->getOperand(Num: 1);
5108 EVT VT = N->getValueType(ResNo: 0);
5109 SDLoc DL(N);
5110
5111 unsigned Opc = N->getOpcode();
5112 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
5113
5114 // X / undef -> undef
5115 // X % undef -> undef
5116 // X / 0 -> undef
5117 // X % 0 -> undef
5118 // NOTE: This includes vectors where any divisor element is zero/undef.
5119 if (DAG.isUndef(Opcode: Opc, Ops: {N0, N1}))
5120 return DAG.getUNDEF(VT);
5121
5122 // undef / X -> 0
5123 // undef % X -> 0
5124 if (N0.isUndef())
5125 return DAG.getConstant(Val: 0, DL, VT);
5126
5127 // 0 / X -> 0
5128 // 0 % X -> 0
5129 ConstantSDNode *N0C = isConstOrConstSplat(N: N0);
5130 if (N0C && N0C->isZero())
5131 return N0;
5132
5133 // X / X -> 1
5134 // X % X -> 0
5135 if (N0 == N1)
5136 return DAG.getConstant(Val: IsDiv ? 1 : 0, DL, VT);
5137
5138 // X / 1 -> X
5139 // X % 1 -> 0
5140 // If this is a boolean op (single-bit element type), we can't have
5141 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
5142 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
5143 // it's a 1.
5144 if (isOneOrOneSplat(V: N1) || (VT.getScalarType() == MVT::i1))
5145 return IsDiv ? N0 : DAG.getConstant(Val: 0, DL, VT);
5146
5147 return SDValue();
5148}
5149
5150SDValue DAGCombiner::visitSDIV(SDNode *N) {
5151 SDValue N0 = N->getOperand(Num: 0);
5152 SDValue N1 = N->getOperand(Num: 1);
5153 EVT VT = N->getValueType(ResNo: 0);
5154 EVT CCVT = getSetCCResultType(VT);
5155 SDLoc DL(N);
5156
5157 // fold (sdiv c1, c2) -> c1/c2
5158 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::SDIV, DL, VT, Ops: {N0, N1}))
5159 return C;
5160
5161 // fold vector ops
5162 if (VT.isVector())
5163 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5164 return FoldedVOp;
5165
5166 // fold (sdiv X, -1) -> 0-X
5167 ConstantSDNode *N1C = isConstOrConstSplat(N: N1);
5168 if (N1C && N1C->isAllOnes())
5169 return DAG.getNegative(Val: N0, DL, VT);
5170
5171 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
5172 if (N1C && N1C->isMinSignedValue())
5173 return DAG.getSelect(DL, VT, Cond: DAG.getSetCC(DL, VT: CCVT, LHS: N0, RHS: N1, Cond: ISD::SETEQ),
5174 LHS: DAG.getConstant(Val: 1, DL, VT),
5175 RHS: DAG.getConstant(Val: 0, DL, VT));
5176
5177 if (SDValue V = simplifyDivRem(N, DAG))
5178 return V;
5179
5180 if (SDValue NewSel = foldBinOpIntoSelect(BO: N))
5181 return NewSel;
5182
5183 // If we know the sign bits of both operands are zero, strength reduce to a
5184 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
5185 if (DAG.SignBitIsZero(Op: N1) && DAG.SignBitIsZero(Op: N0))
5186 return DAG.getNode(Opcode: ISD::UDIV, DL, VT: N1.getValueType(), N1: N0, N2: N1);
5187
5188 if (SDValue V = visitSDIVLike(N0, N1, N)) {
5189 // If the corresponding remainder node exists, update its users with
5190 // (Dividend - (Quotient * Divisor)).
5191 if (SDNode *RemNode = DAG.getNodeIfExists(Opcode: ISD::SREM, VTList: N->getVTList(),
5192 Ops: { N0, N1 })) {
5193 // If the sdiv has the exact flag we shouldn't propagate it to the
5194 // remainder node.
5195 if (!N->getFlags().hasExact()) {
5196 SDValue Mul = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: V, N2: N1);
5197 SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: N0, N2: Mul);
5198 AddToWorklist(N: Mul.getNode());
5199 AddToWorklist(N: Sub.getNode());
5200 CombineTo(N: RemNode, Res: Sub);
5201 }
5202 }
5203 return V;
5204 }
5205
5206 // sdiv, srem -> sdivrem
5207 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
5208 // true. Otherwise, we break the simplification logic in visitREM().
5209 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5210 if (!N1C || TLI.isIntDivCheap(VT: N->getValueType(ResNo: 0), Attr))
5211 if (SDValue DivRem = useDivRem(Node: N))
5212 return DivRem;
5213
5214 return SDValue();
5215}
5216
5217static bool isDivisorPowerOfTwo(SDValue Divisor) {
5218 // Helper for determining whether a value is a power-2 constant scalar or a
5219 // vector of such elements.
5220 auto IsPowerOfTwo = [](ConstantSDNode *C) {
5221 if (C->isZero() || C->isOpaque())
5222 return false;
5223 if (C->getAPIntValue().isPowerOf2())
5224 return true;
5225 if (C->getAPIntValue().isNegatedPowerOf2())
5226 return true;
5227 return false;
5228 };
5229
5230 return ISD::matchUnaryPredicate(Op: Divisor, Match: IsPowerOfTwo, /*AllowUndefs=*/false,
5231 /*AllowTruncation=*/true);
5232}
5233
5234SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
5235 SDLoc DL(N);
5236 EVT VT = N->getValueType(ResNo: 0);
5237 EVT CCVT = getSetCCResultType(VT);
5238 unsigned BitWidth = VT.getScalarSizeInBits();
5239
5240 // fold (sdiv X, pow2) -> simple ops after legalize
5241 // FIXME: We check for the exact bit here because the generic lowering gives
5242 // better results in that case. The target-specific lowering should learn how
5243 // to handle exact sdivs efficiently.
5244 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(Divisor: N1)) {
5245 // Target-specific implementation of sdiv x, pow2.
5246 if (SDValue Res = BuildSDIVPow2(N))
5247 return Res;
5248
5249 // Create constants that are functions of the shift amount value.
5250 EVT ShiftAmtTy = getShiftAmountTy(LHSTy: N0.getValueType());
5251 SDValue Bits = DAG.getConstant(Val: BitWidth, DL, VT: ShiftAmtTy);
5252 SDValue C1 = DAG.getNode(Opcode: ISD::CTTZ, DL, VT, Operand: N1);
5253 C1 = DAG.getZExtOrTrunc(Op: C1, DL, VT: ShiftAmtTy);
5254 SDValue Inexact = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShiftAmtTy, N1: Bits, N2: C1);
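// Inexact = BitWidth - log2(|divisor|). Shifting the sign splat logically
// right by this amount yields |divisor| - 1 when N0 is negative and 0
// otherwise; that is the rounding adjustment added to N0 below.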
5255 if (!isConstantOrConstantVector(N: Inexact))
5256 return SDValue();
5257
5258 // Splat the sign bit into the register
5259 SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: N0,
5260 N2: DAG.getConstant(Val: BitWidth - 1, DL, VT: ShiftAmtTy));
5261 AddToWorklist(N: Sign.getNode());
5262
5263 // Add (N0 < 0) ? abs2 - 1 : 0;
5264 SDValue Srl = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Sign, N2: Inexact);
5265 AddToWorklist(N: Srl.getNode());
5266 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0, N2: Srl);
5267 AddToWorklist(N: Add.getNode());
5268 SDValue Sra = DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Add, N2: C1);
5269 AddToWorklist(N: Sra.getNode());
5270
5271 // Special case: (sdiv X, 1) -> X
5272 // Special case: (sdiv X, -1) -> 0-X
5273 SDValue One = DAG.getConstant(Val: 1, DL, VT);
5274 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
5275 SDValue IsOne = DAG.getSetCC(DL, VT: CCVT, LHS: N1, RHS: One, Cond: ISD::SETEQ);
5276 SDValue IsAllOnes = DAG.getSetCC(DL, VT: CCVT, LHS: N1, RHS: AllOnes, Cond: ISD::SETEQ);
5277 SDValue IsOneOrAllOnes = DAG.getNode(Opcode: ISD::OR, DL, VT: CCVT, N1: IsOne, N2: IsAllOnes);
5278 Sra = DAG.getSelect(DL, VT, Cond: IsOneOrAllOnes, LHS: N0, RHS: Sra);
5279
5280 // If dividing by a positive value, we're done. Otherwise, the result must
5281 // be negated.
5282 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
5283 SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Zero, N2: Sra);
5284
5285 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
5286 SDValue IsNeg = DAG.getSetCC(DL, VT: CCVT, LHS: N1, RHS: Zero, Cond: ISD::SETLT);
5287 SDValue Res = DAG.getSelect(DL, VT, Cond: IsNeg, LHS: Sub, RHS: Sra);
5288 return Res;
5289 }
5290
5291 // If integer divide is expensive and we satisfy the requirements, emit an
5292 // alternate sequence. Targets may check function attributes for size/speed
5293 // trade-offs.
5294 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5295 if (isConstantOrConstantVector(N: N1, /*NoOpaques=*/false,
5296 /*AllowTruncation=*/true) &&
5297 !TLI.isIntDivCheap(VT: N->getValueType(ResNo: 0), Attr))
5298 if (SDValue Op = BuildSDIV(N))
5299 return Op;
5300
5301 return SDValue();
5302}
5303
5304SDValue DAGCombiner::visitUDIV(SDNode *N) {
5305 SDValue N0 = N->getOperand(Num: 0);
5306 SDValue N1 = N->getOperand(Num: 1);
5307 EVT VT = N->getValueType(ResNo: 0);
5308 EVT CCVT = getSetCCResultType(VT);
5309 SDLoc DL(N);
5310
5311 // fold (udiv c1, c2) -> c1/c2
5312 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::UDIV, DL, VT, Ops: {N0, N1}))
5313 return C;
5314
5315 // fold vector ops
5316 if (VT.isVector())
5317 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5318 return FoldedVOp;
5319
5320 // fold (udiv X, -1) -> select(X == -1, 1, 0)
5321 ConstantSDNode *N1C = isConstOrConstSplat(N: N1);
5322 if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
5323 return DAG.getSelect(DL, VT, Cond: DAG.getSetCC(DL, VT: CCVT, LHS: N0, RHS: N1, Cond: ISD::SETEQ),
5324 LHS: DAG.getConstant(Val: 1, DL, VT),
5325 RHS: DAG.getConstant(Val: 0, DL, VT));
5326 }
5327
5328 if (SDValue V = simplifyDivRem(N, DAG))
5329 return V;
5330
5331 if (SDValue NewSel = foldBinOpIntoSelect(BO: N))
5332 return NewSel;
5333
5334 if (SDValue V = visitUDIVLike(N0, N1, N)) {
5335 // If the corresponding remainder node exists, update its users with
5336 // (Dividend - (Quotient * Divisor)).
5337 if (SDNode *RemNode = DAG.getNodeIfExists(Opcode: ISD::UREM, VTList: N->getVTList(),
5338 Ops: { N0, N1 })) {
5339 // If the udiv has the exact flag we shouldn't propagate it to the
5340 // remainder node.
5341 if (!N->getFlags().hasExact()) {
5342 SDValue Mul = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: V, N2: N1);
5343 SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: N0, N2: Mul);
5344 AddToWorklist(N: Mul.getNode());
5345 AddToWorklist(N: Sub.getNode());
5346 CombineTo(N: RemNode, Res: Sub);
5347 }
5348 }
5349 return V;
5350 }
5351
5352 // udiv, urem -> udivrem
5353 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
5354 // true. Otherwise, we break the simplification logic in visitREM().
5355 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5356 if (!N1C || TLI.isIntDivCheap(VT: N->getValueType(ResNo: 0), Attr))
5357 if (SDValue DivRem = useDivRem(Node: N))
5358 return DivRem;
5359
5360 // Simplify the operands using demanded-bits information.
5361 // We don't have demanded bits support for UDIV so this just enables constant
5362 // folding based on known bits.
5363 if (SimplifyDemandedBits(Op: SDValue(N, 0)))
5364 return SDValue(N, 0);
5365
5366 return SDValue();
5367}
5368
5369SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
5370 SDLoc DL(N);
5371 EVT VT = N->getValueType(ResNo: 0);
5372
5373 // fold (udiv x, (1 << c)) -> x >>u c
5374 if (isConstantOrConstantVector(N: N1, /*NoOpaques=*/true,
5375 /*AllowTruncation=*/true)) {
5376 if (SDValue LogBase2 = BuildLogBase2(V: N1, DL)) {
5377 AddToWorklist(N: LogBase2.getNode());
5378
5379 EVT ShiftVT = getShiftAmountTy(LHSTy: N0.getValueType());
5380 SDValue Trunc = DAG.getZExtOrTrunc(Op: LogBase2, DL, VT: ShiftVT);
5381 AddToWorklist(N: Trunc.getNode());
5382 return DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: N0, N2: Trunc);
5383 }
5384 }
5385
5386 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
5387 if (N1.getOpcode() == ISD::SHL) {
5388 SDValue N10 = N1.getOperand(i: 0);
5389 if (isConstantOrConstantVector(N: N10, /*NoOpaques=*/true,
5390 /*AllowTruncation=*/true)) {
5391 if (SDValue LogBase2 = BuildLogBase2(V: N10, DL)) {
5392 AddToWorklist(N: LogBase2.getNode());
5393
5394 EVT ADDVT = N1.getOperand(i: 1).getValueType();
5395 SDValue Trunc = DAG.getZExtOrTrunc(Op: LogBase2, DL, VT: ADDVT);
5396 AddToWorklist(N: Trunc.getNode());
5397 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT: ADDVT, N1: N1.getOperand(i: 1), N2: Trunc);
5398 AddToWorklist(N: Add.getNode());
5399 return DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: N0, N2: Add);
5400 }
5401 }
5402 }
5403
5404 // fold (udiv x, c) -> alternate
5405 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5406 if (isConstantOrConstantVector(N: N1, /*NoOpaques=*/false,
5407 /*AllowTruncation=*/true) &&
5408 !TLI.isIntDivCheap(VT: N->getValueType(ResNo: 0), Attr))
5409 if (SDValue Op = BuildUDIV(N))
5410 return Op;
5411
5412 return SDValue();
5413}
5414
5415SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
5416 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(Divisor: N1) &&
5417 !DAG.doesNodeExist(Opcode: ISD::SDIV, VTList: N->getVTList(), Ops: {N0, N1})) {
5418 // Target-specific implementation of srem x, pow2.
5419 if (SDValue Res = BuildSREMPow2(N))
5420 return Res;
5421 }
5422 return SDValue();
5423}
5424
5425// handles ISD::SREM and ISD::UREM
5426SDValue DAGCombiner::visitREM(SDNode *N) {
5427 unsigned Opcode = N->getOpcode();
5428 SDValue N0 = N->getOperand(Num: 0);
5429 SDValue N1 = N->getOperand(Num: 1);
5430 EVT VT = N->getValueType(ResNo: 0);
5431 EVT CCVT = getSetCCResultType(VT);
5432
5433 bool isSigned = (Opcode == ISD::SREM);
5434 SDLoc DL(N);
5435
5436 // fold (rem c1, c2) -> c1%c2
5437 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, Ops: {N0, N1}))
5438 return C;
5439
5440 // fold (urem X, -1) -> select(FX == -1, 0, FX)
5441 // Freeze the numerator to avoid a miscompile with an undefined value.
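// Freezing makes the compare and the select result observe the same value;
// an unfrozen undef X could resolve differently at each use.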
5442 if (!isSigned && llvm::isAllOnesOrAllOnesSplat(V: N1, /*AllowUndefs*/ false) &&
5443 CCVT.isVector() == VT.isVector()) {
5444 SDValue F0 = DAG.getFreeze(V: N0);
5445 SDValue EqualsNeg1 = DAG.getSetCC(DL, VT: CCVT, LHS: F0, RHS: N1, Cond: ISD::SETEQ);
5446 return DAG.getSelect(DL, VT, Cond: EqualsNeg1, LHS: DAG.getConstant(Val: 0, DL, VT), RHS: F0);
5447 }
5448
5449 if (SDValue V = simplifyDivRem(N, DAG))
5450 return V;
5451
5452 if (SDValue NewSel = foldBinOpIntoSelect(BO: N))
5453 return NewSel;
5454
5455 if (isSigned) {
5456 // If we know the sign bits of both operands are zero, strength reduce to a
5457 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
5458 if (DAG.SignBitIsZero(Op: N1) && DAG.SignBitIsZero(Op: N0))
5459 return DAG.getNode(Opcode: ISD::UREM, DL, VT, N1: N0, N2: N1);
5460 } else {
5461 if (DAG.isKnownToBeAPowerOfTwo(Val: N1)) {
5462 // fold (urem x, pow2) -> (and x, pow2-1)
5463 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5464 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1, N2: NegOne);
5465 AddToWorklist(N: Add.getNode());
5466 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: N0, N2: Add);
5467 }
5468 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
5469 // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
5470 // TODO: We should sink the following into isKnownToBePowerOfTwo
5471 // using a OrZero parameter analogous to our handling in ValueTracking.
5472 if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
5473 DAG.isKnownToBeAPowerOfTwo(Val: N1.getOperand(i: 0))) {
5474 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5475 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1, N2: NegOne);
5476 AddToWorklist(N: Add.getNode());
5477 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: N0, N2: Add);
5478 }
5479 }
5480
5481 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5482
5483 // If X/C can be simplified by the division-by-constant logic, lower
5484 // X%C to the equivalent of X-X/C*C.
5485 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
5486 // speculative DIV must not cause a DIVREM conversion. We guard against this
5487 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
5488 // combine will not return a DIVREM. Regardless, checking cheapness here
5489 // makes sense since the simplification results in fatter code.
5490 if (DAG.isKnownNeverZero(Op: N1) && !TLI.isIntDivCheap(VT, Attr)) {
5491 if (isSigned) {
5492 // check if we can build faster implementation for srem
5493 if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
5494 return OptimizedRem;
5495 }
5496
5497 SDValue OptimizedDiv =
5498 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
5499 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
5500 // If the equivalent Div node also exists, update its users.
5501 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
5502 if (SDNode *DivNode = DAG.getNodeIfExists(Opcode: DivOpcode, VTList: N->getVTList(),
5503 Ops: { N0, N1 }))
5504 CombineTo(N: DivNode, Res: OptimizedDiv);
5505 SDValue Mul = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: OptimizedDiv, N2: N1);
5506 SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: N0, N2: Mul);
5507 AddToWorklist(N: OptimizedDiv.getNode());
5508 AddToWorklist(N: Mul.getNode());
5509 return Sub;
5510 }
5511 }
5512
5513 // srem/urem -> sdivrem/udivrem
5514 if (SDValue DivRem = useDivRem(Node: N))
5515 return DivRem.getValue(R: 1);
5516
5517 // fold urem(urem(A, BCst), Op1Cst) -> urem(A, Op1Cst)
5518 // iff urem(BCst, Op1Cst) == 0
5519 SDValue A;
5520 APInt Op1Cst, BCst;
5521 if (sd_match(N, P: m_URem(L: m_URem(L: m_Value(N&: A), R: m_ConstInt(V&: BCst)),
5522 R: m_ConstInt(V&: Op1Cst))) &&
5523 BCst.urem(RHS: Op1Cst).isZero()) {
5524 return DAG.getNode(Opcode: ISD::UREM, DL, VT, N1: A, N2: DAG.getConstant(Val: Op1Cst, DL, VT));
5525 }
5526
5527 // fold srem(srem(A, BCst), Op1Cst) -> srem(A, Op1Cst)
5528 // iff srem(BCst, Op1Cst) == 0 && Op1Cst != -1
5529 if (sd_match(N, P: m_SRem(L: m_SRem(L: m_Value(N&: A), R: m_ConstInt(V&: BCst)),
5530 R: m_ConstInt(V&: Op1Cst))) &&
5531 BCst.srem(RHS: Op1Cst).isZero() && !Op1Cst.isAllOnes()) {
5532 return DAG.getNode(Opcode: ISD::SREM, DL, VT, N1: A, N2: DAG.getConstant(Val: Op1Cst, DL, VT));
5533 }
5534
5535 return SDValue();
5536}
5537
5538SDValue DAGCombiner::visitMULHS(SDNode *N) {
5539 SDValue N0 = N->getOperand(Num: 0);
5540 SDValue N1 = N->getOperand(Num: 1);
5541 EVT VT = N->getValueType(ResNo: 0);
5542 SDLoc DL(N);
5543
5544 // fold (mulhs c1, c2)
5545 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::MULHS, DL, VT, Ops: {N0, N1}))
5546 return C;
5547
5548 // canonicalize constant to RHS.
5549 if (DAG.isConstantIntBuildVectorOrConstantInt(N: N0) &&
5550 !DAG.isConstantIntBuildVectorOrConstantInt(N: N1))
5551 return DAG.getNode(Opcode: ISD::MULHS, DL, VTList: N->getVTList(), N1, N2: N0);
5552
5553 if (VT.isVector()) {
5554 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5555 return FoldedVOp;
5556
5557 // fold (mulhs x, 0) -> 0
5558 // Do not return N1; it may contain undef elements.
5559 if (ISD::isConstantSplatVectorAllZeros(N: N1.getNode()))
5560 return DAG.getConstant(Val: 0, DL, VT);
5561 }
5562
5563 // fold (mulhs x, 0) -> 0
5564 if (isNullConstant(V: N1))
5565 return N1;
5566
5567 // fold (mulhs x, 1) -> (sra x, size(x)-1)
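// The full signed product of x and 1 is just sext(x), so its high half is
// the sign bit of x replicated across the result.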
5568 if (isOneConstant(V: N1))
5569 return DAG.getNode(
5570 Opcode: ISD::SRA, DL, VT, N1: N0,
5571 N2: DAG.getShiftAmountConstant(Val: N0.getScalarValueSizeInBits() - 1, VT, DL));
5572
5573 // fold (mulhs x, undef) -> 0
5574 if (N0.isUndef() || N1.isUndef())
5575 return DAG.getConstant(Val: 0, DL, VT);
5576
5577 // If the type twice as wide is legal, transform the mulhs to a wider multiply
5578 // plus a shift.
5579 if (!TLI.isOperationLegalOrCustom(Op: ISD::MULHS, VT) && VT.isSimple() &&
5580 !VT.isVector()) {
5581 MVT Simple = VT.getSimpleVT();
5582 unsigned SimpleSize = Simple.getSizeInBits();
5583 EVT NewVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: SimpleSize*2);
5584 if (TLI.isOperationLegal(Op: ISD::MUL, VT: NewVT)) {
5585 N0 = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: NewVT, Operand: N0);
5586 N1 = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: NewVT, Operand: N1);
5587 N1 = DAG.getNode(Opcode: ISD::MUL, DL, VT: NewVT, N1: N0, N2: N1);
5588 N1 = DAG.getNode(Opcode: ISD::SRL, DL, VT: NewVT, N1,
5589 N2: DAG.getShiftAmountConstant(Val: SimpleSize, VT: NewVT, DL));
5590 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: N1);
5591 }
5592 }
5593
5594 return SDValue();
5595}
5596
5597SDValue DAGCombiner::visitMULHU(SDNode *N) {
5598 SDValue N0 = N->getOperand(Num: 0);
5599 SDValue N1 = N->getOperand(Num: 1);
5600 EVT VT = N->getValueType(ResNo: 0);
5601 SDLoc DL(N);
5602
5603 // fold (mulhu c1, c2)
5604 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::MULHU, DL, VT, Ops: {N0, N1}))
5605 return C;
5606
5607 // canonicalize constant to RHS.
5608 if (DAG.isConstantIntBuildVectorOrConstantInt(N: N0) &&
5609 !DAG.isConstantIntBuildVectorOrConstantInt(N: N1))
5610 return DAG.getNode(Opcode: ISD::MULHU, DL, VTList: N->getVTList(), N1, N2: N0);
5611
5612 if (VT.isVector()) {
5613 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5614 return FoldedVOp;
5615
5616 // fold (mulhu x, 0) -> 0
5617 // do not return N1, because it may contain undef elements.
5618 if (ISD::isConstantSplatVectorAllZeros(N: N1.getNode()))
5619 return DAG.getConstant(Val: 0, DL, VT);
5620 }
5621
5622 // fold (mulhu x, 0) -> 0
5623 if (isNullConstant(V: N1))
5624 return N1;
5625
5626 // fold (mulhu x, 1) -> 0
5627 if (isOneConstant(V: N1))
5628 return DAG.getConstant(Val: 0, DL, VT);
5629
5630 // fold (mulhu x, undef) -> 0
5631 if (N0.isUndef() || N1.isUndef())
5632 return DAG.getConstant(Val: 0, DL, VT);
5633
5634 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
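// Multiplying by 2^c shifts x left by c within the double-width product, so
// the high half holds the top c bits of x, i.e. x >> (bitwidth - c). For
// example, with i32 operands mulhu(x, 16) == x >> 28.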
5635 if (isConstantOrConstantVector(N: N1, /*NoOpaques=*/true,
5636 /*AllowTruncation=*/true) &&
5637 hasOperation(Opcode: ISD::SRL, VT)) {
5638 if (SDValue LogBase2 = BuildLogBase2(V: N1, DL)) {
5639 unsigned NumEltBits = VT.getScalarSizeInBits();
5640 SDValue SRLAmt = DAG.getNode(
5641 Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: NumEltBits, DL, VT), N2: LogBase2);
5642 EVT ShiftVT = getShiftAmountTy(LHSTy: N0.getValueType());
5643 SDValue Trunc = DAG.getZExtOrTrunc(Op: SRLAmt, DL, VT: ShiftVT);
5644 return DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: N0, N2: Trunc);
5645 }
5646 }
5647
5648 // If the type twice as wide is legal, transform the mulhu to a wider multiply
5649 // plus a shift.
5650 if (!TLI.isOperationLegalOrCustom(Op: ISD::MULHU, VT) && VT.isSimple() &&
5651 !VT.isVector()) {
5652 MVT Simple = VT.getSimpleVT();
5653 unsigned SimpleSize = Simple.getSizeInBits();
5654 EVT NewVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: SimpleSize*2);
5655 if (TLI.isOperationLegal(Op: ISD::MUL, VT: NewVT)) {
5656 N0 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: NewVT, Operand: N0);
5657 N1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: NewVT, Operand: N1);
5658 N1 = DAG.getNode(Opcode: ISD::MUL, DL, VT: NewVT, N1: N0, N2: N1);
5659 N1 = DAG.getNode(Opcode: ISD::SRL, DL, VT: NewVT, N1,
5660 N2: DAG.getShiftAmountConstant(Val: SimpleSize, VT: NewVT, DL));
5661 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: N1);
5662 }
5663 }
5664
5665 // Simplify the operands using demanded-bits information.
5666 // We don't have demanded bits support for MULHU so this just enables constant
5667 // folding based on known bits.
5668 if (SimplifyDemandedBits(Op: SDValue(N, 0)))
5669 return SDValue(N, 0);
5670
5671 return SDValue();
5672}
5673
5674SDValue DAGCombiner::visitAVG(SDNode *N) {
5675 unsigned Opcode = N->getOpcode();
5676 SDValue N0 = N->getOperand(Num: 0);
5677 SDValue N1 = N->getOperand(Num: 1);
5678 EVT VT = N->getValueType(ResNo: 0);
5679 SDLoc DL(N);
5680 bool IsSigned = Opcode == ISD::AVGCEILS || Opcode == ISD::AVGFLOORS;
5681
5682 // fold (avg c1, c2)
5683 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, Ops: {N0, N1}))
5684 return C;
5685
5686 // canonicalize constant to RHS.
5687 if (DAG.isConstantIntBuildVectorOrConstantInt(N: N0) &&
5688 !DAG.isConstantIntBuildVectorOrConstantInt(N: N1))
5689 return DAG.getNode(Opcode, DL, VTList: N->getVTList(), N1, N2: N0);
5690
5691 if (VT.isVector())
5692 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5693 return FoldedVOp;
5694
5695 // fold (avg x, undef) -> x
5696 if (N0.isUndef())
5697 return N1;
5698 if (N1.isUndef())
5699 return N0;
5700
5701 // fold (avg x, x) --> x
5702 if (N0 == N1 && Level >= AfterLegalizeTypes)
5703 return N0;
5704
5705 // fold (avgfloor x, 0) -> x >> 1
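// avgfloor(x, 0) is (x + 0) / 2 rounded down, computed without overflow: an
// arithmetic shift for the signed form, a logical shift for the unsigned form.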
5706 SDValue X, Y;
5707 if (sd_match(N, P: m_c_BinOp(Opc: ISD::AVGFLOORS, L: m_Value(N&: X), R: m_Zero())))
5708 return DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: X,
5709 N2: DAG.getShiftAmountConstant(Val: 1, VT, DL));
5710 if (sd_match(N, P: m_c_BinOp(Opc: ISD::AVGFLOORU, L: m_Value(N&: X), R: m_Zero())))
5711 return DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: X,
5712 N2: DAG.getShiftAmountConstant(Val: 1, VT, DL));
5713
5714 // fold avgu(zext(x), zext(y)) -> zext(avgu(x, y))
5715 // fold avgs(sext(x), sext(y)) -> sext(avgs(x, y))
5716 if (!IsSigned &&
5717 sd_match(N, P: m_BinOp(Opc: Opcode, L: m_ZExt(Op: m_Value(N&: X)), R: m_ZExt(Op: m_Value(N&: Y)))) &&
5718 X.getValueType() == Y.getValueType() &&
5719 hasOperation(Opcode, VT: X.getValueType())) {
5720 SDValue AvgU = DAG.getNode(Opcode, DL, VT: X.getValueType(), N1: X, N2: Y);
5721 return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT, Operand: AvgU);
5722 }
5723 if (IsSigned &&
5724 sd_match(N, P: m_BinOp(Opc: Opcode, L: m_SExt(Op: m_Value(N&: X)), R: m_SExt(Op: m_Value(N&: Y)))) &&
5725 X.getValueType() == Y.getValueType() &&
5726 hasOperation(Opcode, VT: X.getValueType())) {
5727 SDValue AvgS = DAG.getNode(Opcode, DL, VT: X.getValueType(), N1: X, N2: Y);
5728 return DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT, Operand: AvgS);
5729 }
5730
5731 // Fold avgflooru(x,y) -> avgceilu(x,y-1) iff y != 0
5732 // Fold avgflooru(x,y) -> avgceilu(x-1,y) iff x != 0
5733 // Check if avgflooru isn't legal/custom but avgceilu is.
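// avgflooru(x, y) == (x + y) >> 1 and avgceilu(a, b) == (a + b + 1) >> 1,
// both evaluated without overflow, so substituting b = y - 1 (or a = x - 1)
// yields the same value; the non-zero check ensures the subtraction by one
// (implemented as an add of -1) does not wrap around.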
5734 if (Opcode == ISD::AVGFLOORU && !hasOperation(Opcode: ISD::AVGFLOORU, VT) &&
5735 (!LegalOperations || hasOperation(Opcode: ISD::AVGCEILU, VT))) {
5736 if (DAG.isKnownNeverZero(Op: N1))
5737 return DAG.getNode(
5738 Opcode: ISD::AVGCEILU, DL, VT, N1: N0,
5739 N2: DAG.getNode(Opcode: ISD::ADD, DL, VT, N1, N2: DAG.getAllOnesConstant(DL, VT)));
5740 if (DAG.isKnownNeverZero(Op: N0))
5741 return DAG.getNode(
5742 Opcode: ISD::AVGCEILU, DL, VT, N1,
5743 N2: DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0, N2: DAG.getAllOnesConstant(DL, VT)));
5744 }
5745
5746 // Fold avgfloor((add nw x,y), 1) -> avgceil(x,y)
5747 // Fold avgfloor((add nw x,1), y) -> avgceil(x,y)
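// When the inner add cannot wrap, avgfloor(x + y, 1) == (x + y + 1) >> 1,
// which is exactly avgceil(x, y).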
5748 if ((Opcode == ISD::AVGFLOORU && hasOperation(Opcode: ISD::AVGCEILU, VT)) ||
5749 (Opcode == ISD::AVGFLOORS && hasOperation(Opcode: ISD::AVGCEILS, VT))) {
5750 SDValue Add;
5751 if (sd_match(N,
5752 P: m_c_BinOp(Opc: Opcode,
5753 L: m_AllOf(preds: m_Value(N&: Add), preds: m_Add(L: m_Value(N&: X), R: m_Value(N&: Y))),
5754 R: m_One())) ||
5755 sd_match(N, P: m_c_BinOp(Opc: Opcode,
5756 L: m_AllOf(preds: m_Value(N&: Add), preds: m_Add(L: m_Value(N&: X), R: m_One())),
5757 R: m_Value(N&: Y)))) {
5758
5759 if (IsSigned && Add->getFlags().hasNoSignedWrap())
5760 return DAG.getNode(Opcode: ISD::AVGCEILS, DL, VT, N1: X, N2: Y);
5761
5762 if (!IsSigned && Add->getFlags().hasNoUnsignedWrap())
5763 return DAG.getNode(Opcode: ISD::AVGCEILU, DL, VT, N1: X, N2: Y);
5764 }
5765 }
5766
5767 // Fold avgfloors(x,y) -> avgflooru(x,y) if both x and y are non-negative
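// When both sign bits are known to be zero, the signed and unsigned averages
// coincide, so the unsigned form can be used where the target supports it.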
5768 if (Opcode == ISD::AVGFLOORS && hasOperation(Opcode: ISD::AVGFLOORU, VT)) {
5769 if (DAG.SignBitIsZero(Op: N0) && DAG.SignBitIsZero(Op: N1))
5770 return DAG.getNode(Opcode: ISD::AVGFLOORU, DL, VT, N1: N0, N2: N1);
5771 }
5772
5773 return SDValue();
5774}
5775
5776SDValue DAGCombiner::visitABD(SDNode *N) {
5777 unsigned Opcode = N->getOpcode();
5778 SDValue N0 = N->getOperand(Num: 0);
5779 SDValue N1 = N->getOperand(Num: 1);
5780 EVT VT = N->getValueType(ResNo: 0);
5781 SDLoc DL(N);
5782
5783 // fold (abd c1, c2)
5784 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, Ops: {N0, N1}))
5785 return C;
5786
5787 // canonicalize constant to RHS.
5788 if (DAG.isConstantIntBuildVectorOrConstantInt(N: N0) &&
5789 !DAG.isConstantIntBuildVectorOrConstantInt(N: N1))
5790 return DAG.getNode(Opcode, DL, VTList: N->getVTList(), N1, N2: N0);
5791
5792 if (VT.isVector())
5793 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5794 return FoldedVOp;
5795
5796 // fold (abd x, undef) -> 0
5797 if (N0.isUndef() || N1.isUndef())
5798 return DAG.getConstant(Val: 0, DL, VT);
5799
5800 // fold (abd x, x) -> 0
5801 if (N0 == N1)
5802 return DAG.getConstant(Val: 0, DL, VT);
5803
5804 SDValue X, Y;
5805
5806 // fold (abds x, 0) -> abs x
5807 if (sd_match(N, P: m_c_BinOp(Opc: ISD::ABDS, L: m_Value(N&: X), R: m_Zero())) &&
5808 (!LegalOperations || hasOperation(Opcode: ISD::ABS, VT)))
5809 return DAG.getNode(Opcode: ISD::ABS, DL, VT, Operand: X);
5810
5811 // fold (abdu x, 0) -> x
5812 if (sd_match(N, P: m_c_BinOp(Opc: ISD::ABDU, L: m_Value(N&: X), R: m_Zero())))
5813 return X;
5814
5815 // fold (abds x, y) -> (abdu x, y) iff both args are known non-negative
5816 if (Opcode == ISD::ABDS && hasOperation(Opcode: ISD::ABDU, VT) &&
5817 DAG.SignBitIsZero(Op: N0) && DAG.SignBitIsZero(Op: N1))
5818 return DAG.getNode(Opcode: ISD::ABDU, DL, VT, N1, N2: N0);
5819
5820 // fold (abd? (?ext x), (?ext y)) -> (zext (abd? x, y))
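// The absolute difference of two zero-extended (resp. sign-extended) values
// always fits in the narrower type when read as an unsigned value, so the
// abd can be computed there and the result zero-extended back.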
5821 if (sd_match(N, P: m_BinOp(Opc: ISD::ABDU, L: m_ZExt(Op: m_Value(N&: X)), R: m_ZExt(Op: m_Value(N&: Y)))) ||
5822 sd_match(N, P: m_BinOp(Opc: ISD::ABDS, L: m_SExt(Op: m_Value(N&: X)), R: m_SExt(Op: m_Value(N&: Y))))) {
5823 EVT SmallVT = X.getScalarValueSizeInBits() > Y.getScalarValueSizeInBits()
5824 ? X.getValueType()
5825 : Y.getValueType();
5826 if (!LegalOperations || hasOperation(Opcode, VT: SmallVT)) {
5827 SDValue ExtedX = DAG.getExtOrTrunc(Op: X, DL: SDLoc(X), VT: SmallVT, Opcode: N0->getOpcode());
5828 SDValue ExtedY = DAG.getExtOrTrunc(Op: Y, DL: SDLoc(Y), VT: SmallVT, Opcode: N0->getOpcode());
5829 SDValue SmallABD = DAG.getNode(Opcode, DL, VT: SmallVT, Ops: {ExtedX, ExtedY});
5830 SDValue ZExted = DAG.getZExtOrTrunc(Op: SmallABD, DL, VT);
5831 return ZExted;
5832 }
5833 }
5834
5835 // fold (abd? (?ext x), y) -> (zext (abd? x, (trunc y)))
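// This applies when the significant bits of y fit in x's narrower type and
// truncating y is free (e.g. y is a suitably small constant); the truncated
// y then represents the same value, so the narrow abd equals the wide one.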
5836 if (sd_match(N, P: m_c_BinOp(Opc: ISD::ABDU, L: m_ZExt(Op: m_Value(N&: X)), R: m_Value(N&: Y))) ||
5837 sd_match(N, P: m_c_BinOp(Opc: ISD::ABDS, L: m_SExt(Op: m_Value(N&: X)), R: m_Value(N&: Y)))) {
5838 EVT SmallVT = X.getValueType();
5839 if (!LegalOperations || hasOperation(Opcode, VT: SmallVT)) {
5840 uint64_t Bits = SmallVT.getScalarSizeInBits();
5841 unsigned RelevantBits =
5842 (Opcode == ISD::ABDS) ? DAG.ComputeMaxSignificantBits(Op: Y)
5843 : DAG.computeKnownBits(Op: Y).countMaxActiveBits();
5844 bool TruncatingYIsCheap = TLI.isTruncateFree(Val: Y, VT2: SmallVT) ||
5845 ISD::matchUnaryPredicate(
5846 Op: Y,
5847 Match: [&](auto *C) {
5848 const APInt &YConst = C->getAsAPIntVal();
5849 return (Opcode == ISD::ABDS)
5850 ? YConst.isSignedIntN(N: Bits)
5851 : YConst.isIntN(N: Bits);
5852 },
5853 /*AllowUndefs=*/true);
5854
5855 if (RelevantBits <= Bits && TruncatingYIsCheap) {
5856 SDValue NewY = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(Y), VT: SmallVT, Operand: Y);
5857 SDValue SmallABD = DAG.getNode(Opcode, DL, VT: SmallVT, Ops: {X, NewY});
5858 return DAG.getZExtOrTrunc(Op: SmallABD, DL, VT);
5859 }
5860 }
5861 }
5862
5863 return SDValue();
5864}
5865
5866/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
5867/// give the opcodes for the two computations that are being performed. Returns
5868/// the simplified value, or a null SDValue if no simplification was made.
5869SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
5870 unsigned HiOp) {
5871 // If the high half is not needed, just compute the low half.
5872 bool HiExists = N->hasAnyUseOfValue(Value: 1);
5873 if (!HiExists && (!LegalOperations ||
5874 TLI.isOperationLegalOrCustom(Op: LoOp, VT: N->getValueType(ResNo: 0)))) {
5875 SDValue Res = DAG.getNode(Opcode: LoOp, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), Ops: N->ops());
5876 return CombineTo(N, Res0: Res, Res1: Res);
5877 }
5878
5879 // If the low half is not needed, just compute the high half.
5880 bool LoExists = N->hasAnyUseOfValue(Value: 0);
5881 if (!LoExists && (!LegalOperations ||
5882 TLI.isOperationLegalOrCustom(Op: HiOp, VT: N->getValueType(ResNo: 1)))) {
5883 SDValue Res = DAG.getNode(Opcode: HiOp, DL: SDLoc(N), VT: N->getValueType(ResNo: 1), Ops: N->ops());
5884 return CombineTo(N, Res0: Res, Res1: Res);
5885 }
5886
5887 // If both halves are used, there is nothing to simplify.
5888 if (LoExists && HiExists)
5889 return SDValue();
5890
5891 // If the two computed results can be simplified separately, separate them.
5892 if (LoExists) {
5893 SDValue Lo = DAG.getNode(Opcode: LoOp, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), Ops: N->ops());
5894 AddToWorklist(N: Lo.getNode());
5895 SDValue LoOpt = combine(N: Lo.getNode());
5896 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
5897 (!LegalOperations ||
5898 TLI.isOperationLegalOrCustom(Op: LoOpt.getOpcode(), VT: LoOpt.getValueType())))
5899 return CombineTo(N, Res0: LoOpt, Res1: LoOpt);
5900 }
5901
5902 if (HiExists) {
5903 SDValue Hi = DAG.getNode(Opcode: HiOp, DL: SDLoc(N), VT: N->getValueType(ResNo: 1), Ops: N->ops());
5904 AddToWorklist(N: Hi.getNode());
5905 SDValue HiOpt = combine(N: Hi.getNode());
5906 if (HiOpt.getNode() && HiOpt != Hi &&
5907 (!LegalOperations ||
5908 TLI.isOperationLegalOrCustom(Op: HiOpt.getOpcode(), VT: HiOpt.getValueType())))
5909 return CombineTo(N, Res0: HiOpt, Res1: HiOpt);
5910 }
5911
5912 return SDValue();
5913}
5914
5915SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
5916 if (SDValue Res = SimplifyNodeWithTwoResults(N, LoOp: ISD::MUL, HiOp: ISD::MULHS))
5917 return Res;
5918
5919 SDValue N0 = N->getOperand(Num: 0);
5920 SDValue N1 = N->getOperand(Num: 1);
5921 EVT VT = N->getValueType(ResNo: 0);
5922 SDLoc DL(N);
5923
5924 // Constant fold.
5925 if (isa<ConstantSDNode>(Val: N0) && isa<ConstantSDNode>(Val: N1))
5926 return DAG.getNode(Opcode: ISD::SMUL_LOHI, DL, VTList: N->getVTList(), N1: N0, N2: N1);
5927
5928 // canonicalize constant to RHS (vector doesn't have to splat)
5929 if (DAG.isConstantIntBuildVectorOrConstantInt(N: N0) &&
5930 !DAG.isConstantIntBuildVectorOrConstantInt(N: N1))
5931 return DAG.getNode(Opcode: ISD::SMUL_LOHI, DL, VTList: N->getVTList(), N1, N2: N0);
5932
5933 // If the type twice as wide is legal, transform the smul_lohi into a wider
5934 // multiply plus a shift.
5935 if (VT.isSimple() && !VT.isVector()) {
5936 MVT Simple = VT.getSimpleVT();
5937 unsigned SimpleSize = Simple.getSizeInBits();
5938 EVT NewVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: SimpleSize*2);
5939 if (TLI.isOperationLegal(Op: ISD::MUL, VT: NewVT)) {
5940 SDValue Lo = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: NewVT, Operand: N0);
5941 SDValue Hi = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: NewVT, Operand: N1);
5942 Lo = DAG.getNode(Opcode: ISD::MUL, DL, VT: NewVT, N1: Lo, N2: Hi);
5943 // Compute the high part (result value 1).
5944 Hi = DAG.getNode(Opcode: ISD::SRL, DL, VT: NewVT, N1: Lo,
5945 N2: DAG.getShiftAmountConstant(Val: SimpleSize, VT: NewVT, DL));
5946 Hi = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Hi);
5947 // Compute the low part (result value 0).
5948 Lo = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Lo);
5949 return CombineTo(N, Res0: Lo, Res1: Hi);
5950 }
5951 }
5952
5953 return SDValue();
5954}
5955
5956SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
5957 if (SDValue Res = SimplifyNodeWithTwoResults(N, LoOp: ISD::MUL, HiOp: ISD::MULHU))
5958 return Res;
5959
5960 SDValue N0 = N->getOperand(Num: 0);
5961 SDValue N1 = N->getOperand(Num: 1);
5962 EVT VT = N->getValueType(ResNo: 0);
5963 SDLoc DL(N);
5964
5965 // Constant fold.
5966 if (isa<ConstantSDNode>(Val: N0) && isa<ConstantSDNode>(Val: N1))
5967 return DAG.getNode(Opcode: ISD::UMUL_LOHI, DL, VTList: N->getVTList(), N1: N0, N2: N1);
5968
5969 // canonicalize constant to RHS (vector doesn't have to splat)
5970 if (DAG.isConstantIntBuildVectorOrConstantInt(N: N0) &&
5971 !DAG.isConstantIntBuildVectorOrConstantInt(N: N1))
5972 return DAG.getNode(Opcode: ISD::UMUL_LOHI, DL, VTList: N->getVTList(), N1, N2: N0);
5973
5974 // (umul_lohi N0, 0) -> (0, 0)
5975 if (isNullConstant(V: N1)) {
5976 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
5977 return CombineTo(N, Res0: Zero, Res1: Zero);
5978 }
5979
5980 // (umul_lohi N0, 1) -> (N0, 0)
5981 if (isOneConstant(V: N1)) {
5982 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
5983 return CombineTo(N, Res0: N0, Res1: Zero);
5984 }
5985
5986 // If the type twice as wide is legal, transform the umul_lohi into a wider
5987 // multiply plus a shift.
5988 if (VT.isSimple() && !VT.isVector()) {
5989 MVT Simple = VT.getSimpleVT();
5990 unsigned SimpleSize = Simple.getSizeInBits();
5991 EVT NewVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: SimpleSize*2);
5992 if (TLI.isOperationLegal(Op: ISD::MUL, VT: NewVT)) {
5993 SDValue Lo = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: NewVT, Operand: N0);
5994 SDValue Hi = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: NewVT, Operand: N1);
5995 Lo = DAG.getNode(Opcode: ISD::MUL, DL, VT: NewVT, N1: Lo, N2: Hi);
5996 // Compute the high part (result value 1).
5997 Hi = DAG.getNode(Opcode: ISD::SRL, DL, VT: NewVT, N1: Lo,
5998 N2: DAG.getShiftAmountConstant(Val: SimpleSize, VT: NewVT, DL));
5999 Hi = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Hi);
6000 // Compute the low part (result value 0).
6001 Lo = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Lo);
6002 return CombineTo(N, Res0: Lo, Res1: Hi);
6003 }
6004 }
6005
6006 return SDValue();
6007}
6008
6009SDValue DAGCombiner::visitMULO(SDNode *N) {
6010 SDValue N0 = N->getOperand(Num: 0);
6011 SDValue N1 = N->getOperand(Num: 1);
6012 EVT VT = N0.getValueType();
6013 bool IsSigned = (ISD::SMULO == N->getOpcode());
6014
6015 EVT CarryVT = N->getValueType(ResNo: 1);
6016 SDLoc DL(N);
6017
6018 ConstantSDNode *N0C = isConstOrConstSplat(N: N0);
6019 ConstantSDNode *N1C = isConstOrConstSplat(N: N1);
6020
6021 // fold operation with constant operands.
6022 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
6023 // multiple results.
6024 if (N0C && N1C) {
6025 bool Overflow;
6026 APInt Result =
6027 IsSigned ? N0C->getAPIntValue().smul_ov(RHS: N1C->getAPIntValue(), Overflow)
6028 : N0C->getAPIntValue().umul_ov(RHS: N1C->getAPIntValue(), Overflow);
6029 return CombineTo(N, Res0: DAG.getConstant(Val: Result, DL, VT),
6030 Res1: DAG.getBoolConstant(V: Overflow, DL, VT: CarryVT, OpVT: CarryVT));
6031 }
6032
6033 // canonicalize constant to RHS.
6034 if (DAG.isConstantIntBuildVectorOrConstantInt(N: N0) &&
6035 !DAG.isConstantIntBuildVectorOrConstantInt(N: N1))
6036 return DAG.getNode(Opcode: N->getOpcode(), DL, VTList: N->getVTList(), N1, N2: N0);
6037
6038 // fold (mulo x, 0) -> 0 + no carry out
6039 if (isNullOrNullSplat(V: N1))
6040 return CombineTo(N, Res0: DAG.getConstant(Val: 0, DL, VT),
6041 Res1: DAG.getConstant(Val: 0, DL, VT: CarryVT));
6042
6043 // (mulo x, 2) -> (addo x, x)
6044 // FIXME: This needs a freeze.
6045 if (N1C && N1C->getAPIntValue() == 2 &&
6046 (!IsSigned || VT.getScalarSizeInBits() > 2))
6047 return DAG.getNode(Opcode: IsSigned ? ISD::SADDO : ISD::UADDO, DL,
6048 VTList: N->getVTList(), N1: N0, N2: N0);
6049
6050 // A 1 bit SMULO overflows if both inputs are 1.
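// In i1 the only values are 0 and -1, and (-1) * (-1) == +1 is not
// representable, so overflow occurs exactly when both bits are set.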
6051 if (IsSigned && VT.getScalarSizeInBits() == 1) {
6052 SDValue And = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: N0, N2: N1);
6053 SDValue Cmp = DAG.getSetCC(DL, VT: CarryVT, LHS: And,
6054 RHS: DAG.getConstant(Val: 0, DL, VT), Cond: ISD::SETNE);
6055 return CombineTo(N, Res0: And, Res1: Cmp);
6056 }
6057
6058 // If it cannot overflow, transform into a mul.
6059 if (DAG.willNotOverflowMul(IsSigned, N0, N1))
6060 return CombineTo(N, Res0: DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N0, N2: N1),
6061 Res1: DAG.getConstant(Val: 0, DL, VT: CarryVT));
6062 return SDValue();
6063}
6064
6065// Function to calculate whether the Min/Max pair of SDNodes (potentially
6066// swapped around) make a signed saturate pattern, clamping to between a signed
6067// saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW-1.
6068// Returns the node being clamped and the bitwidth of the clamp in BW. Should
6069// work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
6070// same as SimplifySelectCC. N0<N1 ? N2 : N3.
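// For example, smin(smax(x, -128), 127) clamps x to the signed i8 range, so x
// is returned with BW == 8 and Unsigned == false, while smin(smax(x, 0), 255)
// is the unsigned equivalent with BW == 8 and Unsigned == true.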
6071static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
6072 SDValue N3, ISD::CondCode CC, unsigned &BW,
6073 bool &Unsigned, SelectionDAG &DAG) {
6074 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
6075 ISD::CondCode CC) {
6076 // The compare and select operand should be the same or the select operands
6077 // should be truncated versions of the comparison.
6078 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(i: 0)))
6079 return 0;
6080 // The constants need to be the same or a truncated version of each other.
6081 ConstantSDNode *N1C = isConstOrConstSplat(N: peekThroughTruncates(V: N1));
6082 ConstantSDNode *N3C = isConstOrConstSplat(N: peekThroughTruncates(V: N3));
6083 if (!N1C || !N3C)
6084 return 0;
6085 const APInt &C1 = N1C->getAPIntValue().trunc(width: N1.getScalarValueSizeInBits());
6086 const APInt &C2 = N3C->getAPIntValue().trunc(width: N3.getScalarValueSizeInBits());
6087 if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(width: C1.getBitWidth()))
6088 return 0;
6089 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
6090 };
6091
6092 // Check the initial value is a SMIN/SMAX equivalent.
6093 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
6094 if (!Opcode0)
6095 return SDValue();
6096
6097 // We may need only one range check if the fptosi could never produce
6098 // the upper value.
6099 if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
6100 if (isNullOrNullSplat(V: N3)) {
6101 EVT IntVT = N0.getValueType().getScalarType();
6102 EVT FPVT = N0.getOperand(i: 0).getValueType().getScalarType();
6103 if (FPVT.isSimple()) {
6104 Type *InputTy = FPVT.getTypeForEVT(Context&: *DAG.getContext());
6105 const fltSemantics &Semantics = InputTy->getFltSemantics();
6106 uint32_t MinBitWidth =
6107 APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
6108 if (IntVT.getSizeInBits() >= MinBitWidth) {
6109 Unsigned = true;
6110 BW = PowerOf2Ceil(A: MinBitWidth);
6111 return N0;
6112 }
6113 }
6114 }
6115 }
6116
6117 SDValue N00, N01, N02, N03;
6118 ISD::CondCode N0CC;
6119 switch (N0.getOpcode()) {
6120 case ISD::SMIN:
6121 case ISD::SMAX:
6122 N00 = N02 = N0.getOperand(i: 0);
6123 N01 = N03 = N0.getOperand(i: 1);
6124 N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
6125 break;
6126 case ISD::SELECT_CC:
6127 N00 = N0.getOperand(i: 0);
6128 N01 = N0.getOperand(i: 1);
6129 N02 = N0.getOperand(i: 2);
6130 N03 = N0.getOperand(i: 3);
6131 N0CC = cast<CondCodeSDNode>(Val: N0.getOperand(i: 4))->get();
6132 break;
6133 case ISD::SELECT:
6134 case ISD::VSELECT:
6135 if (N0.getOperand(i: 0).getOpcode() != ISD::SETCC)
6136 return SDValue();
6137 N00 = N0.getOperand(i: 0).getOperand(i: 0);
6138 N01 = N0.getOperand(i: 0).getOperand(i: 1);
6139 N02 = N0.getOperand(i: 1);
6140 N03 = N0.getOperand(i: 2);
6141 N0CC = cast<CondCodeSDNode>(Val: N0.getOperand(i: 0).getOperand(i: 2))->get();
6142 break;
6143 default:
6144 return SDValue();
6145 }
6146
6147 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
6148 if (!Opcode1 || Opcode0 == Opcode1)
6149 return SDValue();
6150
6151 ConstantSDNode *MinCOp = isConstOrConstSplat(N: Opcode0 == ISD::SMIN ? N1 : N01);
6152 ConstantSDNode *MaxCOp = isConstOrConstSplat(N: Opcode0 == ISD::SMIN ? N01 : N1);
6153 if (!MinCOp || !MaxCOp || MinCOp->getValueType(ResNo: 0) != MaxCOp->getValueType(ResNo: 0))
6154 return SDValue();
6155
6156 const APInt &MinC = MinCOp->getAPIntValue();
6157 const APInt &MaxC = MaxCOp->getAPIntValue();
6158 APInt MinCPlus1 = MinC + 1;
6159 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
6160 BW = MinCPlus1.exactLogBase2() + 1;
6161 Unsigned = false;
6162 return N02;
6163 }
6164
6165 if (MaxC == 0 && MinC != 0 && MinCPlus1.isPowerOf2()) {
6166 BW = MinCPlus1.exactLogBase2();
6167 Unsigned = true;
6168 return N02;
6169 }
6170
6171 return SDValue();
6172}
6173
6174static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
6175 SDValue N3, ISD::CondCode CC,
6176 SelectionDAG &DAG) {
6177 unsigned BW;
6178 bool Unsigned;
6179 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
6180 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
6181 return SDValue();
6182 EVT FPVT = Fp.getOperand(i: 0).getValueType();
6183 EVT NewVT = FPVT.changeElementType(Context&: *DAG.getContext(),
6184 EltVT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: BW));
6185 unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
6186 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(Op: NewOpc, FPVT, VT: NewVT))
6187 return SDValue();
6188 SDLoc DL(Fp);
6189 SDValue Sat = DAG.getNode(Opcode: NewOpc, DL, VT: NewVT, N1: Fp.getOperand(i: 0),
6190 N2: DAG.getValueType(NewVT.getScalarType()));
6191 return DAG.getExtOrTrunc(IsSigned: !Unsigned, Op: Sat, DL, VT: N2->getValueType(ResNo: 0));
6192}
6193
6194static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
6195 SDValue N3, ISD::CondCode CC,
6196 SelectionDAG &DAG) {
6197 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
6198 // select/vselect/select_cc. The two operand pairs for the select (N2/N3) may
6199 // be truncated versions of the setcc (N0/N1).
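// E.g. a umin(fptoui x, 255) expressed this way becomes
// zext(fp_to_uint_sat x to i8) when the target considers the saturating
// conversion profitable (shouldConvertFpToSat).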
6200 if ((N0 != N2 &&
6201 (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(i: 0))) ||
6202 N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
6203 return SDValue();
6204 ConstantSDNode *N1C = isConstOrConstSplat(N: N1);
6205 ConstantSDNode *N3C = isConstOrConstSplat(N: N3);
6206 if (!N1C || !N3C)
6207 return SDValue();
6208 const APInt &C1 = N1C->getAPIntValue();
6209 const APInt &C3 = N3C->getAPIntValue();
6210 if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
6211 C1 != C3.zext(width: C1.getBitWidth()))
6212 return SDValue();
6213
6214 unsigned BW = (C1 + 1).exactLogBase2();
6215 EVT FPVT = N0.getOperand(i: 0).getValueType();
6216 EVT NewVT = FPVT.changeElementType(Context&: *DAG.getContext(),
6217 EltVT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: BW));
6218 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(Op: ISD::FP_TO_UINT_SAT,
6219 FPVT, VT: NewVT))
6220 return SDValue();
6221
6222 SDValue Sat =
6223 DAG.getNode(Opcode: ISD::FP_TO_UINT_SAT, DL: SDLoc(N0), VT: NewVT, N1: N0.getOperand(i: 0),
6224 N2: DAG.getValueType(NewVT.getScalarType()));
6225 return DAG.getZExtOrTrunc(Op: Sat, DL: SDLoc(N0), VT: N3.getValueType());
6226}
6227
6228SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
6229 SDValue N0 = N->getOperand(Num: 0);
6230 SDValue N1 = N->getOperand(Num: 1);
6231 EVT VT = N0.getValueType();
6232 unsigned Opcode = N->getOpcode();
6233 SDLoc DL(N);
6234
6235 // fold operation with constant operands.
6236 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, Ops: {N0, N1}))
6237 return C;
6238
6239 // If the operands are the same, this is a no-op.
6240 if (N0 == N1)
6241 return N0;
6242
6243 // canonicalize constant to RHS
6244 if (DAG.isConstantIntBuildVectorOrConstantInt(N: N0) &&
6245 !DAG.isConstantIntBuildVectorOrConstantInt(N: N1))
6246 return DAG.getNode(Opcode, DL, VT, N1, N2: N0);
6247
6248 // fold vector ops
6249 if (VT.isVector())
6250 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
6251 return FoldedVOp;
6252
6253 // reassociate minmax
6254 if (SDValue RMINMAX = reassociateOps(Opc: Opcode, DL, N0, N1, Flags: N->getFlags()))
6255 return RMINMAX;
6256
6257 // If both operands are known to have the same sign (both non-negative or both
6258 // negative), flip between UMIN/UMAX and SMIN/SMAX.
6259 // Only do this if at least one of the following holds:
6260 // 1. The current op isn't legal and the flipped is.
6261 // 2. The saturation pattern is broken by canonicalization in InstCombine.
6262 bool IsOpIllegal = !TLI.isOperationLegal(Op: Opcode, VT);
6263 bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
6264
6265 if (IsSatBroken || IsOpIllegal) {
6266 auto HasKnownSameSign = [&](SDValue A, SDValue B) {
6267 if (A.isUndef() || B.isUndef())
6268 return true;
6269
6270 KnownBits KA = DAG.computeKnownBits(Op: A);
6271 if (!KA.isNonNegative() && !KA.isNegative())
6272 return false;
6273
6274 KnownBits KB = DAG.computeKnownBits(Op: B);
6275 if (KA.isNonNegative())
6276 return KB.isNonNegative();
6277 return KB.isNegative();
6278 };
6279
6280 if (HasKnownSameSign(N0, N1)) {
6281 unsigned AltOpcode = ISD::getOppositeSignednessMinMaxOpcode(MinMaxOpc: Opcode);
6282 if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(Op: AltOpcode, VT))
6283 return DAG.getNode(Opcode: AltOpcode, DL, VT, N1: N0, N2: N1);
6284 }
6285 }
6286
6287 if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
6288 if (SDValue S = PerformMinMaxFpToSatCombine(
6289 N0, N1, N2: N0, N3: N1, CC: Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
6290 return S;
6291 if (Opcode == ISD::UMIN)
6292 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2: N0, N3: N1, CC: ISD::SETULT, DAG))
6293 return S;
6294
6295 // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
6296 auto ReductionOpcode = [](unsigned Opcode) {
6297 switch (Opcode) {
6298 case ISD::SMIN:
6299 return ISD::VECREDUCE_SMIN;
6300 case ISD::SMAX:
6301 return ISD::VECREDUCE_SMAX;
6302 case ISD::UMIN:
6303 return ISD::VECREDUCE_UMIN;
6304 case ISD::UMAX:
6305 return ISD::VECREDUCE_UMAX;
6306 default:
6307 llvm_unreachable("Unexpected opcode");
6308 }
6309 };
6310 if (SDValue SD = reassociateReduction(RedOpc: ReductionOpcode(Opcode), Opc: Opcode,
6311 DL: SDLoc(N), VT, N0, N1))
6312 return SD;
6313
6314 // Fold operation with vscale operands.
6315 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
6316 uint64_t C0 = N0->getConstantOperandVal(Num: 0);
6317 uint64_t C1 = N1->getConstantOperandVal(Num: 0);
6318 if (Opcode == ISD::UMAX)
6319 return C0 > C1 ? N0 : N1;
6320 else if (Opcode == ISD::UMIN)
6321 return C0 > C1 ? N1 : N0;
6322 }
6323
6324 // If we know the range of vscale, see if we can fold it given a constant.
6325 // TODO: Generalize this to other nodes by adding computeConstantRange
6326 if (N0.getOpcode() == ISD::VSCALE) {
6327 if (auto *C1 = dyn_cast<ConstantSDNode>(Val&: N1)) {
6328 const Function &F = DAG.getMachineFunction().getFunction();
6329 ConstantRange Range =
6330 getVScaleRange(F: &F, BitWidth: VT.getScalarSizeInBits())
6331 .multiply(Other: ConstantRange(N0.getConstantOperandAPInt(i: 0)));
6332
6333 const APInt &C1V = C1->getAPIntValue();
6334 if ((Opcode == ISD::UMAX && Range.getUnsignedMax().ule(RHS: C1V)) ||
6335 (Opcode == ISD::UMIN && Range.getUnsignedMin().uge(RHS: C1V)) ||
6336 (Opcode == ISD::SMAX && Range.getSignedMax().sle(RHS: C1V)) ||
6337 (Opcode == ISD::SMIN && Range.getSignedMin().sge(RHS: C1V))) {
6338 return N1;
6339 }
6340 }
6341 }
6342
6343 // Simplify the operands using demanded-bits information.
6344 if (SimplifyDemandedBits(Op: SDValue(N, 0)))
6345 return SDValue(N, 0);
6346
6347 return SDValue();
6348}
6349
6350/// If this is a bitwise logic instruction and both operands have the same
6351/// opcode, try to sink the other opcode after the logic instruction.
6352SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
6353 SDValue N0 = N->getOperand(Num: 0), N1 = N->getOperand(Num: 1);
6354 EVT VT = N0.getValueType();
6355 unsigned LogicOpcode = N->getOpcode();
6356 unsigned HandOpcode = N0.getOpcode();
6357 assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
6358 assert(HandOpcode == N1.getOpcode() && "Bad input!");
6359
6360 // Bail early if none of these transforms apply.
6361 if (N0.getNumOperands() == 0)
6362 return SDValue();
6363
6364 // FIXME: We should check number of uses of the operands to not increase
6365 // the instruction count for all transforms.
6366
6367 // Handle size-changing casts (or sign_extend_inreg).
6368 SDValue X = N0.getOperand(i: 0);
6369 SDValue Y = N1.getOperand(i: 0);
6370 EVT XVT = X.getValueType();
6371 SDLoc DL(N);
6372 if (ISD::isExtOpcode(Opcode: HandOpcode) || ISD::isExtVecInRegOpcode(Opcode: HandOpcode) ||
6373 (HandOpcode == ISD::SIGN_EXTEND_INREG &&
6374 N0.getOperand(i: 1) == N1.getOperand(i: 1))) {
6375 // If both operands have other uses, this transform would create extra
6376 // instructions without eliminating anything.
6377 if (!N0.hasOneUse() && !N1.hasOneUse())
6378 return SDValue();
6379 // We need matching integer source types.
6380 if (XVT != Y.getValueType())
6381 return SDValue();
6382 // Don't create an illegal op during or after legalization. Don't ever
6383 // create an unsupported vector op.
6384 if ((VT.isVector() || LegalOperations) &&
6385 !TLI.isOperationLegalOrCustom(Op: LogicOpcode, VT: XVT))
6386 return SDValue();
6387 // Avoid infinite looping with PromoteIntBinOp.
6388 // TODO: Should we apply desirable/legal constraints to all opcodes?
6389 if ((HandOpcode == ISD::ANY_EXTEND ||
6390 HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
6391 LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, VT: XVT))
6392 return SDValue();
6393 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
6394 SDNodeFlags LogicFlags;
6395 LogicFlags.setDisjoint(N->getFlags().hasDisjoint() &&
6396 ISD::isExtOpcode(Opcode: HandOpcode));
6397 SDValue Logic = DAG.getNode(Opcode: LogicOpcode, DL, VT: XVT, N1: X, N2: Y, Flags: LogicFlags);
6398 if (HandOpcode == ISD::SIGN_EXTEND_INREG)
6399 return DAG.getNode(Opcode: HandOpcode, DL, VT, N1: Logic, N2: N0.getOperand(i: 1));
6400 return DAG.getNode(Opcode: HandOpcode, DL, VT, Operand: Logic);
6401 }
6402
6403 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
6404 if (HandOpcode == ISD::TRUNCATE) {
6405 // If both operands have other uses, this transform would create extra
6406 // instructions without eliminating anything.
6407 if (!N0.hasOneUse() && !N1.hasOneUse())
6408 return SDValue();
6409 // We need matching source types.
6410 if (XVT != Y.getValueType())
6411 return SDValue();
6412 // Don't create an illegal op during or after legalization.
6413 if (LegalOperations && !TLI.isOperationLegal(Op: LogicOpcode, VT: XVT))
6414 return SDValue();
6415 // Be extra careful sinking truncate. If it's free, there's no benefit in
6416 // widening a binop. Also, don't create a logic op on an illegal type.
6417 if (TLI.isZExtFree(FromTy: VT, ToTy: XVT) && TLI.isTruncateFree(FromVT: XVT, ToVT: VT))
6418 return SDValue();
6419 if (!TLI.isTypeLegal(VT: XVT))
6420 return SDValue();
6421 SDValue Logic = DAG.getNode(Opcode: LogicOpcode, DL, VT: XVT, N1: X, N2: Y);
6422 return DAG.getNode(Opcode: HandOpcode, DL, VT, Operand: Logic);
6423 }
6424
6425 // For binops SHL/SRL/SRA/AND:
6426 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
6427 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
6428 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
6429 N0.getOperand(i: 1) == N1.getOperand(i: 1)) {
6430 // If either operand has other uses, this transform is not an improvement.
6431 if (!N0.hasOneUse() || !N1.hasOneUse())
6432 return SDValue();
6433 SDValue Logic = DAG.getNode(Opcode: LogicOpcode, DL, VT: XVT, N1: X, N2: Y);
6434 return DAG.getNode(Opcode: HandOpcode, DL, VT, N1: Logic, N2: N0.getOperand(i: 1));
6435 }
6436
6437 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
6438 if (HandOpcode == ISD::BSWAP) {
6439 // If either operand has other uses, this transform is not an improvement.
6440 if (!N0.hasOneUse() || !N1.hasOneUse())
6441 return SDValue();
6442 SDValue Logic = DAG.getNode(Opcode: LogicOpcode, DL, VT: XVT, N1: X, N2: Y);
6443 return DAG.getNode(Opcode: HandOpcode, DL, VT, Operand: Logic);
6444 }
6445
6446 // For funnel shifts FSHL/FSHR:
6447 // logic_op (OP x, x1, s), (OP y, y1, s) -->
6448 // --> OP (logic_op x, y), (logic_op, x1, y1), s
6449 if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
6450 N0.getOperand(i: 2) == N1.getOperand(i: 2)) {
6451 if (!N0.hasOneUse() || !N1.hasOneUse())
6452 return SDValue();
6453 SDValue X1 = N0.getOperand(i: 1);
6454 SDValue Y1 = N1.getOperand(i: 1);
6455 SDValue S = N0.getOperand(i: 2);
6456 SDValue Logic0 = DAG.getNode(Opcode: LogicOpcode, DL, VT, N1: X, N2: Y);
6457 SDValue Logic1 = DAG.getNode(Opcode: LogicOpcode, DL, VT, N1: X1, N2: Y1);
6458 return DAG.getNode(Opcode: HandOpcode, DL, VT, N1: Logic0, N2: Logic1, N3: S);
6459 }
6460
6461 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
6462 // Only perform this optimization up until type legalization, before
6463 // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
6464 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
6465 // we don't want to undo this promotion.
6466 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
6467 // on scalars.
6468 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
6469 Level <= AfterLegalizeTypes) {
6470 // Input types must be integer and the same.
6471 if (XVT.isInteger() && XVT == Y.getValueType() &&
6472 !(VT.isVector() && TLI.isTypeLegal(VT) &&
6473 !XVT.isVector() && !TLI.isTypeLegal(VT: XVT))) {
6474 SDValue Logic = DAG.getNode(Opcode: LogicOpcode, DL, VT: XVT, N1: X, N2: Y);
6475 return DAG.getNode(Opcode: HandOpcode, DL, VT, Operand: Logic);
6476 }
6477 }
6478
6479 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
6480 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
6481 // If both shuffles use the same mask, and both shuffle within a single
6482 // vector, then it is worthwhile to move the swizzle after the operation.
6483 // The type-legalizer generates this pattern when loading illegal
6484 // vector types from memory. In many cases this allows additional shuffle
6485 // optimizations.
6486 // There are other cases where moving the shuffle after the xor/and/or
6487 // is profitable even if shuffles don't perform a swizzle.
6488 // If both shuffles use the same mask, and both shuffles have the same first
6489 // or second operand, then it might still be profitable to move the shuffle
6490 // after the xor/and/or operation.
6491 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
6492 auto *SVN0 = cast<ShuffleVectorSDNode>(Val&: N0);
6493 auto *SVN1 = cast<ShuffleVectorSDNode>(Val&: N1);
6494 assert(X.getValueType() == Y.getValueType() &&
6495 "Inputs to shuffles are not the same type");
6496
6497 // Check that both shuffles use the same mask. The masks are known to be of
6498 // the same length because the result vector type is the same.
6499 // Check also that shuffles have only one use to avoid introducing extra
6500 // instructions.
6501 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
6502 !SVN0->getMask().equals(RHS: SVN1->getMask()))
6503 return SDValue();
6504
6505 // Don't try to fold this node if it requires introducing a
6506 // build vector of all zeros that might be illegal at this stage.
6507 SDValue ShOp = N0.getOperand(i: 1);
6508 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
6509 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6510
6511 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
6512 if (N0.getOperand(i: 1) == N1.getOperand(i: 1) && ShOp.getNode()) {
6513 SDValue Logic = DAG.getNode(Opcode: LogicOpcode, DL, VT,
6514 N1: N0.getOperand(i: 0), N2: N1.getOperand(i: 0));
6515 return DAG.getVectorShuffle(VT, dl: DL, N1: Logic, N2: ShOp, Mask: SVN0->getMask());
6516 }
6517
6518 // Don't try to fold this node if it requires introducing a
6519 // build vector of all zeros that might be illegal at this stage.
6520 ShOp = N0.getOperand(i: 0);
6521 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
6522 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6523
6524 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
6525 if (N0.getOperand(i: 0) == N1.getOperand(i: 0) && ShOp.getNode()) {
6526 SDValue Logic = DAG.getNode(Opcode: LogicOpcode, DL, VT, N1: N0.getOperand(i: 1),
6527 N2: N1.getOperand(i: 1));
6528 return DAG.getVectorShuffle(VT, dl: DL, N1: ShOp, N2: Logic, Mask: SVN0->getMask());
6529 }
6530 }
6531
6532 return SDValue();
6533}
6534
6535/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
6536SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
6537 const SDLoc &DL) {
6538 SDValue LL, LR, RL, RR, N0CC, N1CC;
6539 if (!isSetCCEquivalent(N: N0, LHS&: LL, RHS&: LR, CC&: N0CC) ||
6540 !isSetCCEquivalent(N: N1, LHS&: RL, RHS&: RR, CC&: N1CC))
6541 return SDValue();
6542
6543 assert(N0.getValueType() == N1.getValueType() &&
6544 "Unexpected operand types for bitwise logic op");
6545 assert(LL.getValueType() == LR.getValueType() &&
6546 RL.getValueType() == RR.getValueType() &&
6547 "Unexpected operand types for setcc");
6548
6549 // If we're here post-legalization or the logic op type is not i1, the logic
6550 // op type must match a setcc result type. Also, all folds require new
6551 // operations on the left and right operands, so those types must match.
6552 EVT VT = N0.getValueType();
6553 EVT OpVT = LL.getValueType();
6554 if (LegalOperations || VT.getScalarType() != MVT::i1)
6555 if (VT != getSetCCResultType(VT: OpVT))
6556 return SDValue();
6557 if (OpVT != RL.getValueType())
6558 return SDValue();
6559
6560 ISD::CondCode CC0 = cast<CondCodeSDNode>(Val&: N0CC)->get();
6561 ISD::CondCode CC1 = cast<CondCodeSDNode>(Val&: N1CC)->get();
6562 bool IsInteger = OpVT.isInteger();
6563 if (LR == RR && CC0 == CC1 && IsInteger) {
6564 bool IsZero = isNullOrNullSplat(V: LR);
6565 bool IsNeg1 = isAllOnesOrAllOnesSplat(V: LR);
6566
6567 // All bits clear?
6568 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
6569 // All sign bits clear?
6570 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
6571 // Any bits set?
6572 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
6573 // Any sign bits set?
6574 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
6575
6576 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
6577 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
6578 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
6579 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
6580 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
6581 SDValue Or = DAG.getNode(Opcode: ISD::OR, DL: SDLoc(N0), VT: OpVT, N1: LL, N2: RL);
6582 AddToWorklist(N: Or.getNode());
6583 return DAG.getSetCC(DL, VT, LHS: Or, RHS: LR, Cond: CC1);
6584 }
6585
6586 // All bits set?
6587 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
6588 // All sign bits set?
6589 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
6590 // Any bits clear?
6591 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
6592 // Any sign bits clear?
6593 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
6594
6595 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
6596 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
6597 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
6598 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
6599 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
6600 SDValue And = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(N0), VT: OpVT, N1: LL, N2: RL);
6601 AddToWorklist(N: And.getNode());
6602 return DAG.getSetCC(DL, VT, LHS: And, RHS: LR, Cond: CC1);
6603 }
6604 }
6605
6606 // TODO: What is the 'or' equivalent of this fold?
6607 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
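// Adding 1 maps 0 to 1 and -1 to 0, so X is neither 0 nor -1 exactly when
// X + 1 is unsigned-greater-than-or-equal to 2.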
6608 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
6609 IsInteger && CC0 == ISD::SETNE &&
6610 ((isNullConstant(V: LR) && isAllOnesConstant(V: RR)) ||
6611 (isAllOnesConstant(V: LR) && isNullConstant(V: RR)))) {
6612 SDValue One = DAG.getConstant(Val: 1, DL, VT: OpVT);
6613 SDValue Two = DAG.getConstant(Val: 2, DL, VT: OpVT);
6614 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(N0), VT: OpVT, N1: LL, N2: One);
6615 AddToWorklist(N: Add.getNode());
6616 return DAG.getSetCC(DL, VT, LHS: Add, RHS: Two, Cond: ISD::SETUGE);
6617 }
6618
6619 // Try more general transforms if the predicates match and the only user of
6620 // the compares is the 'and' or 'or'.
6621 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(VT: OpVT) && CC0 == CC1 &&
6622 N0.hasOneUse() && N1.hasOneUse()) {
6623 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
6624 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
6625 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
6626 SDValue XorL = DAG.getNode(Opcode: ISD::XOR, DL: SDLoc(N0), VT: OpVT, N1: LL, N2: LR);
6627 SDValue XorR = DAG.getNode(Opcode: ISD::XOR, DL: SDLoc(N1), VT: OpVT, N1: RL, N2: RR);
6628 SDValue Or = DAG.getNode(Opcode: ISD::OR, DL, VT: OpVT, N1: XorL, N2: XorR);
6629 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: OpVT);
6630 return DAG.getSetCC(DL, VT, LHS: Or, RHS: Zero, Cond: CC1);
6631 }
6632
6633 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
6634 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
6635 // Match a shared variable operand and 2 non-opaque constant operands.
6636 auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
6637 // The difference of the constants must be a single bit.
6638 const APInt &CMax =
6639 APIntOps::umax(A: C0->getAPIntValue(), B: C1->getAPIntValue());
6640 const APInt &CMin =
6641 APIntOps::umin(A: C0->getAPIntValue(), B: C1->getAPIntValue());
6642 return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
6643 };
6644 if (LL == RL && ISD::matchBinaryPredicate(LHS: LR, RHS: RR, Match: MatchDiffPow2)) {
6645 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
6646 // setcc (and (sub X, CMin), ~(CMax - CMin)), 0, ne/eq
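// E.g. (X == 8) | (X == 12) --> ((X - 8) & ~4) == 0, since only the values
// whose difference from 8 is 0 or 4 survive the mask.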
6647 SDValue Max = DAG.getNode(Opcode: ISD::UMAX, DL, VT: OpVT, N1: LR, N2: RR);
6648 SDValue Min = DAG.getNode(Opcode: ISD::UMIN, DL, VT: OpVT, N1: LR, N2: RR);
6649 SDValue Offset = DAG.getNode(Opcode: ISD::SUB, DL, VT: OpVT, N1: LL, N2: Min);
6650 SDValue Diff = DAG.getNode(Opcode: ISD::SUB, DL, VT: OpVT, N1: Max, N2: Min);
6651 SDValue Mask = DAG.getNOT(DL, Val: Diff, VT: OpVT);
6652 SDValue And = DAG.getNode(Opcode: ISD::AND, DL, VT: OpVT, N1: Offset, N2: Mask);
6653 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: OpVT);
6654 return DAG.getSetCC(DL, VT, LHS: And, RHS: Zero, Cond: CC0);
6655 }
6656 }
6657 }
6658
6659 // Canonicalize equivalent operands to LL == RL.
6660 if (LL == RR && LR == RL) {
6661 CC1 = ISD::getSetCCSwappedOperands(Operation: CC1);
6662 std::swap(a&: RL, b&: RR);
6663 }
6664
6665 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6666 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6667 if (LL == RL && LR == RR) {
6668 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(Op1: CC0, Op2: CC1, Type: OpVT)
6669 : ISD::getSetCCOrOperation(Op1: CC0, Op2: CC1, Type: OpVT);
6670 if (NewCC != ISD::SETCC_INVALID &&
6671 (!LegalOperations ||
6672 (TLI.isCondCodeLegal(CC: NewCC, VT: LL.getSimpleValueType()) &&
6673 TLI.isOperationLegal(Op: ISD::SETCC, VT: OpVT))))
6674 return DAG.getSetCC(DL, VT, LHS: LL, RHS: LR, Cond: NewCC);
6675 }
6676
6677 return SDValue();
6678}
6679
6680static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
6681 SelectionDAG &DAG) {
6682 return DAG.isKnownNeverSNaN(Op: Operand2) && DAG.isKnownNeverSNaN(Op: Operand1);
6683}
6684
6685static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
6686 SelectionDAG &DAG) {
6687 return DAG.isKnownNeverNaN(Op: Operand2) && DAG.isKnownNeverNaN(Op: Operand1);
6688}
6689
6690/// Returns an appropriate FP min/max opcode for clamping operations.
6691static unsigned getMinMaxOpcodeForClamp(bool IsMin, SDValue Operand1,
6692 SDValue Operand2, SelectionDAG &DAG,
6693 const TargetLowering &TLI) {
6694 EVT VT = Operand1.getValueType();
6695 unsigned IEEEOp = IsMin ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
6696 if (TLI.isOperationLegalOrCustom(Op: IEEEOp, VT) &&
6697 arebothOperandsNotNan(Operand1, Operand2, DAG))
6698 return IEEEOp;
6699 unsigned PreferredOp = IsMin ? ISD::FMINNUM : ISD::FMAXNUM;
6700 if (TLI.isOperationLegalOrCustom(Op: PreferredOp, VT))
6701 return PreferredOp;
6702 return ISD::DELETED_NODE;
6703}
6704
6705// FIXME: use FMINIMUMNUM if possible, such as for RISC-V.
6706static unsigned getMinMaxOpcodeForCompareFold(
6707 SDValue Operand1, SDValue Operand2, ISD::CondCode CC, unsigned OrAndOpcode,
6708 SelectionDAG &DAG, bool isFMAXNUMFMINNUM_IEEE, bool isFMAXNUMFMINNUM) {
6709 // The optimization cannot be applied for all the predicates because
6710 // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
6711 // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
6712 // applied at all if one of the operands is a signaling NaN.
6713
6714 // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
6715 // are non-NaN values.
6716 if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
6717 ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND))) {
6718 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6719 isFMAXNUMFMINNUM_IEEE
6720 ? ISD::FMINNUM_IEEE
6721 : ISD::DELETED_NODE;
6722 }
6723
6724 if (((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::OR)) ||
6725 ((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::AND))) {
6726 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6727 isFMAXNUMFMINNUM_IEEE
6728 ? ISD::FMAXNUM_IEEE
6729 : ISD::DELETED_NODE;
6730 }
6731
6732 // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
6733 // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
6734 // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
6735 // that there are not any sNaNs, then the optimization is not valid
6736 // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
6737 // the optimization using FMINNUM/FMAXNUM for the following cases. If
6738 // we can prove that we do not have any sNaNs, then we can do the
6739 // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
6740 // cases.
6741 if (((CC == ISD::SETOLT || CC == ISD::SETOLE) && (OrAndOpcode == ISD::OR)) ||
6742 ((CC == ISD::SETUGT || CC == ISD::SETUGE) && (OrAndOpcode == ISD::AND))) {
6743 return isFMAXNUMFMINNUM ? ISD::FMINNUM
6744 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6745 isFMAXNUMFMINNUM_IEEE
6746 ? ISD::FMINNUM_IEEE
6747 : ISD::DELETED_NODE;
6748 }
6749
6750 if (((CC == ISD::SETOGT || CC == ISD::SETOGE) && (OrAndOpcode == ISD::OR)) ||
6751 ((CC == ISD::SETULT || CC == ISD::SETULE) && (OrAndOpcode == ISD::AND))) {
6752 return isFMAXNUMFMINNUM ? ISD::FMAXNUM
6753 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6754 isFMAXNUMFMINNUM_IEEE
6755 ? ISD::FMAXNUM_IEEE
6756 : ISD::DELETED_NODE;
6757 }
6758
6759 return ISD::DELETED_NODE;
6760}
6761
6762static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
6763 using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
6764 assert(
6765 (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
6766 "Invalid Op to combine SETCC with");
6767
6768 // TODO: Search past casts/truncates.
6769 SDValue LHS = LogicOp->getOperand(Num: 0);
6770 SDValue RHS = LogicOp->getOperand(Num: 1);
6771 if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
6772 !LHS->hasOneUse() || !RHS->hasOneUse())
6773 return SDValue();
6774
6775 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6776 AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
6777 LogicOp, SETCC0: LHS.getNode(), SETCC1: RHS.getNode());
6778
6779 SDValue LHS0 = LHS->getOperand(Num: 0);
6780 SDValue RHS0 = RHS->getOperand(Num: 0);
6781 SDValue LHS1 = LHS->getOperand(Num: 1);
6782 SDValue RHS1 = RHS->getOperand(Num: 1);
6783 // TODO: We don't actually need a splat here, for vectors we just need the
6784 // invariants to hold for each element.
6785 auto *LHS1C = isConstOrConstSplat(N: LHS1);
6786 auto *RHS1C = isConstOrConstSplat(N: RHS1);
6787 ISD::CondCode CCL = cast<CondCodeSDNode>(Val: LHS.getOperand(i: 2))->get();
6788 ISD::CondCode CCR = cast<CondCodeSDNode>(Val: RHS.getOperand(i: 2))->get();
6789 EVT VT = LogicOp->getValueType(ResNo: 0);
6790 EVT OpVT = LHS0.getValueType();
6791 SDLoc DL(LogicOp);
6792
6793 // Check if the operands of an and/or operation are comparisons and if they
6794 // compare against the same value. Replace the and/or-cmp-cmp sequence with
6795 // min/max cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
6796 // sequence will be replaced with min-cmp sequence:
6797 // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
6798 // and and-cmp-cmp will be replaced with max-cmp sequence:
6799 // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
6800 // The optimization does not work for `==` or `!=`.
6801 // The two comparisons should have either the same predicate or the
6802 // predicate of one of the comparisons is the opposite of the other one.
6803 bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(Op: ISD::FMAXNUM_IEEE, VT: OpVT) &&
6804 TLI.isOperationLegal(Op: ISD::FMINNUM_IEEE, VT: OpVT);
6805 bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(Op: ISD::FMAXNUM, VT: OpVT) &&
6806 TLI.isOperationLegalOrCustom(Op: ISD::FMINNUM, VT: OpVT);
6807 if (((OpVT.isInteger() && TLI.isOperationLegal(Op: ISD::UMAX, VT: OpVT) &&
6808 TLI.isOperationLegal(Op: ISD::SMAX, VT: OpVT) &&
6809 TLI.isOperationLegal(Op: ISD::UMIN, VT: OpVT) &&
6810 TLI.isOperationLegal(Op: ISD::SMIN, VT: OpVT)) ||
6811 (OpVT.isFloatingPoint() &&
6812 (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
6813 !ISD::isIntEqualitySetCC(Code: CCL) && !ISD::isFPEqualitySetCC(Code: CCL) &&
6814 CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
6815 CCL != ISD::SETTRUE &&
6816 (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(Operation: CCR))) {
6817
6818 SDValue CommonValue, Operand1, Operand2;
6819 ISD::CondCode CC = ISD::SETCC_INVALID;
6820 if (CCL == CCR) {
6821 if (LHS0 == RHS0) {
6822 CommonValue = LHS0;
6823 Operand1 = LHS1;
6824 Operand2 = RHS1;
6825 CC = ISD::getSetCCSwappedOperands(Operation: CCL);
6826 } else if (LHS1 == RHS1) {
6827 CommonValue = LHS1;
6828 Operand1 = LHS0;
6829 Operand2 = RHS0;
6830 CC = CCL;
6831 }
6832 } else {
6833 assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
6834 if (LHS0 == RHS1) {
6835 CommonValue = LHS0;
6836 Operand1 = LHS1;
6837 Operand2 = RHS0;
6838 CC = CCR;
6839 } else if (RHS0 == LHS1) {
6840 CommonValue = LHS1;
6841 Operand1 = LHS0;
6842 Operand2 = RHS1;
6843 CC = CCL;
6844 }
6845 }
6846
6847 // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
6848 // handle it using OR/AND.
6849 if (CC == ISD::SETLT && isNullOrNullSplat(V: CommonValue))
6850 CC = ISD::SETCC_INVALID;
6851 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(V: CommonValue))
6852 CC = ISD::SETCC_INVALID;
6853
6854 if (CC != ISD::SETCC_INVALID) {
6855 unsigned NewOpcode = ISD::DELETED_NODE;
6856 bool IsSigned = isSignedIntSetCC(Code: CC);
6857 if (OpVT.isInteger()) {
6858 bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
6859 CC == ISD::SETLT || CC == ISD::SETULT);
6860 bool IsOr = (LogicOp->getOpcode() == ISD::OR);
6861 if (IsLess == IsOr)
6862 NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
6863 else
6864 NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
6865 } else if (OpVT.isFloatingPoint())
6866 NewOpcode = getMinMaxOpcodeForCompareFold(
6867 Operand1, Operand2, CC, OrAndOpcode: LogicOp->getOpcode(), DAG,
6868 isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
6869
6870 if (NewOpcode != ISD::DELETED_NODE) {
6871 SDValue MinMaxValue =
6872 DAG.getNode(Opcode: NewOpcode, DL, VT: OpVT, N1: Operand1, N2: Operand2);
6873 return DAG.getSetCC(DL, VT, LHS: MinMaxValue, RHS: CommonValue, Cond: CC);
6874 }
6875 }
6876 }
6877
6878 if (LHS0 == LHS1 && RHS0 == RHS1 && CCL == CCR &&
6879 LHS0.getValueType() == RHS0.getValueType() &&
6880 ((LogicOp->getOpcode() == ISD::AND && CCL == ISD::SETO) ||
6881 (LogicOp->getOpcode() == ISD::OR && CCL == ISD::SETUO)))
6882 return DAG.getSetCC(DL, VT, LHS: LHS0, RHS: RHS0, Cond: CCL);
6883
6884 if (TargetPreference == AndOrSETCCFoldKind::None)
6885 return SDValue();
6886
6887 if (CCL == CCR &&
6888 CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
6889 LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) {
6890 const APInt &APLhs = LHS1C->getAPIntValue();
6891 const APInt &APRhs = RHS1C->getAPIntValue();
6892
6893 // Preference is to use ISD::ABS or we already have an ISD::ABS (in which
6894 // case this is just a compare).
6895 if (APLhs == (-APRhs) &&
6896 ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
6897 DAG.doesNodeExist(Opcode: ISD::ABS, VTList: DAG.getVTList(VT: OpVT), Ops: {LHS0}))) {
6898 const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
6899 // (icmp eq A, C) | (icmp eq A, -C)
6900 // -> (icmp eq Abs(A), C)
6901 // (icmp ne A, C) & (icmp ne A, -C)
6902 // -> (icmp ne Abs(A), C)
6903 SDValue AbsOp = DAG.getNode(Opcode: ISD::ABS, DL, VT: OpVT, Operand: LHS0);
6904 return DAG.getNode(Opcode: ISD::SETCC, DL, VT, N1: AbsOp,
6905 N2: DAG.getConstant(Val: C, DL, VT: OpVT), N3: LHS.getOperand(i: 2));
6906 } else if (TargetPreference &
6907 (AndOrSETCCFoldKind::AddAnd | AndOrSETCCFoldKind::NotAnd)) {
6908
6909 // AndOrSETCCFoldKind::AddAnd:
6910 // A == C0 | A == C1
6911 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6912 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
6913 // A != C0 & A != C1
6914 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6915 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
6916
6917 // AndOrSETCCFoldKind::NotAnd:
6918 // A == C0 | A == C1
6919 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6920 // -> ~A & smin(C0, C1) == 0
6921 // A != C0 & A != C1
6922 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6923 // -> ~A & smin(C0, C1) != 0
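      // E.g. AddAnd with C0 = 12, C1 = 14: Dif = 2 (a power of two), so the
      // fold gives ((A - 12) & ~2) == 0.
      // E.g. NotAnd with C0 = -1, C1 = -5: smax = -1 and Dif = 4, so the fold
      // gives (~A & -5) == 0.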
6924
6925 const APInt &MaxC = APIntOps::smax(A: APRhs, B: APLhs);
6926 const APInt &MinC = APIntOps::smin(A: APRhs, B: APLhs);
6927 APInt Dif = MaxC - MinC;
6928 if (!Dif.isZero() && Dif.isPowerOf2()) {
6929 if (MaxC.isAllOnes() &&
6930 (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
6931 SDValue NotOp = DAG.getNOT(DL, Val: LHS0, VT: OpVT);
6932 SDValue AndOp = DAG.getNode(Opcode: ISD::AND, DL, VT: OpVT, N1: NotOp,
6933 N2: DAG.getConstant(Val: MinC, DL, VT: OpVT));
6934 return DAG.getNode(Opcode: ISD::SETCC, DL, VT, N1: AndOp,
6935 N2: DAG.getConstant(Val: 0, DL, VT: OpVT), N3: LHS.getOperand(i: 2));
6936 } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
6937
6938 SDValue AddOp = DAG.getNode(Opcode: ISD::ADD, DL, VT: OpVT, N1: LHS0,
6939 N2: DAG.getConstant(Val: -MinC, DL, VT: OpVT));
6940 SDValue AndOp = DAG.getNode(Opcode: ISD::AND, DL, VT: OpVT, N1: AddOp,
6941 N2: DAG.getConstant(Val: ~Dif, DL, VT: OpVT));
6942 return DAG.getNode(Opcode: ISD::SETCC, DL, VT, N1: AndOp,
6943 N2: DAG.getConstant(Val: 0, DL, VT: OpVT), N3: LHS.getOperand(i: 2));
6944 }
6945 }
6946 }
6947 }
6948
6949 return SDValue();
6950}
6951
6952// Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
6953// We canonicalize to the `select` form in the middle end, but the `and` form
6954 // gets better codegen on all tested targets (arm, x86, riscv).
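// E.g. with a zero-or-one boolean c, (select c, (and x, 1), 0) computes
// (x & 1) when c is 1 and 0 when c is 0, which is exactly (and (zext c), x).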
6955static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F,
6956 const SDLoc &DL, SelectionDAG &DAG) {
6957 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6958 if (!isNullConstant(V: F))
6959 return SDValue();
6960
6961 EVT CondVT = Cond.getValueType();
6962 if (TLI.getBooleanContents(Type: CondVT) !=
6963 TargetLoweringBase::ZeroOrOneBooleanContent)
6964 return SDValue();
6965
6966 if (T.getOpcode() != ISD::AND)
6967 return SDValue();
6968
6969 if (!isOneConstant(V: T.getOperand(i: 1)))
6970 return SDValue();
6971
6972 EVT OpVT = T.getValueType();
6973
6974 SDValue CondMask =
6975 OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Op: Cond, SL: DL, VT: OpVT, OpVT: CondVT);
6976 return DAG.getNode(Opcode: ISD::AND, DL, VT: OpVT, N1: CondMask, N2: T.getOperand(i: 0));
6977}
6978
6979/// This contains all DAGCombine rules which reduce two values combined by
6980/// an And operation to a single value. This makes them reusable in the context
6981/// of visitSELECT(). Rules involving constants are not included as
6982/// visitSELECT() already handles those cases.
6983SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
6984 EVT VT = N1.getValueType();
6985 SDLoc DL(N);
6986
6987 // fold (and x, undef) -> 0
6988 if (N0.isUndef() || N1.isUndef())
6989 return DAG.getConstant(Val: 0, DL, VT);
6990
6991 if (SDValue V = foldLogicOfSetCCs(IsAnd: true, N0, N1, DL))
6992 return V;
6993
6994 // Canonicalize:
6995 // and(x, add) -> and(add, x)
6996 if (N1.getOpcode() == ISD::ADD)
6997 std::swap(a&: N0, b&: N1);
6998
6999 // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
7000 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
7001 VT.isScalarInteger() && VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
7002 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
7003 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) {
7004         // Look for (and (add x, c1), (lshr y, c2)). If c1 isn't a legal
7005         // immediate for an add, but would be if its top c2 bits were set,
7006         // transform the add so the immediate doesn't need to be materialized
7007         // in a register.
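        // E.g. for i64 with c2 == 48, the lshr result has its top 48 bits
        // zero, so the AND masks them off anyway; setting the top 48 bits of
        // c1 cannot change the result but may yield a legal add immediate.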
7008 APInt ADDC = ADDI->getAPIntValue();
7009 APInt SRLC = SRLI->getAPIntValue();
7010 if (ADDC.getSignificantBits() <= 64 && SRLC.ult(RHS: VT.getSizeInBits()) &&
7011 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
7012 APInt Mask = APInt::getHighBitsSet(numBits: VT.getSizeInBits(),
7013 hiBitsSet: SRLC.getZExtValue());
7014 if (DAG.MaskedValueIsZero(Op: N0.getOperand(i: 1), Mask)) {
7015 ADDC |= Mask;
7016 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
7017 SDLoc DL0(N0);
7018 SDValue NewAdd =
7019 DAG.getNode(Opcode: ISD::ADD, DL: DL0, VT,
7020 N1: N0.getOperand(i: 0), N2: DAG.getConstant(Val: ADDC, DL, VT));
7021 CombineTo(N: N0.getNode(), Res: NewAdd);
7022 // Return N so it doesn't get rechecked!
7023 return SDValue(N, 0);
7024 }
7025 }
7026 }
7027 }
7028 }
7029 }
7030
7031 return SDValue();
7032}
7033
7034bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
7035 EVT LoadResultTy, EVT &ExtVT) {
7036 if (!AndC->getAPIntValue().isMask())
7037 return false;
7038
7039 unsigned ActiveBits = AndC->getAPIntValue().countr_one();
7040
7041 ExtVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ActiveBits);
7042 EVT LoadedVT = LoadN->getMemoryVT();
7043
7044 if (ExtVT == LoadedVT &&
7045 (!LegalOperations ||
7046 TLI.isLoadExtLegal(ExtType: ISD::ZEXTLOAD, ValVT: LoadResultTy, MemVT: ExtVT))) {
7047 // ZEXTLOAD will match without needing to change the size of the value being
7048 // loaded.
7049 return true;
7050 }
7051
7052   // Do not change the width of a volatile or atomic load.
7053 if (!LoadN->isSimple())
7054 return false;
7055
7056 // Do not generate loads of non-round integer types since these can
7057 // be expensive (and would be wrong if the type is not byte sized).
7058 if (!LoadedVT.bitsGT(VT: ExtVT) || !ExtVT.isRound())
7059 return false;
7060
7061 if (LegalOperations &&
7062 !TLI.isLoadExtLegal(ExtType: ISD::ZEXTLOAD, ValVT: LoadResultTy, MemVT: ExtVT))
7063 return false;
7064
7065 if (!TLI.shouldReduceLoadWidth(Load: LoadN, ExtTy: ISD::ZEXTLOAD, NewVT: ExtVT, /*ByteOffset=*/0))
7066 return false;
7067
7068 return true;
7069}
7070
7071bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
7072 ISD::LoadExtType ExtType, EVT &MemVT,
7073 unsigned ShAmt) {
7074 if (!LDST)
7075 return false;
7076
7077 // Only allow byte offsets.
7078 if (ShAmt % 8)
7079 return false;
7080 const unsigned ByteShAmt = ShAmt / 8;
7081
7082 // Do not generate loads of non-round integer types since these can
7083 // be expensive (and would be wrong if the type is not byte sized).
7084 if (!MemVT.isRound())
7085 return false;
7086
7087   // Don't change the width of a volatile or atomic load.
7088 if (!LDST->isSimple())
7089 return false;
7090
7091 EVT LdStMemVT = LDST->getMemoryVT();
7092
7093 // Bail out when changing the scalable property, since we can't be sure that
7094 // we're actually narrowing here.
7095 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
7096 return false;
7097
7098 // Verify that we are actually reducing a load width here.
7099 if (LdStMemVT.bitsLT(VT: MemVT))
7100 return false;
7101
7102 // Ensure that this isn't going to produce an unsupported memory access.
7103 if (ShAmt) {
7104 const Align LDSTAlign = LDST->getAlign();
7105 const Align NarrowAlign = commonAlignment(A: LDSTAlign, Offset: ByteShAmt);
7106 if (!TLI.allowsMemoryAccess(Context&: *DAG.getContext(), DL: DAG.getDataLayout(), VT: MemVT,
7107 AddrSpace: LDST->getAddressSpace(), Alignment: NarrowAlign,
7108 Flags: LDST->getMemOperand()->getFlags()))
7109 return false;
7110 }
7111
7112 // It's not possible to generate a constant of extended or untyped type.
7113 EVT PtrType = LDST->getBasePtr().getValueType();
7114 if (PtrType == MVT::Untyped || PtrType.isExtended())
7115 return false;
7116
7117 if (isa<LoadSDNode>(Val: LDST)) {
7118 LoadSDNode *Load = cast<LoadSDNode>(Val: LDST);
7119     // Don't transform one with multiple uses; this would require adding a new
7120     // load.
7121 if (!SDValue(Load, 0).hasOneUse())
7122 return false;
7123
7124 if (LegalOperations &&
7125 !TLI.isLoadExtLegal(ExtType, ValVT: Load->getValueType(ResNo: 0), MemVT))
7126 return false;
7127
7128 // For the transform to be legal, the load must produce only two values
7129 // (the value loaded and the chain). Don't transform a pre-increment
7130 // load, for example, which produces an extra value. Otherwise the
7131 // transformation is not equivalent, and the downstream logic to replace
7132 // uses gets things wrong.
7133 if (Load->getNumValues() > 2)
7134 return false;
7135
7136 // If the load that we're shrinking is an extload and we're not just
7137 // discarding the extension we can't simply shrink the load. Bail.
7138 // TODO: It would be possible to merge the extensions in some cases.
7139 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
7140 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
7141 return false;
7142
7143 if (!TLI.shouldReduceLoadWidth(Load, ExtTy: ExtType, NewVT: MemVT, ByteOffset: ByteShAmt))
7144 return false;
7145 } else {
7146 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
7147 StoreSDNode *Store = cast<StoreSDNode>(Val: LDST);
7148 // Can't write outside the original store
7149 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
7150 return false;
7151
7152 if (LegalOperations &&
7153 !TLI.isTruncStoreLegal(ValVT: Store->getValue().getValueType(), MemVT))
7154 return false;
7155 }
7156 return true;
7157}
7158
7159bool DAGCombiner::SearchForAndLoads(SDNode *N,
7160 SmallVectorImpl<LoadSDNode*> &Loads,
7161 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
7162 ConstantSDNode *Mask,
7163 SDNode *&NodeToMask) {
7164 // Recursively search for the operands, looking for loads which can be
7165 // narrowed.
7166 for (SDValue Op : N->op_values()) {
7167 if (Op.getValueType().isVector())
7168 return false;
7169
7170 // Some constants may need fixing up later if they are too large.
7171 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
7172 assert(ISD::isBitwiseLogicOp(N->getOpcode()) &&
7173 "Expected bitwise logic operation");
7174 if (!C->getAPIntValue().isSubsetOf(RHS: Mask->getAPIntValue()))
7175 NodesWithConsts.insert(Ptr: N);
7176 continue;
7177 }
7178
7179 if (!Op.hasOneUse())
7180 return false;
7181
7182 switch(Op.getOpcode()) {
7183 case ISD::LOAD: {
7184 auto *Load = cast<LoadSDNode>(Val&: Op);
7185 EVT ExtVT;
7186 if (isAndLoadExtLoad(AndC: Mask, LoadN: Load, LoadResultTy: Load->getValueType(ResNo: 0), ExtVT) &&
7187 isLegalNarrowLdSt(LDST: Load, ExtType: ISD::ZEXTLOAD, MemVT&: ExtVT)) {
7188
7189 // ZEXTLOAD is already small enough.
7190 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
7191 ExtVT.bitsGE(VT: Load->getMemoryVT()))
7192 continue;
7193
7194 // Use LE to convert equal sized loads to zext.
7195 if (ExtVT.bitsLE(VT: Load->getMemoryVT()))
7196 Loads.push_back(Elt: Load);
7197
7198 continue;
7199 }
7200 return false;
7201 }
7202 case ISD::ZERO_EXTEND:
7203 case ISD::AssertZext: {
7204 unsigned ActiveBits = Mask->getAPIntValue().countr_one();
7205 EVT ExtVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ActiveBits);
7206 EVT VT = Op.getOpcode() == ISD::AssertZext ?
7207 cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT() :
7208 Op.getOperand(i: 0).getValueType();
7209
7210       // We can accept extending nodes if the mask is wider than, or equal in
7211       // width to, the original type.
7212 if (ExtVT.bitsGE(VT))
7213 continue;
7214 break;
7215 }
7216 case ISD::OR:
7217 case ISD::XOR:
7218 case ISD::AND:
7219 if (!SearchForAndLoads(N: Op.getNode(), Loads, NodesWithConsts, Mask,
7220 NodeToMask))
7221 return false;
7222 continue;
7223 }
7224
7225     // Allow one node which will be masked along with any loads found.
7226 if (NodeToMask)
7227 return false;
7228
7229 // Also ensure that the node to be masked only produces one data result.
7230 NodeToMask = Op.getNode();
7231 if (NodeToMask->getNumValues() > 1) {
7232 bool HasValue = false;
7233 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
7234 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
7235 if (VT != MVT::Glue && VT != MVT::Other) {
7236 if (HasValue) {
7237 NodeToMask = nullptr;
7238 return false;
7239 }
7240 HasValue = true;
7241 }
7242 }
7243 assert(HasValue && "Node to be masked has no data result?");
7244 }
7245 }
7246 return true;
7247}
7248
7249bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
7250 auto *Mask = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
7251 if (!Mask)
7252 return false;
7253
7254 if (!Mask->getAPIntValue().isMask())
7255 return false;
7256
7257 // No need to do anything if the and directly uses a load.
7258 if (isa<LoadSDNode>(Val: N->getOperand(Num: 0)))
7259 return false;
7260
7261 SmallVector<LoadSDNode*, 8> Loads;
7262 SmallPtrSet<SDNode*, 2> NodesWithConsts;
7263 SDNode *FixupNode = nullptr;
7264 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, NodeToMask&: FixupNode)) {
7265 if (Loads.empty())
7266 return false;
7267
7268 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
7269 SDValue MaskOp = N->getOperand(Num: 1);
7270
7271 // If it exists, fixup the single node we allow in the tree that needs
7272 // masking.
7273 if (FixupNode) {
7274 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
7275 SDValue And = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(FixupNode),
7276 VT: FixupNode->getValueType(ResNo: 0),
7277 N1: SDValue(FixupNode, 0), N2: MaskOp);
7278 DAG.ReplaceAllUsesOfValueWith(From: SDValue(FixupNode, 0), To: And);
7279       if (And.getOpcode() == ISD::AND)
7280 DAG.UpdateNodeOperands(N: And.getNode(), Op1: SDValue(FixupNode, 0), Op2: MaskOp);
7281 }
7282
7283 // Narrow any constants that need it.
7284 for (auto *LogicN : NodesWithConsts) {
7285 SDValue Op0 = LogicN->getOperand(Num: 0);
7286 SDValue Op1 = LogicN->getOperand(Num: 1);
7287
7288 // We only need to fix AND if both inputs are constants. And we only need
7289 // to fix one of the constants.
7290 if (LogicN->getOpcode() == ISD::AND &&
7291 (!isa<ConstantSDNode>(Val: Op0) || !isa<ConstantSDNode>(Val: Op1)))
7292 continue;
7293
7294 if (isa<ConstantSDNode>(Val: Op0) && LogicN->getOpcode() != ISD::AND)
7295 Op0 =
7296 DAG.getNode(Opcode: ISD::AND, DL: SDLoc(Op0), VT: Op0.getValueType(), N1: Op0, N2: MaskOp);
7297
7298 if (isa<ConstantSDNode>(Val: Op1))
7299 Op1 =
7300 DAG.getNode(Opcode: ISD::AND, DL: SDLoc(Op1), VT: Op1.getValueType(), N1: Op1, N2: MaskOp);
7301
7302 if (isa<ConstantSDNode>(Val: Op0) && !isa<ConstantSDNode>(Val: Op1))
7303 std::swap(a&: Op0, b&: Op1);
7304
7305 DAG.UpdateNodeOperands(N: LogicN, Op1: Op0, Op2: Op1);
7306 }
7307
7308 // Create narrow loads.
7309 for (auto *Load : Loads) {
7310 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
7311 SDValue And = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(Load), VT: Load->getValueType(ResNo: 0),
7312 N1: SDValue(Load, 0), N2: MaskOp);
7313 DAG.ReplaceAllUsesOfValueWith(From: SDValue(Load, 0), To: And);
7314       if (And.getOpcode() == ISD::AND)
7315 And = SDValue(
7316 DAG.UpdateNodeOperands(N: And.getNode(), Op1: SDValue(Load, 0), Op2: MaskOp), 0);
7317 SDValue NewLoad = reduceLoadWidth(N: And.getNode());
7318 assert(NewLoad &&
7319 "Shouldn't be masking the load if it can't be narrowed");
7320 CombineTo(N: Load, Res0: NewLoad, Res1: NewLoad.getValue(R: 1));
7321 }
7322 DAG.ReplaceAllUsesWith(From: N, To: N->getOperand(Num: 0).getNode());
7323 return true;
7324 }
7325 return false;
7326}
7327
7328// Unfold
7329// x & (-1 'logical shift' y)
7330// To
7331// (x 'opposite logical shift' y) 'logical shift' y
7332// if it is better for performance.
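// E.g. (x & (-1 << y)) clears the low y bits of x, which can also be computed
// as ((x >> y) << y) using two dependent shifts instead of materializing the
// mask; the inverse pairing handles (x & (-1 >> y)).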
7333SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
7334 assert(N->getOpcode() == ISD::AND);
7335
7336 SDValue N0 = N->getOperand(Num: 0);
7337 SDValue N1 = N->getOperand(Num: 1);
7338
7339 // Do we actually prefer shifts over mask?
7340 if (!TLI.shouldFoldMaskToVariableShiftPair(X: N0))
7341 return SDValue();
7342
7343 // Try to match (-1 '[outer] logical shift' y)
7344 unsigned OuterShift;
7345 unsigned InnerShift; // The opposite direction to the OuterShift.
7346 SDValue Y; // Shift amount.
7347 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
7348 if (!M.hasOneUse())
7349 return false;
7350 OuterShift = M->getOpcode();
7351 if (OuterShift == ISD::SHL)
7352 InnerShift = ISD::SRL;
7353 else if (OuterShift == ISD::SRL)
7354 InnerShift = ISD::SHL;
7355 else
7356 return false;
7357 if (!isAllOnesConstant(V: M->getOperand(Num: 0)))
7358 return false;
7359 Y = M->getOperand(Num: 1);
7360 return true;
7361 };
7362
7363 SDValue X;
7364 if (matchMask(N1))
7365 X = N0;
7366 else if (matchMask(N0))
7367 X = N1;
7368 else
7369 return SDValue();
7370
7371 SDLoc DL(N);
7372 EVT VT = N->getValueType(ResNo: 0);
7373
7374 // tmp = x 'opposite logical shift' y
7375 SDValue T0 = DAG.getNode(Opcode: InnerShift, DL, VT, N1: X, N2: Y);
7376 // ret = tmp 'logical shift' y
7377 SDValue T1 = DAG.getNode(Opcode: OuterShift, DL, VT, N1: T0, N2: Y);
7378
7379 return T1;
7380}
7381
7382/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
7383/// For a target with a bit test, this is expected to become test + set and save
7384/// at least 1 instruction.
7385static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
7386 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
7387
7388 // Look through an optional extension.
7389 SDValue And0 = And->getOperand(Num: 0), And1 = And->getOperand(Num: 1);
7390 if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
7391 And0 = And0.getOperand(i: 0);
7392 if (!isOneConstant(V: And1) || !And0.hasOneUse())
7393 return SDValue();
7394
7395 SDValue Src = And0;
7396
7397 // Attempt to find a 'not' op.
7398 // TODO: Should we favor test+set even without the 'not' op?
7399 bool FoundNot = false;
7400 if (isBitwiseNot(V: Src)) {
7401 FoundNot = true;
7402 Src = Src.getOperand(i: 0);
7403
7404     // Look through an optional truncation. The source operand may not be the
7405 // same type as the original 'and', but that is ok because we are masking
7406 // off everything but the low bit.
7407 if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
7408 Src = Src.getOperand(i: 0);
7409 }
7410
7411 // Match a shift-right by constant.
7412 if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
7413 return SDValue();
7414
7415 // This is probably not worthwhile without a supported type.
7416 EVT SrcVT = Src.getValueType();
7417 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7418 if (!TLI.isTypeLegal(VT: SrcVT))
7419 return SDValue();
7420
7421 // We might have looked through casts that make this transform invalid.
7422 unsigned BitWidth = SrcVT.getScalarSizeInBits();
7423 SDValue ShiftAmt = Src.getOperand(i: 1);
7424 auto *ShiftAmtC = dyn_cast<ConstantSDNode>(Val&: ShiftAmt);
7425 if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(RHS: BitWidth))
7426 return SDValue();
7427
7428 // Set source to shift source.
7429 Src = Src.getOperand(i: 0);
7430
7431 // Try again to find a 'not' op.
7432 // TODO: Should we favor test+set even with two 'not' ops?
7433 if (!FoundNot) {
7434 if (!isBitwiseNot(V: Src))
7435 return SDValue();
7436 Src = Src.getOperand(i: 0);
7437 }
7438
7439 if (!TLI.hasBitTest(X: Src, Y: ShiftAmt))
7440 return SDValue();
7441
7442 // Turn this into a bit-test pattern using mask op + setcc:
7443 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
7444   // and (srl (not X), C), 1 --> (and X, 1<<C) == 0
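  // E.g. with C == 3: and (not (srl X, 3)), 1 --> (and X, 8) == 0.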
7445 SDLoc DL(And);
7446 SDValue X = DAG.getZExtOrTrunc(Op: Src, DL, VT: SrcVT);
7447 EVT CCVT =
7448 TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: SrcVT);
7449 SDValue Mask = DAG.getConstant(
7450 Val: APInt::getOneBitSet(numBits: BitWidth, BitNo: ShiftAmtC->getZExtValue()), DL, VT: SrcVT);
7451 SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL, VT: SrcVT, N1: X, N2: Mask);
7452 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: SrcVT);
7453 SDValue Setcc = DAG.getSetCC(DL, VT: CCVT, LHS: NewAnd, RHS: Zero, Cond: ISD::SETEQ);
7454 return DAG.getZExtOrTrunc(Op: Setcc, DL, VT: And->getValueType(ResNo: 0));
7455}
7456
7457/// For targets that support usubsat, match a bit-hack form of that operation
7458/// that ends in 'and' and convert it.
7459static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) {
7460 EVT VT = N->getValueType(ResNo: 0);
7461 unsigned BitWidth = VT.getScalarSizeInBits();
7462 APInt SignMask = APInt::getSignMask(BitWidth);
7463
7464 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
7465 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
7466 // xor/add with SMIN (signmask) are logically equivalent.
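  // If X has its sign bit set, X ^ 128 equals X - 128 and the sra produces all
  // ones, so the AND yields X - 128; otherwise the sra is zero and the AND
  // yields 0. Both cases match usubsat(X, 128).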
7467 SDValue X;
7468 if (!sd_match(N, P: m_And(L: m_OneUse(P: m_Xor(L: m_Value(N&: X), R: m_SpecificInt(V: SignMask))),
7469 R: m_OneUse(P: m_Sra(L: m_Deferred(V&: X),
7470 R: m_SpecificInt(V: BitWidth - 1))))) &&
7471 !sd_match(N, P: m_And(L: m_OneUse(P: m_Add(L: m_Value(N&: X), R: m_SpecificInt(V: SignMask))),
7472 R: m_OneUse(P: m_Sra(L: m_Deferred(V&: X),
7473 R: m_SpecificInt(V: BitWidth - 1))))))
7474 return SDValue();
7475
7476 return DAG.getNode(Opcode: ISD::USUBSAT, DL, VT, N1: X,
7477 N2: DAG.getConstant(Val: SignMask, DL, VT));
7478}
7479
7480/// Given a bitwise logic operation N with a matching bitwise logic operand,
7481/// fold a pattern where 2 of the source operands are identically shifted
7482/// values. For example:
7483/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
7484static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
7485 SelectionDAG &DAG) {
7486 unsigned LogicOpcode = N->getOpcode();
7487 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
7488 "Expected bitwise logic operation");
7489
7490 if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
7491 return SDValue();
7492
7493 // Match another bitwise logic op and a shift.
7494 unsigned ShiftOpcode = ShiftOp.getOpcode();
7495 if (LogicOp.getOpcode() != LogicOpcode ||
7496 !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
7497 ShiftOpcode == ISD::SRA))
7498 return SDValue();
7499
7500 // Match another shift op inside the first logic operand. Handle both commuted
7501 // possibilities.
7502 // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7503 // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7504 SDValue X1 = ShiftOp.getOperand(i: 0);
7505 SDValue Y = ShiftOp.getOperand(i: 1);
7506 SDValue X0, Z;
7507 if (LogicOp.getOperand(i: 0).getOpcode() == ShiftOpcode &&
7508 LogicOp.getOperand(i: 0).getOperand(i: 1) == Y) {
7509 X0 = LogicOp.getOperand(i: 0).getOperand(i: 0);
7510 Z = LogicOp.getOperand(i: 1);
7511 } else if (LogicOp.getOperand(i: 1).getOpcode() == ShiftOpcode &&
7512 LogicOp.getOperand(i: 1).getOperand(i: 1) == Y) {
7513 X0 = LogicOp.getOperand(i: 1).getOperand(i: 0);
7514 Z = LogicOp.getOperand(i: 0);
7515 } else {
7516 return SDValue();
7517 }
7518
7519 EVT VT = N->getValueType(ResNo: 0);
7520 SDLoc DL(N);
7521 SDValue LogicX = DAG.getNode(Opcode: LogicOpcode, DL, VT, N1: X0, N2: X1);
7522 SDValue NewShift = DAG.getNode(Opcode: ShiftOpcode, DL, VT, N1: LogicX, N2: Y);
7523 return DAG.getNode(Opcode: LogicOpcode, DL, VT, N1: NewShift, N2: Z);
7524}
7525
7526/// Given a tree of logic operations with shape like
7527/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
7528/// try to match and fold shift operations with the same shift amount.
7529/// For example:
7530/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
7531/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
7532static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
7533 SDValue RightHand, SelectionDAG &DAG) {
7534 unsigned LogicOpcode = N->getOpcode();
7535 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
7536 "Expected bitwise logic operation");
7537 if (LeftHand.getOpcode() != LogicOpcode ||
7538 RightHand.getOpcode() != LogicOpcode)
7539 return SDValue();
7540 if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
7541 return SDValue();
7542
7543 // Try to match one of following patterns:
7544 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
7545 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
7546 // Note that foldLogicOfShifts will handle commuted versions of the left hand
7547 // itself.
7548 SDValue CombinedShifts, W;
7549 SDValue R0 = RightHand.getOperand(i: 0);
7550 SDValue R1 = RightHand.getOperand(i: 1);
7551 if ((CombinedShifts = foldLogicOfShifts(N, LogicOp: LeftHand, ShiftOp: R0, DAG)))
7552 W = R1;
7553 else if ((CombinedShifts = foldLogicOfShifts(N, LogicOp: LeftHand, ShiftOp: R1, DAG)))
7554 W = R0;
7555 else
7556 return SDValue();
7557
7558 EVT VT = N->getValueType(ResNo: 0);
7559 SDLoc DL(N);
7560 return DAG.getNode(Opcode: LogicOpcode, DL, VT, N1: CombinedShifts, N2: W);
7561}
7562
7563/// Fold "masked merge" expressions like `(m & x) | (~m & y)` and its DeMorgan
7564 /// variant `(~m | x) & (m | y)` into the equivalent `(((x ^ y) & m) ^ y)`
7565/// pattern. This is typically a better representation for targets without a
7566/// fused "and-not" operation.
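/// For example, with m == 0xF0, (m & x) | (~m & y) selects the high nibble
/// from x and the low nibble from y; (((x ^ y) & m) ^ y) computes the same
/// result without needing an and-not.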
7567static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG,
7568 const TargetLowering &TLI, const SDLoc &DL) {
7569 // Note that masked-merge variants using XOR or ADD expressions are
7570 // normalized to OR by InstCombine so we only check for OR or AND.
7571 assert((Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::AND) &&
7572 "Must be called with ISD::OR or ISD::AND node");
7573
7574 // If the target supports and-not, don't fold this.
7575 if (TLI.hasAndNot(X: SDValue(Node, 0)))
7576 return SDValue();
7577
7578 SDValue M, X, Y;
7579
7580 if (sd_match(N: Node,
7581 P: m_Or(L: m_OneUse(P: m_And(L: m_OneUse(P: m_Not(V: m_Value(N&: M))), R: m_Value(N&: Y))),
7582 R: m_OneUse(P: m_And(L: m_Deferred(V&: M), R: m_Value(N&: X))))) ||
7583 sd_match(N: Node,
7584 P: m_And(L: m_OneUse(P: m_Or(L: m_OneUse(P: m_Not(V: m_Value(N&: M))), R: m_Value(N&: X))),
7585 R: m_OneUse(P: m_Or(L: m_Deferred(V&: M), R: m_Value(N&: Y)))))) {
7586 EVT VT = M.getValueType();
7587 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: X, N2: Y);
7588 SDValue And = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Xor, N2: M);
7589 return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: And, N2: Y);
7590 }
7591 return SDValue();
7592}
7593
7594SDValue DAGCombiner::visitAND(SDNode *N) {
7595 SDValue N0 = N->getOperand(Num: 0);
7596 SDValue N1 = N->getOperand(Num: 1);
7597 EVT VT = N1.getValueType();
7598 SDLoc DL(N);
7599
7600 // x & x --> x
7601 if (N0 == N1)
7602 return N0;
7603
7604 // fold (and c1, c2) -> c1&c2
7605 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::AND, DL, VT, Ops: {N0, N1}))
7606 return C;
7607
7608 // canonicalize constant to RHS
7609 if (DAG.isConstantIntBuildVectorOrConstantInt(N: N0) &&
7610 !DAG.isConstantIntBuildVectorOrConstantInt(N: N1))
7611 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1, N2: N0);
7612
7613 if (areBitwiseNotOfEachother(Op0: N0, Op1: N1))
7614 return DAG.getConstant(Val: APInt::getZero(numBits: VT.getScalarSizeInBits()), DL, VT);
7615
7616 // fold vector ops
7617 if (VT.isVector()) {
7618 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
7619 return FoldedVOp;
7620
7621 // fold (and x, 0) -> 0, vector edition
7622 if (ISD::isConstantSplatVectorAllZeros(N: N1.getNode()))
7623       // do not return N1, because an undef node may exist in N1
7624 return DAG.getConstant(Val: APInt::getZero(numBits: N1.getScalarValueSizeInBits()), DL,
7625 VT: N1.getValueType());
7626
7627 // fold (and x, -1) -> x, vector edition
7628 if (ISD::isConstantSplatVectorAllOnes(N: N1.getNode()))
7629 return N0;
7630
7631 // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
7632 bool Frozen = N0.getOpcode() == ISD::FREEZE;
7633 auto *MLoad = dyn_cast<MaskedLoadSDNode>(Val: Frozen ? N0.getOperand(i: 0) : N0);
7634 ConstantSDNode *Splat = isConstOrConstSplat(N: N1, AllowUndefs: true, AllowTruncation: true);
7635 if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat) {
7636 EVT MemVT = MLoad->getMemoryVT();
7637 if (TLI.isLoadExtLegal(ExtType: ISD::ZEXTLOAD, ValVT: VT, MemVT)) {
7638 // For this AND to be a zero extension of the masked load the elements
7639 // of the BuildVec must mask the bottom bits of the extended element
7640 // type
7641 if (Splat->getAPIntValue().isMask(numBits: MemVT.getScalarSizeInBits())) {
7642 SDValue NewLoad = DAG.getMaskedLoad(
7643 VT, dl: DL, Chain: MLoad->getChain(), Base: MLoad->getBasePtr(),
7644 Offset: MLoad->getOffset(), Mask: MLoad->getMask(), Src0: MLoad->getPassThru(), MemVT,
7645 MMO: MLoad->getMemOperand(), AM: MLoad->getAddressingMode(), ISD::ZEXTLOAD,
7646 IsExpanding: MLoad->isExpandingLoad());
7647 CombineTo(N, Res: Frozen ? N0 : NewLoad);
7648 CombineTo(N: MLoad, Res0: NewLoad, Res1: NewLoad.getValue(R: 1));
7649 return SDValue(N, 0);
7650 }
7651 }
7652 }
7653 }
7654
7655 // fold (and x, -1) -> x
7656 if (isAllOnesConstant(V: N1))
7657 return N0;
7658
7659 // if (and x, c) is known to be zero, return 0
7660 unsigned BitWidth = VT.getScalarSizeInBits();
7661 ConstantSDNode *N1C = isConstOrConstSplat(N: N1);
7662 if (N1C && DAG.MaskedValueIsZero(Op: SDValue(N, 0), Mask: APInt::getAllOnes(numBits: BitWidth)))
7663 return DAG.getConstant(Val: 0, DL, VT);
7664
7665 if (SDValue R = foldAndOrOfSETCC(LogicOp: N, DAG))
7666 return R;
7667
7668 if (SDValue NewSel = foldBinOpIntoSelect(BO: N))
7669 return NewSel;
7670
7671 // reassociate and
7672 if (SDValue RAND = reassociateOps(Opc: ISD::AND, DL, N0, N1, Flags: N->getFlags()))
7673 return RAND;
7674
7675 // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
7676 if (SDValue SD =
7677 reassociateReduction(RedOpc: ISD::VECREDUCE_AND, Opc: ISD::AND, DL, VT, N0, N1))
7678 return SD;
7679
7680 // fold (and (or x, C), D) -> D if (C & D) == D
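  // E.g. (and (or x, 0xFF), 0x0F) --> 0x0F, since the OR guarantees all bits
  // of 0x0F are already set.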
7681 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7682 return RHS->getAPIntValue().isSubsetOf(RHS: LHS->getAPIntValue());
7683 };
7684 if (N0.getOpcode() == ISD::OR &&
7685 ISD::matchBinaryPredicate(LHS: N0.getOperand(i: 1), RHS: N1, Match: MatchSubset))
7686 return N1;
7687
7688 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7689 SDValue N0Op0 = N0.getOperand(i: 0);
7690 EVT SrcVT = N0Op0.getValueType();
7691 unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
7692 APInt Mask = ~N1C->getAPIntValue();
7693 Mask = Mask.trunc(width: SrcBitWidth);
7694
7695 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
7696 if (DAG.MaskedValueIsZero(Op: N0Op0, Mask))
7697 return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT, Operand: N0Op0);
7698
7699 // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
7700 if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
7701 TLI.isTruncateFree(FromVT: VT, ToVT: SrcVT) && TLI.isZExtFree(FromTy: SrcVT, ToTy: VT) &&
7702 TLI.isTypeDesirableForOp(ISD::AND, VT: SrcVT) &&
7703 TLI.isNarrowingProfitable(N, SrcVT: VT, DestVT: SrcVT))
7704 return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT,
7705 Operand: DAG.getNode(Opcode: ISD::AND, DL, VT: SrcVT, N1: N0Op0,
7706 N2: DAG.getZExtOrTrunc(Op: N1, DL, VT: SrcVT)));
7707 }
7708
7709 // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
7710 if (ISD::isExtOpcode(Opcode: N0.getOpcode())) {
7711 unsigned ExtOpc = N0.getOpcode();
7712 SDValue N0Op0 = N0.getOperand(i: 0);
7713 if (N0Op0.getOpcode() == ISD::AND &&
7714 (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(Val: N0Op0, VT2: VT)) &&
7715 N0->hasOneUse() && N0Op0->hasOneUse()) {
7716 if (SDValue NewExt = DAG.FoldConstantArithmetic(Opcode: ExtOpc, DL, VT,
7717 Ops: {N0Op0.getOperand(i: 1)})) {
7718 if (SDValue NewMask =
7719 DAG.FoldConstantArithmetic(Opcode: ISD::AND, DL, VT, Ops: {N1, NewExt})) {
7720 return DAG.getNode(Opcode: ISD::AND, DL, VT,
7721 N1: DAG.getNode(Opcode: ExtOpc, DL, VT, Operand: N0Op0.getOperand(i: 0)),
7722 N2: NewMask);
7723 }
7724 }
7725 }
7726 }
7727
7728 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
7729 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
7730 // already be zero by virtue of the width of the base type of the load.
7731 //
7732 // the 'X' node here can either be nothing or an extract_vector_elt to catch
7733 // more cases.
7734 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7735 N0.getValueSizeInBits() == N0.getOperand(i: 0).getScalarValueSizeInBits() &&
7736 N0.getOperand(i: 0).getOpcode() == ISD::LOAD &&
7737 N0.getOperand(i: 0).getResNo() == 0) ||
7738 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
7739 auto *Load =
7740 cast<LoadSDNode>(Val: (N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(i: 0));
7741
7742 // Get the constant (if applicable) the zero'th operand is being ANDed with.
7743 // This can be a pure constant or a vector splat, in which case we treat the
7744 // vector as a scalar and use the splat value.
7745 APInt Constant = APInt::getZero(numBits: 1);
7746 if (const ConstantSDNode *C = isConstOrConstSplat(
7747 N: N1, /*AllowUndefs=*/false, /*AllowTruncation=*/true)) {
7748 Constant = C->getAPIntValue();
7749 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(Val&: N1)) {
7750 unsigned EltBitWidth = Vector->getValueType(ResNo: 0).getScalarSizeInBits();
7751 APInt SplatValue, SplatUndef;
7752 unsigned SplatBitSize;
7753 bool HasAnyUndefs;
7754 // Endianness should not matter here. Code below makes sure that we only
7755 // use the result if the SplatBitSize is a multiple of the vector element
7756 // size. And after that we AND all element sized parts of the splat
7757 // together. So the end result should be the same regardless of in which
7758 // order we do those operations.
7759 const bool IsBigEndian = false;
7760 bool IsSplat =
7761 Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
7762 HasAnyUndefs, MinSplatBits: EltBitWidth, isBigEndian: IsBigEndian);
7763
7764       // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
7765       // multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong value.
7766 if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
7767 // Undef bits can contribute to a possible optimisation if set, so
7768 // set them.
7769 SplatValue |= SplatUndef;
7770
7771 // The splat value may be something like "0x00FFFFFF", which means 0 for
7772 // the first vector value and FF for the rest, repeating. We need a mask
7773 // that will apply equally to all members of the vector, so AND all the
7774 // lanes of the constant together.
7775 Constant = APInt::getAllOnes(numBits: EltBitWidth);
7776 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
7777 Constant &= SplatValue.extractBits(numBits: EltBitWidth, bitPosition: i * EltBitWidth);
7778 }
7779 }
7780
7781 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
7782 // actually legal and isn't going to get expanded, else this is a false
7783 // optimisation.
7784 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ExtType: ISD::ZEXTLOAD,
7785 ValVT: Load->getValueType(ResNo: 0),
7786 MemVT: Load->getMemoryVT());
7787
7788 // Resize the constant to the same size as the original memory access before
7789 // extension. If it is still the AllOnesValue then this AND is completely
7790 // unneeded.
7791 Constant = Constant.zextOrTrunc(width: Load->getMemoryVT().getScalarSizeInBits());
7792
7793 bool B;
7794 switch (Load->getExtensionType()) {
7795 default: B = false; break;
7796 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
7797 case ISD::ZEXTLOAD:
7798 case ISD::NON_EXTLOAD: B = true; break;
7799 }
7800
7801 if (B && Constant.isAllOnes()) {
7802 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
7803 // preserve semantics once we get rid of the AND.
7804 SDValue NewLoad(Load, 0);
7805
7806 // Fold the AND away. NewLoad may get replaced immediately.
7807 CombineTo(N, Res: (N0.getNode() == Load) ? NewLoad : N0);
7808
7809 if (Load->getExtensionType() == ISD::EXTLOAD) {
7810 NewLoad = DAG.getLoad(AM: Load->getAddressingMode(), ExtType: ISD::ZEXTLOAD,
7811 VT: Load->getValueType(ResNo: 0), dl: SDLoc(Load),
7812 Chain: Load->getChain(), Ptr: Load->getBasePtr(),
7813 Offset: Load->getOffset(), MemVT: Load->getMemoryVT(),
7814 MMO: Load->getMemOperand());
7815 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
7816 if (Load->getNumValues() == 3) {
7817 // PRE/POST_INC loads have 3 values.
7818 SDValue To[] = { NewLoad.getValue(R: 0), NewLoad.getValue(R: 1),
7819 NewLoad.getValue(R: 2) };
7820 CombineTo(N: Load, To, NumTo: 3, AddTo: true);
7821 } else {
7822 CombineTo(N: Load, Res0: NewLoad.getValue(R: 0), Res1: NewLoad.getValue(R: 1));
7823 }
7824 }
7825
7826 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7827 }
7828 }
7829
7830 // Try to convert a constant mask AND into a shuffle clear mask.
7831 if (VT.isVector())
7832 if (SDValue Shuffle = XformToShuffleWithZero(N))
7833 return Shuffle;
7834
7835 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7836 return Combined;
7837
7838 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
7839 ISD::isExtOpcode(Opcode: N0.getOperand(i: 0).getOpcode())) {
7840 SDValue Ext = N0.getOperand(i: 0);
7841 EVT ExtVT = Ext->getValueType(ResNo: 0);
7842 SDValue Extendee = Ext->getOperand(Num: 0);
7843
7844 unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
7845 if (N1C->getAPIntValue().isMask(numBits: ScalarWidth) &&
7846 (!LegalOperations || TLI.isOperationLegal(Op: ISD::ZERO_EXTEND, VT: ExtVT))) {
7847 // (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
7848 // => (extract_subvector (iN_zeroext v))
7849 SDValue ZeroExtExtendee =
7850 DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: ExtVT, Operand: Extendee);
7851
7852 return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, N1: ZeroExtExtendee,
7853 N2: N0.getOperand(i: 1));
7854 }
7855 }
7856
7857 // fold (and (masked_gather x)) -> (zext_masked_gather x)
7858 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(Val&: N0)) {
7859 EVT MemVT = GN0->getMemoryVT();
7860 EVT ScalarVT = MemVT.getScalarType();
7861
7862 if (SDValue(GN0, 0).hasOneUse() &&
7863 isConstantSplatVectorMaskForType(N: N1.getNode(), ScalarTy: ScalarVT) &&
7864 TLI.isVectorLoadExtDesirable(ExtVal: SDValue(N, 0))) {
7865 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
7866 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
7867
7868 SDValue ZExtLoad = DAG.getMaskedGather(
7869 VTs: DAG.getVTList(VT1: VT, VT2: MVT::Other), MemVT, dl: DL, Ops, MMO: GN0->getMemOperand(),
7870 IndexType: GN0->getIndexType(), ExtTy: ISD::ZEXTLOAD);
7871
7872 CombineTo(N, Res: ZExtLoad);
7873 AddToWorklist(N: ZExtLoad.getNode());
7874 // Avoid recheck of N.
7875 return SDValue(N, 0);
7876 }
7877 }
7878
7879 // fold (and (load x), 255) -> (zextload x, i8)
7880 // fold (and (extload x, i16), 255) -> (zextload x, i8)
7881 if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
7882 if (SDValue Res = reduceLoadWidth(N))
7883 return Res;
7884
7885 if (LegalTypes) {
7886 // Attempt to propagate the AND back up to the leaves which, if they're
7887 // loads, can be combined to narrow loads and the AND node can be removed.
7888 // Perform after legalization so that extend nodes will already be
7889 // combined into the loads.
7890 if (BackwardsPropagateMask(N))
7891 return SDValue(N, 0);
7892 }
7893
7894 if (SDValue Combined = visitANDLike(N0, N1, N))
7895 return Combined;
7896
7897 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
7898 if (N0.getOpcode() == N1.getOpcode())
7899 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7900 return V;
7901
7902 if (SDValue R = foldLogicOfShifts(N, LogicOp: N0, ShiftOp: N1, DAG))
7903 return R;
7904 if (SDValue R = foldLogicOfShifts(N, LogicOp: N1, ShiftOp: N0, DAG))
7905 return R;
7906
7907 // Fold (and X, (bswap (not Y))) -> (and X, (not (bswap Y)))
7908 // Fold (and X, (bitreverse (not Y))) -> (and X, (not (bitreverse Y)))
7909 SDValue X, Y, Z, NotY;
7910 for (unsigned Opc : {ISD::BSWAP, ISD::BITREVERSE})
7911 if (sd_match(N,
7912 P: m_And(L: m_Value(N&: X), R: m_OneUse(P: m_UnaryOp(Opc, Op: m_Value(N&: NotY))))) &&
7913 sd_match(N: NotY, P: m_Not(V: m_Value(N&: Y))) &&
7914 (TLI.hasAndNot(X: SDValue(N, 0)) || NotY->hasOneUse()))
7915 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: X,
7916 N2: DAG.getNOT(DL, Val: DAG.getNode(Opcode: Opc, DL, VT, Operand: Y), VT));
7917
7918 // Fold (and X, (rot (not Y), Z)) -> (and X, (not (rot Y, Z)))
7919 for (unsigned Opc : {ISD::ROTL, ISD::ROTR})
7920 if (sd_match(N, P: m_And(L: m_Value(N&: X),
7921 R: m_OneUse(P: m_BinOp(Opc, L: m_Value(N&: NotY), R: m_Value(N&: Z))))) &&
7922 sd_match(N: NotY, P: m_Not(V: m_Value(N&: Y))) &&
7923 (TLI.hasAndNot(X: SDValue(N, 0)) || NotY->hasOneUse()))
7924 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: X,
7925 N2: DAG.getNOT(DL, Val: DAG.getNode(Opcode: Opc, DL, VT, N1: Y, N2: Z), VT));
7926
7927 // Fold (and X, (add (not Y), Z)) -> (and X, (not (sub Y, Z)))
7928 // Fold (and X, (sub (not Y), Z)) -> (and X, (not (add Y, Z)))
7929 if (TLI.hasAndNot(X: SDValue(N, 0)))
7930 if (SDValue Folded = foldBitwiseOpWithNeg(N, DL, VT))
7931 return Folded;
7932
7933 // Fold (and (srl X, C), 1) -> (srl X, BW-1) for signbit extraction
7934 // If we are shifting down an extended sign bit, see if we can simplify
7935 // this to shifting the MSB directly to expose further simplifications.
7936 // This pattern often appears after sext_inreg legalization.
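  // E.g. if X is sign-extended from i8 to i32 (so it has 25 sign bits), then
  // (and (srl X, 30), 1) extracts a copy of the sign bit and is equivalent to
  // (srl X, 31).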
7937 APInt Amt;
7938 if (sd_match(N, P: m_And(L: m_Srl(L: m_Value(N&: X), R: m_ConstInt(V&: Amt)), R: m_One())) &&
7939 Amt.ult(RHS: BitWidth - 1) && Amt.uge(RHS: BitWidth - DAG.ComputeNumSignBits(Op: X)))
7940 return DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: X,
7941 N2: DAG.getShiftAmountConstant(Val: BitWidth - 1, VT, DL));
7942
7943 // Masking the negated extension of a boolean is just the zero-extended
7944 // boolean:
7945 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
7946 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
7947 //
7948 // Note: the SimplifyDemandedBits fold below can make an information-losing
7949 // transform, and then we have no way to find this better fold.
7950 if (sd_match(N, P: m_And(L: m_Sub(L: m_Zero(), R: m_Value(N&: X)), R: m_One()))) {
7951 if (X.getOpcode() == ISD::ZERO_EXTEND &&
7952 X.getOperand(i: 0).getScalarValueSizeInBits() == 1)
7953 return X;
7954 if (X.getOpcode() == ISD::SIGN_EXTEND &&
7955 X.getOperand(i: 0).getScalarValueSizeInBits() == 1)
7956 return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT, Operand: X.getOperand(i: 0));
7957 }
7958
7959 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
7960 // fold (and (sra)) -> (and (srl)) when possible.
7961 if (SimplifyDemandedBits(Op: SDValue(N, 0)))
7962 return SDValue(N, 0);
7963
7964 // fold (zext_inreg (extload x)) -> (zextload x)
7965 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
7966 if (ISD::isUNINDEXEDLoad(N: N0.getNode()) &&
7967 (ISD::isEXTLoad(N: N0.getNode()) ||
7968 (ISD::isSEXTLoad(N: N0.getNode()) && N0.hasOneUse()))) {
7969 auto *LN0 = cast<LoadSDNode>(Val&: N0);
7970 EVT MemVT = LN0->getMemoryVT();
7971 // If we zero all the possible extended bits, then we can turn this into
7972 // a zextload if we are running before legalize or the operation is legal.
7973 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
7974 unsigned MemBitSize = MemVT.getScalarSizeInBits();
7975 APInt ExtBits = APInt::getHighBitsSet(numBits: ExtBitSize, hiBitsSet: ExtBitSize - MemBitSize);
7976 if (DAG.MaskedValueIsZero(Op: N1, Mask: ExtBits) &&
7977 ((!LegalOperations && LN0->isSimple()) ||
7978 TLI.isLoadExtLegal(ExtType: ISD::ZEXTLOAD, ValVT: VT, MemVT))) {
7979 SDValue ExtLoad =
7980 DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl: SDLoc(N0), VT, Chain: LN0->getChain(),
7981 Ptr: LN0->getBasePtr(), MemVT, MMO: LN0->getMemOperand());
7982 AddToWorklist(N);
7983 CombineTo(N: N0.getNode(), Res0: ExtLoad, Res1: ExtLoad.getValue(R: 1));
7984 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7985 }
7986 }
7987
7988 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
7989 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
7990 if (SDValue BSwap = MatchBSwapHWordLow(N: N0.getNode(), N0: N0.getOperand(i: 0),
7991 N1: N0.getOperand(i: 1), DemandHighBits: false))
7992 return BSwap;
7993 }
7994
7995 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
7996 return Shifts;
7997
7998 if (SDValue V = combineShiftAnd1ToBitTest(And: N, DAG))
7999 return V;
8000
8001 // Recognize the following pattern:
8002 //
8003 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
8004 //
8005 // where bitmask is a mask that clears the upper bits of AndVT. The
8006 // number of bits in bitmask must be a power of two.
8007 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
8008 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
8009 return false;
8010
8011 auto *C = dyn_cast<ConstantSDNode>(Val&: RHS);
8012 if (!C)
8013 return false;
8014
8015 if (!C->getAPIntValue().isMask(
8016 numBits: LHS.getOperand(i: 0).getValueType().getFixedSizeInBits()))
8017 return false;
8018
8019 return true;
8020 };
8021
8022 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
8023 if (IsAndZeroExtMask(N0, N1))
8024 return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT, Operand: N0.getOperand(i: 0));
8025
8026 if (hasOperation(Opcode: ISD::USUBSAT, VT))
8027 if (SDValue V = foldAndToUsubsat(N, DAG, DL))
8028 return V;
8029
8030   // Postpone until legalization has completed to avoid interference with
8031   // bswap folding.
8032 if (LegalOperations || VT.isVector())
8033 if (SDValue R = foldLogicTreeOfShifts(N, LeftHand: N0, RightHand: N1, DAG))
8034 return R;
8035
8036 if (VT.isScalarInteger() && VT != MVT::i1)
8037 if (SDValue R = foldMaskedMerge(Node: N, DAG, TLI, DL))
8038 return R;
8039
8040 return SDValue();
8041}
8042
8043/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
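/// E.g. for i32 with only the low 16 bits of a set (a == 0x0000CCDD):
/// ((a & 0xff) << 8) | ((a >> 8) & 0xff) == 0x0000DDCC == (bswap a) >> 16.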
8044SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
8045 bool DemandHighBits) {
8046 if (!LegalOperations)
8047 return SDValue();
8048
8049 EVT VT = N->getValueType(ResNo: 0);
8050 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
8051 return SDValue();
8052 if (!TLI.isOperationLegalOrCustom(Op: ISD::BSWAP, VT))
8053 return SDValue();
8054
8055 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
8056 bool LookPassAnd0 = false;
8057 bool LookPassAnd1 = false;
8058 if (N0.getOpcode() == ISD::AND && N0.getOperand(i: 0).getOpcode() == ISD::SRL)
8059 std::swap(a&: N0, b&: N1);
8060 if (N1.getOpcode() == ISD::AND && N1.getOperand(i: 0).getOpcode() == ISD::SHL)
8061 std::swap(a&: N0, b&: N1);
8062 if (N0.getOpcode() == ISD::AND) {
8063 if (!N0->hasOneUse())
8064 return SDValue();
8065 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
8066 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
8067 // This is needed for X86.
8068 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
8069 N01C->getZExtValue() != 0xFFFF))
8070 return SDValue();
8071 N0 = N0.getOperand(i: 0);
8072 LookPassAnd0 = true;
8073 }
8074
8075 if (N1.getOpcode() == ISD::AND) {
8076 if (!N1->hasOneUse())
8077 return SDValue();
8078 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1));
8079 if (!N11C || N11C->getZExtValue() != 0xFF)
8080 return SDValue();
8081 N1 = N1.getOperand(i: 0);
8082 LookPassAnd1 = true;
8083 }
8084
8085 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
8086 std::swap(a&: N0, b&: N1);
8087 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
8088 return SDValue();
8089 if (!N0->hasOneUse() || !N1->hasOneUse())
8090 return SDValue();
8091
8092 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
8093 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1));
8094 if (!N01C || !N11C)
8095 return SDValue();
8096 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
8097 return SDValue();
8098
8099 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
8100 SDValue N00 = N0->getOperand(Num: 0);
8101 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
8102 if (!N00->hasOneUse())
8103 return SDValue();
8104 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(Val: N00.getOperand(i: 1));
8105 if (!N001C || N001C->getZExtValue() != 0xFF)
8106 return SDValue();
8107 N00 = N00.getOperand(i: 0);
8108 LookPassAnd0 = true;
8109 }
8110
8111 SDValue N10 = N1->getOperand(Num: 0);
8112 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
8113 if (!N10->hasOneUse())
8114 return SDValue();
8115 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(Val: N10.getOperand(i: 1));
8116 // Also allow 0xFFFF since the bits will be shifted out. This is needed
8117 // for X86.
8118 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
8119 N101C->getZExtValue() != 0xFFFF))
8120 return SDValue();
8121 N10 = N10.getOperand(i: 0);
8122 LookPassAnd1 = true;
8123 }
8124
8125 if (N00 != N10)
8126 return SDValue();
8127
8128 // Make sure everything beyond the low halfword gets set to zero since the SRL
8129 // 16 will clear the top bits.
8130 unsigned OpSizeInBits = VT.getSizeInBits();
8131 if (OpSizeInBits > 16) {
8132 // If the left-shift isn't masked out then the only way this is a bswap is
8133 // if all bits beyond the low 8 are 0. In that case the entire pattern
8134 // reduces to a left shift anyway: leave it for other parts of the combiner.
8135 if (DemandHighBits && !LookPassAnd0)
8136 return SDValue();
8137
8138 // However, if the right shift isn't masked out then it might be because
8139 // it's not needed. See if we can spot that too. If the high bits aren't
8140 // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
8141 // upper bits to be zero.
8142 if (!LookPassAnd1) {
8143 unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
8144 if (!DAG.MaskedValueIsZero(Op: N10,
8145 Mask: APInt::getBitsSet(numBits: OpSizeInBits, loBit: 16, hiBit: HighBit)))
8146 return SDValue();
8147 }
8148 }
8149
8150 SDValue Res = DAG.getNode(Opcode: ISD::BSWAP, DL: SDLoc(N), VT, Operand: N00);
8151 if (OpSizeInBits > 16) {
8152 SDLoc DL(N);
8153 Res = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Res,
8154 N2: DAG.getShiftAmountConstant(Val: OpSizeInBits - 16, VT, DL));
8155 }
8156 return Res;
8157}
8158
8159/// Return true if the specified node is an element that makes up a 32-bit
8160/// packed halfword byteswap.
8161/// ((x & 0x000000ff) << 8) |
8162/// ((x & 0x0000ff00) >> 8) |
8163/// ((x & 0x00ff0000) << 8) |
8164/// ((x & 0xff000000) >> 8)
8165static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
8166 if (!N->hasOneUse())
8167 return false;
8168
8169 unsigned Opc = N.getOpcode();
8170 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
8171 return false;
8172
8173 SDValue N0 = N.getOperand(i: 0);
8174 unsigned Opc0 = N0.getOpcode();
8175 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
8176 return false;
8177
8178 ConstantSDNode *N1C = nullptr;
8179 // SHL or SRL: look upstream for AND mask operand
8180 if (Opc == ISD::AND)
8181 N1C = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
8182 else if (Opc0 == ISD::AND)
8183 N1C = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
8184 if (!N1C)
8185 return false;
8186
8187 unsigned MaskByteOffset;
8188 switch (N1C->getZExtValue()) {
8189 default:
8190 return false;
8191 case 0xFF: MaskByteOffset = 0; break;
8192 case 0xFF00: MaskByteOffset = 1; break;
8193 case 0xFFFF:
8194 // In case demanded bits didn't clear the bits that will be shifted out.
8195 // This is needed for X86.
8196 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
8197 MaskByteOffset = 1;
8198 break;
8199 }
8200 return false;
8201 case 0xFF0000: MaskByteOffset = 2; break;
8202 case 0xFF000000: MaskByteOffset = 3; break;
8203 }
8204
8205 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
8206 if (Opc == ISD::AND) {
8207 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
8208 // (x >> 8) & 0xff
8209 // (x >> 8) & 0xff0000
8210 if (Opc0 != ISD::SRL)
8211 return false;
8212 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
8213 if (!C || C->getZExtValue() != 8)
8214 return false;
8215 } else {
8216 // (x << 8) & 0xff00
8217 // (x << 8) & 0xff000000
8218 if (Opc0 != ISD::SHL)
8219 return false;
8220 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
8221 if (!C || C->getZExtValue() != 8)
8222 return false;
8223 }
8224 } else if (Opc == ISD::SHL) {
8225 // (x & 0xff) << 8
8226 // (x & 0xff0000) << 8
8227 if (MaskByteOffset != 0 && MaskByteOffset != 2)
8228 return false;
8229 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
8230 if (!C || C->getZExtValue() != 8)
8231 return false;
8232 } else { // Opc == ISD::SRL
8233 // (x & 0xff00) >> 8
8234 // (x & 0xff000000) >> 8
8235 if (MaskByteOffset != 1 && MaskByteOffset != 3)
8236 return false;
8237 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
8238 if (!C || C->getZExtValue() != 8)
8239 return false;
8240 }
8241
8242 if (Parts[MaskByteOffset])
8243 return false;
8244
8245 Parts[MaskByteOffset] = N0.getOperand(i: 0).getNode();
8246 return true;
8247}
8248
8249// Match 2 elements of a packed halfword bswap.
8250static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
8251 if (N.getOpcode() == ISD::OR)
8252 return isBSwapHWordElement(N: N.getOperand(i: 0), Parts) &&
8253 isBSwapHWordElement(N: N.getOperand(i: 1), Parts);
8254
8255 if (N.getOpcode() == ISD::SRL && N.getOperand(i: 0).getOpcode() == ISD::BSWAP) {
8256 ConstantSDNode *C = isConstOrConstSplat(N: N.getOperand(i: 1));
8257 if (!C || C->getAPIntValue() != 16)
8258 return false;
8259 Parts[0] = Parts[1] = N.getOperand(i: 0).getOperand(i: 0).getNode();
8260 return true;
8261 }
8262
8263 return false;
8264}
8265
8266// Match this pattern:
8267// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
8268// And rewrite this to:
8269// (rotr (bswap A), 16)
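// E.g. for A == 0xAABBCCDD this gives 0xBBAADDCC == rotr(0xDDCCBBAA, 16).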
8270static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
8271 SelectionDAG &DAG, SDNode *N, SDValue N0,
8272 SDValue N1, EVT VT) {
8273 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
8274 "MatchBSwapHWordOrAndAnd: expecting i32");
8275 if (!TLI.isOperationLegalOrCustom(Op: ISD::ROTR, VT))
8276 return SDValue();
8277 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
8278 return SDValue();
8279 // TODO: this is too restrictive; lifting this restriction requires more tests
8280 if (!N0->hasOneUse() || !N1->hasOneUse())
8281 return SDValue();
8282 ConstantSDNode *Mask0 = isConstOrConstSplat(N: N0.getOperand(i: 1));
8283 ConstantSDNode *Mask1 = isConstOrConstSplat(N: N1.getOperand(i: 1));
8284 if (!Mask0 || !Mask1)
8285 return SDValue();
8286 if (Mask0->getAPIntValue() != 0xff00ff00 ||
8287 Mask1->getAPIntValue() != 0x00ff00ff)
8288 return SDValue();
8289 SDValue Shift0 = N0.getOperand(i: 0);
8290 SDValue Shift1 = N1.getOperand(i: 0);
8291 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
8292 return SDValue();
8293 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(N: Shift0.getOperand(i: 1));
8294 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(N: Shift1.getOperand(i: 1));
8295 if (!ShiftAmt0 || !ShiftAmt1)
8296 return SDValue();
8297 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
8298 return SDValue();
8299 if (Shift0.getOperand(i: 0) != Shift1.getOperand(i: 0))
8300 return SDValue();
8301
8302 SDLoc DL(N);
8303 SDValue BSwap = DAG.getNode(Opcode: ISD::BSWAP, DL, VT, Operand: Shift0.getOperand(i: 0));
8304 SDValue ShAmt = DAG.getShiftAmountConstant(Val: 16, VT, DL);
8305 return DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: BSwap, N2: ShAmt);
8306}
8307
8308/// Match a 32-bit packed halfword bswap. That is
8309/// ((x & 0x000000ff) << 8) |
8310/// ((x & 0x0000ff00) >> 8) |
8311/// ((x & 0x00ff0000) << 8) |
8312/// ((x & 0xff000000) >> 8)
8313/// => (rotl (bswap x), 16)
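/// E.g. for x == 0xAABBCCDD the pattern yields 0xBBAADDCC, which equals
/// rotl(0xDDCCBBAA, 16) where 0xDDCCBBAA == bswap(x).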
8314SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
8315 if (!LegalOperations)
8316 return SDValue();
8317
8318 EVT VT = N->getValueType(ResNo: 0);
8319 if (VT != MVT::i32)
8320 return SDValue();
8321 if (!TLI.isOperationLegalOrCustom(Op: ISD::BSWAP, VT))
8322 return SDValue();
8323
8324 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT))
8325 return BSwap;
8326
8327 // Try again with commuted operands.
8328 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0: N1, N1: N0, VT))
8329 return BSwap;
8330
8332 // Look for either
8333 // (or (bswaphpair), (bswaphpair))
8334 // (or (or (bswaphpair), (and)), (and))
8335 // (or (or (and), (bswaphpair)), (and))
8336 SDNode *Parts[4] = {};
8337
8338 if (isBSwapHWordPair(N: N0, Parts)) {
8339 // (or (or (and), (and)), (or (and), (and)))
8340 if (!isBSwapHWordPair(N: N1, Parts))
8341 return SDValue();
8342 } else if (N0.getOpcode() == ISD::OR) {
8343 // (or (or (or (and), (and)), (and)), (and))
8344 if (!isBSwapHWordElement(N: N1, Parts))
8345 return SDValue();
8346 SDValue N00 = N0.getOperand(i: 0);
8347 SDValue N01 = N0.getOperand(i: 1);
8348 if (!(isBSwapHWordElement(N: N01, Parts) && isBSwapHWordPair(N: N00, Parts)) &&
8349 !(isBSwapHWordElement(N: N00, Parts) && isBSwapHWordPair(N: N01, Parts)))
8350 return SDValue();
8351 } else {
8352 return SDValue();
8353 }
8354
8355 // Make sure the parts are all coming from the same node.
8356 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
8357 return SDValue();
8358
8359 SDLoc DL(N);
8360 SDValue BSwap = DAG.getNode(Opcode: ISD::BSWAP, DL, VT,
8361 Operand: SDValue(Parts[0], 0));
8362
8363 // Result of the bswap should be rotated by 16. If it's not legal, then
8364 // do (x << 16) | (x >> 16).
8365 SDValue ShAmt = DAG.getShiftAmountConstant(Val: 16, VT, DL);
8366 if (TLI.isOperationLegalOrCustom(Op: ISD::ROTL, VT))
8367 return DAG.getNode(Opcode: ISD::ROTL, DL, VT, N1: BSwap, N2: ShAmt);
8368 if (TLI.isOperationLegalOrCustom(Op: ISD::ROTR, VT))
8369 return DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: BSwap, N2: ShAmt);
8370 return DAG.getNode(Opcode: ISD::OR, DL, VT,
8371 N1: DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: BSwap, N2: ShAmt),
8372 N2: DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: BSwap, N2: ShAmt));
8373}
8374
8375/// This contains all DAGCombine rules which reduce two values combined by
8376/// an Or operation to a single value; \see visitANDLike().
8377SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) {
8378 EVT VT = N1.getValueType();
8379
8380 // fold (or x, undef) -> -1
8381 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
8382 return DAG.getAllOnesConstant(DL, VT);
8383
8384 if (SDValue V = foldLogicOfSetCCs(IsAnd: false, N0, N1, DL))
8385 return V;
8386
8387 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
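  // e.g. (or (and X, 0xFF00), (and Y, 0x00FF)) -> (and (or X, Y), 0xFFFF)
  // when bits 0-7 of X and bits 8-15 of Y are known to be zero.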
8388 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
8389 // Don't increase # computations.
8390 (N0->hasOneUse() || N1->hasOneUse())) {
8391 // We can only do this xform if we know that bits from X that are set in C2
8392 // but not in C1 are already zero. Likewise for Y.
8393 if (const ConstantSDNode *N0O1C =
8394 getAsNonOpaqueConstant(N: N0.getOperand(i: 1))) {
8395 if (const ConstantSDNode *N1O1C =
8396 getAsNonOpaqueConstant(N: N1.getOperand(i: 1))) {
8399 const APInt &LHSMask = N0O1C->getAPIntValue();
8400 const APInt &RHSMask = N1O1C->getAPIntValue();
8401
8402 if (DAG.MaskedValueIsZero(Op: N0.getOperand(i: 0), Mask: RHSMask&~LHSMask) &&
8403 DAG.MaskedValueIsZero(Op: N1.getOperand(i: 0), Mask: LHSMask&~RHSMask)) {
8404 SDValue X = DAG.getNode(Opcode: ISD::OR, DL: SDLoc(N0), VT,
8405 N1: N0.getOperand(i: 0), N2: N1.getOperand(i: 0));
8406 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: X,
8407 N2: DAG.getConstant(Val: LHSMask | RHSMask, DL, VT));
8408 }
8409 }
8410 }
8411 }
8412
8413 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
8414 if (N0.getOpcode() == ISD::AND &&
8415 N1.getOpcode() == ISD::AND &&
8416 N0.getOperand(i: 0) == N1.getOperand(i: 0) &&
8417 // Don't increase # computations.
8418 (N0->hasOneUse() || N1->hasOneUse())) {
8419 SDValue X = DAG.getNode(Opcode: ISD::OR, DL: SDLoc(N0), VT,
8420 N1: N0.getOperand(i: 1), N2: N1.getOperand(i: 1));
8421 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: N0.getOperand(i: 0), N2: X);
8422 }
8423
8424 return SDValue();
8425}
8426
8427/// OR combines for which the commuted variant will be tried as well.
8428static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
8429 SDNode *N) {
8430 EVT VT = N0.getValueType();
8431 unsigned BW = VT.getScalarSizeInBits();
8432 SDLoc DL(N);
8433
8434 auto peekThroughResize = [](SDValue V) {
8435 if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
8436 return V->getOperand(Num: 0);
8437 return V;
8438 };
8439
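  // Peek through a zext/trunc of the AND so the folds below still apply when
  // the operands were resized; operands are re-extended with getZExtOrTrunc
  // where needed.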
8440 SDValue N0Resized = peekThroughResize(N0);
8441 if (N0Resized.getOpcode() == ISD::AND) {
8442 SDValue N1Resized = peekThroughResize(N1);
8443 SDValue N00 = N0Resized.getOperand(i: 0);
8444 SDValue N01 = N0Resized.getOperand(i: 1);
8445
8446 // fold or (and x, y), x --> x
8447 if (N00 == N1Resized || N01 == N1Resized)
8448 return N1;
8449
8450 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
8451 // TODO: Set AllowUndefs = true.
8452 if (SDValue NotOperand = getBitwiseNotOperand(V: N01, Mask: N00,
8453 /* AllowUndefs */ false)) {
8454 if (peekThroughResize(NotOperand) == N1Resized)
8455 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: DAG.getZExtOrTrunc(Op: N00, DL, VT),
8456 N2: N1);
8457 }
8458
8459 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
8460 if (SDValue NotOperand = getBitwiseNotOperand(V: N00, Mask: N01,
8461 /* AllowUndefs */ false)) {
8462 if (peekThroughResize(NotOperand) == N1Resized)
8463 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: DAG.getZExtOrTrunc(Op: N01, DL, VT),
8464 N2: N1);
8465 }
8466 }
8467
8468 SDValue X, Y;
8469
8470 // fold or (xor X, N1), N1 --> or X, N1
8471 if (sd_match(N: N0, P: m_Xor(L: m_Value(N&: X), R: m_Specific(N: N1))))
8472 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: X, N2: N1);
8473
8474 // fold or (xor x, y), (x and/or y) --> or x, y
8475 if (sd_match(N: N0, P: m_Xor(L: m_Value(N&: X), R: m_Value(N&: Y))) &&
8476 (sd_match(N: N1, P: m_And(L: m_Specific(N: X), R: m_Specific(N: Y))) ||
8477 sd_match(N: N1, P: m_Or(L: m_Specific(N: X), R: m_Specific(N: Y)))))
8478 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: X, N2: Y);
8479
8480 if (SDValue R = foldLogicOfShifts(N, LogicOp: N0, ShiftOp: N1, DAG))
8481 return R;
8482
8483 auto peekThroughZext = [](SDValue V) {
8484 if (V->getOpcode() == ISD::ZERO_EXTEND)
8485 return V->getOperand(Num: 0);
8486 return V;
8487 };
8488
8489 // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
8490 if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
8491 N0.getOperand(i: 0) == N1.getOperand(i: 0) &&
8492 peekThroughZext(N0.getOperand(i: 2)) == peekThroughZext(N1.getOperand(i: 1)))
8493 return N0;
8494
8495 // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
8496 if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
8497 N0.getOperand(i: 1) == N1.getOperand(i: 0) &&
8498 peekThroughZext(N0.getOperand(i: 2)) == peekThroughZext(N1.getOperand(i: 1)))
8499 return N0;
8500
8501 // Attempt to match a legalized build_pair-esque pattern:
8502 // or(shl(aext(Hi),BW/2),zext(Lo))
8503 SDValue Lo, Hi;
8504 if (sd_match(N: N0,
8505 P: m_OneUse(P: m_Shl(L: m_AnyExt(Op: m_Value(N&: Hi)), R: m_SpecificInt(V: BW / 2)))) &&
8506 sd_match(N: N1, P: m_ZExt(Op: m_Value(N&: Lo))) &&
8507 Lo.getScalarValueSizeInBits() == (BW / 2) &&
8508 Lo.getValueType() == Hi.getValueType()) {
8509 // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)).
8510 SDValue NotLo, NotHi;
8511 if (sd_match(N: Lo, P: m_OneUse(P: m_Not(V: m_Value(N&: NotLo)))) &&
8512 sd_match(N: Hi, P: m_OneUse(P: m_Not(V: m_Value(N&: NotHi))))) {
8513 Lo = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT, Operand: NotLo);
8514 Hi = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT, Operand: NotHi);
8515 Hi = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi,
8516 N2: DAG.getShiftAmountConstant(Val: BW / 2, VT, DL));
8517 return DAG.getNOT(DL, Val: DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Lo, N2: Hi), VT);
8518 }
8519 }
8520
8521 return SDValue();
8522}
8523
8524SDValue DAGCombiner::visitOR(SDNode *N) {
8525 SDValue N0 = N->getOperand(Num: 0);
8526 SDValue N1 = N->getOperand(Num: 1);
8527 EVT VT = N1.getValueType();
8528 SDLoc DL(N);
8529
8530 // x | x --> x
8531 if (N0 == N1)
8532 return N0;
8533
8534 // fold (or c1, c2) -> c1|c2
8535 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::OR, DL, VT, Ops: {N0, N1}))
8536 return C;
8537
8538 // canonicalize constant to RHS
8539 if (DAG.isConstantIntBuildVectorOrConstantInt(N: N0) &&
8540 !DAG.isConstantIntBuildVectorOrConstantInt(N: N1))
8541 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1, N2: N0);
8542
8543 // fold vector ops
8544 if (VT.isVector()) {
8545 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
8546 return FoldedVOp;
8547
8548 // fold (or x, 0) -> x, vector edition
8549 if (ISD::isConstantSplatVectorAllZeros(N: N1.getNode()))
8550 return N0;
8551
8552 // fold (or x, -1) -> -1, vector edition
8553 if (ISD::isConstantSplatVectorAllOnes(N: N1.getNode()))
8554      // do not return N1, because an undef element may exist in N1
8555 return DAG.getAllOnesConstant(DL, VT: N1.getValueType());
8556
8557 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
8558 // Do this only if the resulting type / shuffle is legal.
8559 auto *SV0 = dyn_cast<ShuffleVectorSDNode>(Val&: N0);
8560 auto *SV1 = dyn_cast<ShuffleVectorSDNode>(Val&: N1);
8561 if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
8562 bool ZeroN00 = ISD::isBuildVectorAllZeros(N: N0.getOperand(i: 0).getNode());
8563 bool ZeroN01 = ISD::isBuildVectorAllZeros(N: N0.getOperand(i: 1).getNode());
8564 bool ZeroN10 = ISD::isBuildVectorAllZeros(N: N1.getOperand(i: 0).getNode());
8565 bool ZeroN11 = ISD::isBuildVectorAllZeros(N: N1.getOperand(i: 1).getNode());
8566 // Ensure both shuffles have a zero input.
8567 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
8568 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
8569 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
8570 bool CanFold = true;
8571 int NumElts = VT.getVectorNumElements();
8572 SmallVector<int, 4> Mask(NumElts, -1);
8573
8574 for (int i = 0; i != NumElts; ++i) {
8575 int M0 = SV0->getMaskElt(Idx: i);
8576 int M1 = SV1->getMaskElt(Idx: i);
8577
8578 // Determine if either index is pointing to a zero vector.
8579 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
8580 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
8581
8582          // If one element is zero and the other side is undef, keep undef.
8583 // This also handles the case that both are undef.
8584 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
8585 continue;
8586
8587 // Make sure only one of the elements is zero.
8588 if (M0Zero == M1Zero) {
8589 CanFold = false;
8590 break;
8591 }
8592
8593 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
8594
8595          // We have a zero and a non-zero element. If the non-zero came from
8596          // SV0, make the index an LHS index; if it came from SV1, make it
8597          // an RHS index. We need to mod by NumElts because we don't care
8598 // which operand it came from in the original shuffles.
8599 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
8600 }
8601
8602 if (CanFold) {
8603 SDValue NewLHS = ZeroN00 ? N0.getOperand(i: 1) : N0.getOperand(i: 0);
8604 SDValue NewRHS = ZeroN10 ? N1.getOperand(i: 1) : N1.getOperand(i: 0);
8605 SDValue LegalShuffle =
8606 TLI.buildLegalVectorShuffle(VT, DL, N0: NewLHS, N1: NewRHS, Mask, DAG);
8607 if (LegalShuffle)
8608 return LegalShuffle;
8609 }
8610 }
8611 }
8612 }
8613
8614 // fold (or x, 0) -> x
8615 if (isNullConstant(V: N1))
8616 return N0;
8617
8618 // fold (or x, -1) -> -1
8619 if (isAllOnesConstant(V: N1))
8620 return N1;
8621
8622 if (SDValue NewSel = foldBinOpIntoSelect(BO: N))
8623 return NewSel;
8624
8625 // fold (or x, c) -> c iff (x & ~c) == 0
8626 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(Val&: N1);
8627 if (N1C && DAG.MaskedValueIsZero(Op: N0, Mask: ~N1C->getAPIntValue()))
8628 return N1;
8629
8630 if (SDValue R = foldAndOrOfSETCC(LogicOp: N, DAG))
8631 return R;
8632
8633 if (SDValue Combined = visitORLike(N0, N1, DL))
8634 return Combined;
8635
8636 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
8637 return Combined;
8638
8639 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
8640 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
8641 return BSwap;
8642 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
8643 return BSwap;
8644
8645 // reassociate or
8646 if (SDValue ROR = reassociateOps(Opc: ISD::OR, DL, N0, N1, Flags: N->getFlags()))
8647 return ROR;
8648
8649 // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
8650 if (SDValue SD =
8651 reassociateReduction(RedOpc: ISD::VECREDUCE_OR, Opc: ISD::OR, DL, VT, N0, N1))
8652 return SD;
8653
8654 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
8655 // iff (c1 & c2) != 0 or c1/c2 are undef.
8656 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
8657 return !C1 || !C2 || C1->getAPIntValue().intersects(RHS: C2->getAPIntValue());
8658 };
8659 if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
8660 ISD::matchBinaryPredicate(LHS: N0.getOperand(i: 1), RHS: N1, Match: MatchIntersect, AllowUndefs: true)) {
8661 if (SDValue COR = DAG.FoldConstantArithmetic(Opcode: ISD::OR, DL: SDLoc(N1), VT,
8662 Ops: {N1, N0.getOperand(i: 1)})) {
8663 SDValue IOR = DAG.getNode(Opcode: ISD::OR, DL: SDLoc(N0), VT, N1: N0.getOperand(i: 0), N2: N1);
8664 AddToWorklist(N: IOR.getNode());
8665 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: COR, N2: IOR);
8666 }
8667 }
8668
8669 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
8670 return Combined;
8671 if (SDValue Combined = visitORCommutative(DAG, N0: N1, N1: N0, N))
8672 return Combined;
8673
8674 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
8675 if (N0.getOpcode() == N1.getOpcode())
8676 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
8677 return V;
8678
8679 // See if this is some rotate idiom.
8680 if (SDValue Rot = MatchRotate(LHS: N0, RHS: N1, DL, /*FromAdd=*/false))
8681 return Rot;
8682
8683 if (SDValue Load = MatchLoadCombine(N))
8684 return Load;
8685
8686 // Simplify the operands using demanded-bits information.
8687 if (SimplifyDemandedBits(Op: SDValue(N, 0)))
8688 return SDValue(N, 0);
8689
8690 // If OR can be rewritten into ADD, try combines based on ADD.
8691 if ((!LegalOperations || TLI.isOperationLegal(Op: ISD::ADD, VT)) &&
8692 DAG.isADDLike(Op: SDValue(N, 0)))
8693 if (SDValue Combined = visitADDLike(N))
8694 return Combined;
8695
8696 // Postpone until legalization completed to avoid interference with bswap
8697 // folding
8698 if (LegalOperations || VT.isVector())
8699 if (SDValue R = foldLogicTreeOfShifts(N, LeftHand: N0, RightHand: N1, DAG))
8700 return R;
8701
8702 if (VT.isScalarInteger() && VT != MVT::i1)
8703 if (SDValue R = foldMaskedMerge(Node: N, DAG, TLI, DL))
8704 return R;
8705
8706 return SDValue();
8707}
8708
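/// If \p Op is an AND with a constant (or constant splat) RHS, set \p Mask to
/// that constant and return the AND's other operand; otherwise return \p Op
/// unchanged.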
8709static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op,
8710 SDValue &Mask) {
8711 if (Op.getOpcode() == ISD::AND &&
8712 DAG.isConstantIntBuildVectorOrConstantInt(N: Op.getOperand(i: 1))) {
8713 Mask = Op.getOperand(i: 1);
8714 return Op.getOperand(i: 0);
8715 }
8716 return Op;
8717}
8718
8719/// Match "(X shl/srl V1) & V2" where V2 may not be present.
8720static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
8721 SDValue &Mask) {
8722 Op = stripConstantMask(DAG, Op, Mask);
8723 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
8724 Shift = Op;
8725 return true;
8726 }
8727 return false;
8728}
8729
8730/// Helper function for visitOR to extract the needed side of a rotate idiom
8731/// from a shl/srl/mul/udiv. This is meant to handle cases where
8732/// InstCombine merged some outside op with one of the shifts from
8733/// the rotate pattern.
8734/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
8735/// Otherwise, returns an expansion of \p ExtractFrom based on the following
8736/// patterns:
8737///
8738/// (or (add v v) (shrl v bitwidth-1)):
8739/// expands (add v v) -> (shl v 1)
8740///
8741/// (or (mul v c0) (shrl (mul v c1) c2)):
8742/// expands (mul v c0) -> (shl (mul v c1) c3)
8743///
8744/// (or (udiv v c0) (shl (udiv v c1) c2)):
8745/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
8746///
8747/// (or (shl v c0) (shrl (shl v c1) c2)):
8748/// expands (shl v c0) -> (shl (shl v c1) c3)
8749///
8750/// (or (shrl v c0) (shl (shrl v c1) c2)):
8751/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
8752///
8753/// Such that in all cases, c3+c2==bitwidth(op v c1).
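///
/// For example, given i32 (or (mul v 4), (srl (mul v 2), 31)), the mul by 4
/// can be rewritten as (shl (mul v 2), 1), i.e. c3 == 1 and c3 + c2 == 32,
/// exposing a rotate of (mul v 2) to the caller.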
8754static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
8755 SDValue ExtractFrom, SDValue &Mask,
8756 const SDLoc &DL) {
8757 assert(OppShift && ExtractFrom && "Empty SDValue");
8758 if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
8759 return SDValue();
8760
8761 ExtractFrom = stripConstantMask(DAG, Op: ExtractFrom, Mask);
8762
8763 // Value and Type of the shift.
8764 SDValue OppShiftLHS = OppShift.getOperand(i: 0);
8765 EVT ShiftedVT = OppShiftLHS.getValueType();
8766
8767 // Amount of the existing shift.
8768 ConstantSDNode *OppShiftCst = isConstOrConstSplat(N: OppShift.getOperand(i: 1));
8769
8770 // (add v v) -> (shl v 1)
8771 // TODO: Should this be a general DAG canonicalization?
8772 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
8773 ExtractFrom.getOpcode() == ISD::ADD &&
8774 ExtractFrom.getOperand(i: 0) == ExtractFrom.getOperand(i: 1) &&
8775 ExtractFrom.getOperand(i: 0) == OppShiftLHS &&
8776 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
8777 return DAG.getNode(Opcode: ISD::SHL, DL, VT: ShiftedVT, N1: OppShiftLHS,
8778 N2: DAG.getShiftAmountConstant(Val: 1, VT: ShiftedVT, DL));
8779
8780 // Preconditions:
8781 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
8782 //
8783 // Find opcode of the needed shift to be extracted from (op0 v c0).
8784 unsigned Opcode = ISD::DELETED_NODE;
8785 bool IsMulOrDiv = false;
8786 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
8787 // opcode or its arithmetic (mul or udiv) variant.
8788 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
8789 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
8790 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
8791 return false;
8792 Opcode = NeededShift;
8793 return true;
8794 };
8795 // op0 must be either the needed shift opcode or the mul/udiv equivalent
8796 // that the needed shift can be extracted from.
8797 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
8798 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
8799 return SDValue();
8800
8801 // op0 must be the same opcode on both sides, have the same LHS argument,
8802 // and produce the same value type.
8803 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
8804 OppShiftLHS.getOperand(i: 0) != ExtractFrom.getOperand(i: 0) ||
8805 ShiftedVT != ExtractFrom.getValueType())
8806 return SDValue();
8807
8808 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
8809 ConstantSDNode *OppLHSCst = isConstOrConstSplat(N: OppShiftLHS.getOperand(i: 1));
8810 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
8811 ConstantSDNode *ExtractFromCst =
8812 isConstOrConstSplat(N: ExtractFrom.getOperand(i: 1));
8813 // TODO: We should be able to handle non-uniform constant vectors for these values
8814 // Check that we have constant values.
8815 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
8816 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
8817 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
8818 return SDValue();
8819
8820 // Compute the shift amount we need to extract to complete the rotate.
8821 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
8822 if (OppShiftCst->getAPIntValue().ugt(RHS: VTWidth))
8823 return SDValue();
8824 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
8825 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
8826 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
8827 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
8828 zeroExtendToMatch(LHS&: ExtractFromAmt, RHS&: OppLHSAmt);
8829
8830 // Now try extract the needed shift from the ExtractFrom op and see if the
8831 // result matches up with the existing shift's LHS op.
8832 if (IsMulOrDiv) {
8833 // Op to extract from is a mul or udiv by a constant.
8834 // Check:
8835 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
8836 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
8837 const APInt ExtractDiv = APInt::getOneBitSet(numBits: ExtractFromAmt.getBitWidth(),
8838 BitNo: NeededShiftAmt.getZExtValue());
8839 APInt ResultAmt;
8840 APInt Rem;
8841 APInt::udivrem(LHS: ExtractFromAmt, RHS: ExtractDiv, Quotient&: ResultAmt, Remainder&: Rem);
8842 if (Rem != 0 || ResultAmt != OppLHSAmt)
8843 return SDValue();
8844 } else {
8845 // Op to extract from is a shift by a constant.
8846 // Check:
8847 // c2 - (bitwidth(op0 v c0) - c1) == c0
8848 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
8849 width: ExtractFromAmt.getBitWidth()))
8850 return SDValue();
8851 }
8852
8853 // Return the expanded shift op that should allow a rotate to be formed.
8854 EVT ShiftVT = OppShift.getOperand(i: 1).getValueType();
8855 EVT ResVT = ExtractFrom.getValueType();
8856 SDValue NewShiftNode = DAG.getConstant(Val: NeededShiftAmt, DL, VT: ShiftVT);
8857 return DAG.getNode(Opcode, DL, VT: ResVT, N1: OppShiftLHS, N2: NewShiftNode);
8858}
8859
8860// Return true if we can prove that, whenever Neg and Pos are both in the
8861// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
8862// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
8863//
8864// (or (shift1 X, Neg), (shift2 X, Pos))
8865//
8866// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
8867// in direction shift1 by Neg. The range [0, EltSize) means that we only need
8868// to consider shift amounts with defined behavior.
8869//
8870// The IsRotate flag should be set when the LHS of both shifts is the same.
8871// Otherwise if matching a general funnel shift, it should be clear.
8872static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
8873 SelectionDAG &DAG, bool IsRotate, bool FromAdd) {
8874 const auto &TLI = DAG.getTargetLoweringInfo();
8875 // If EltSize is a power of 2 then:
8876 //
8877 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
8878 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
8879 //
8880 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
8881 // for the stronger condition:
8882 //
8883 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
8884 //
8885 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
8886 // we can just replace Neg with Neg' for the rest of the function.
8887 //
8888 // In other cases we check for the even stronger condition:
8889 //
8890 // Neg == EltSize - Pos [B]
8891 //
8892 // for all Neg and Pos. Note that the (or ...) then invokes undefined
8893 // behavior if Pos == 0 (and consequently Neg == EltSize).
8894 //
8895 // We could actually use [A] whenever EltSize is a power of 2, but the
8896 // only extra cases that it would match are those uninteresting ones
8897 // where Neg and Pos are never in range at the same time. E.g. for
8898 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
8899 // as well as (sub 32, Pos), but:
8900 //
8901 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
8902 //
8903 // always invokes undefined behavior for 32-bit X.
8904 //
8905 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
8906 // This allows us to peek through any operations that only affect Mask's
8907 // un-demanded bits.
8908 //
8909 // NOTE: We can only do this when matching operations which won't modify the
8910 // least Log2(EltSize) significant bits and not a general funnel shift.
8911 unsigned MaskLoBits = 0;
8912 if (IsRotate && !FromAdd && isPowerOf2_64(Value: EltSize)) {
8913 unsigned Bits = Log2_64(Value: EltSize);
8914 unsigned NegBits = Neg.getScalarValueSizeInBits();
8915 if (NegBits >= Bits) {
8916 APInt DemandedBits = APInt::getLowBitsSet(numBits: NegBits, loBitsSet: Bits);
8917 if (SDValue Inner =
8918 TLI.SimplifyMultipleUseDemandedBits(Op: Neg, DemandedBits, DAG)) {
8919 Neg = Inner;
8920 MaskLoBits = Bits;
8921 }
8922 }
8923 }
8924
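  // For example, with EltSize == 32, Neg == (sub 32, Pos) satisfies [B]
  // directly (NegC == 32, NegOp1 == Pos, so Width below equals EltSize), and
  // (and (sub 32, Pos), 31) reduces to the same form once the mask has been
  // stripped above, satisfying [A].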
8925 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
8926 if (Neg.getOpcode() != ISD::SUB)
8927 return false;
8928 ConstantSDNode *NegC = isConstOrConstSplat(N: Neg.getOperand(i: 0));
8929 if (!NegC)
8930 return false;
8931 SDValue NegOp1 = Neg.getOperand(i: 1);
8932
8933 // On the RHS of [A], if Pos is the result of operation on Pos' that won't
8934 // affect Mask's demanded bits, just replace Pos with Pos'. These operations
8935 // are redundant for the purpose of the equality.
8936 if (MaskLoBits) {
8937 unsigned PosBits = Pos.getScalarValueSizeInBits();
8938 if (PosBits >= MaskLoBits) {
8939 APInt DemandedBits = APInt::getLowBitsSet(numBits: PosBits, loBitsSet: MaskLoBits);
8940 if (SDValue Inner =
8941 TLI.SimplifyMultipleUseDemandedBits(Op: Pos, DemandedBits, DAG)) {
8942 Pos = Inner;
8943 }
8944 }
8945 }
8946
8947 // The condition we need is now:
8948 //
8949 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
8950 //
8951 // If NegOp1 == Pos then we need:
8952 //
8953 // EltSize & Mask == NegC & Mask
8954 //
8955 // (because "x & Mask" is a truncation and distributes through subtraction).
8956 //
8957 // We also need to account for a potential truncation of NegOp1 if the amount
8958 // has already been legalized to a shift amount type.
8959 APInt Width;
8960 if ((Pos == NegOp1) ||
8961 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(i: 0)))
8962 Width = NegC->getAPIntValue();
8963
8964 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
8965 // Then the condition we want to prove becomes:
8966 //
8967 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
8968 //
8969 // which, again because "x & Mask" is a truncation, becomes:
8970 //
8971 // NegC & Mask == (EltSize - PosC) & Mask
8972 // EltSize & Mask == (NegC + PosC) & Mask
8973 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(i: 0) == NegOp1) {
8974 if (ConstantSDNode *PosC = isConstOrConstSplat(N: Pos.getOperand(i: 1)))
8975 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
8976 else
8977 return false;
8978 } else
8979 return false;
8980
8981 // Now we just need to check that EltSize & Mask == Width & Mask.
8982 if (MaskLoBits)
8983 // EltSize & Mask is 0 since Mask is EltSize - 1.
8984 return Width.getLoBits(numBits: MaskLoBits) == 0;
8985 return Width == EltSize;
8986}
8987
8988// A subroutine of MatchRotate used once we have found an OR of two opposite
8989// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
8990// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
8991// former being preferred if supported. InnerPos and InnerNeg are Pos and
8992// Neg with outer conversions stripped away.
8993SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
8994 SDValue Neg, SDValue InnerPos,
8995 SDValue InnerNeg, bool FromAdd,
8996 bool HasPos, unsigned PosOpcode,
8997 unsigned NegOpcode, const SDLoc &DL) {
8998 // fold (or/add (shl x, (*ext y)),
8999 // (srl x, (*ext (sub 32, y)))) ->
9000 // (rotl x, y) or (rotr x, (sub 32, y))
9001 //
9002 // fold (or/add (shl x, (*ext (sub 32, y))),
9003 // (srl x, (*ext y))) ->
9004 // (rotr x, y) or (rotl x, (sub 32, y))
9005 EVT VT = Shifted.getValueType();
9006 if (matchRotateSub(Pos: InnerPos, Neg: InnerNeg, EltSize: VT.getScalarSizeInBits(), DAG,
9007 /*IsRotate*/ true, FromAdd))
9008 return DAG.getNode(Opcode: HasPos ? PosOpcode : NegOpcode, DL, VT, N1: Shifted,
9009 N2: HasPos ? Pos : Neg);
9010
9011 return SDValue();
9012}
9013
9014// A subroutine of MatchRotate used once we have found an OR of two opposite
9015// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
9016// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
9017// former being preferred if supported. InnerPos and InnerNeg are Pos and
9018// Neg with outer conversions stripped away.
9019// TODO: Merge with MatchRotatePosNeg.
9020SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
9021 SDValue Neg, SDValue InnerPos,
9022 SDValue InnerNeg, bool FromAdd,
9023 bool HasPos, unsigned PosOpcode,
9024 unsigned NegOpcode, const SDLoc &DL) {
9025 EVT VT = N0.getValueType();
9026 unsigned EltBits = VT.getScalarSizeInBits();
9027
9028 // fold (or/add (shl x0, (*ext y)),
9029 // (srl x1, (*ext (sub 32, y)))) ->
9030 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
9031 //
9032 // fold (or/add (shl x0, (*ext (sub 32, y))),
9033 // (srl x1, (*ext y))) ->
9034 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
9035 if (matchRotateSub(Pos: InnerPos, Neg: InnerNeg, EltSize: EltBits, DAG, /*IsRotate*/ N0 == N1,
9036 FromAdd))
9037 return DAG.getNode(Opcode: HasPos ? PosOpcode : NegOpcode, DL, VT, N1: N0, N2: N1,
9038 N3: HasPos ? Pos : Neg);
9039
9040 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
9041  // so for now just use the PosOpcode case if it's legal.
9042 // TODO: When can we use the NegOpcode case?
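  // For a power-of-2 bitwidth BW, (xor y, BW-1) == (BW-1) - y for y in
  // [0, BW), so (srl (srl x1, 1), (xor y, BW-1)) shifts x1 right by a total
  // of BW - y, which is exactly the srl half of the fshl expansion (and
  // contributes no bits when y == 0).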
9043 if (PosOpcode == ISD::FSHL && isPowerOf2_32(Value: EltBits)) {
9044 SDValue X;
9045 // fold (or/add (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
9046 // -> (fshl x0, x1, y)
9047 if (sd_match(N: N1, P: m_Srl(L: m_Value(N&: X), R: m_One())) &&
9048 sd_match(N: InnerNeg,
9049 P: m_Xor(L: m_Specific(N: InnerPos), R: m_SpecificInt(V: EltBits - 1))) &&
9050 TLI.isOperationLegalOrCustom(Op: ISD::FSHL, VT)) {
9051 return DAG.getNode(Opcode: ISD::FSHL, DL, VT, N1: N0, N2: X, N3: Pos);
9052 }
9053
9054 // fold (or/add (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
9055 // -> (fshr x0, x1, y)
9056 if (sd_match(N: N0, P: m_Shl(L: m_Value(N&: X), R: m_One())) &&
9057 sd_match(N: InnerPos,
9058 P: m_Xor(L: m_Specific(N: InnerNeg), R: m_SpecificInt(V: EltBits - 1))) &&
9059 TLI.isOperationLegalOrCustom(Op: ISD::FSHR, VT)) {
9060 return DAG.getNode(Opcode: ISD::FSHR, DL, VT, N1: X, N2: N1, N3: Neg);
9061 }
9062
9063 // fold (or/add (shl (add x0, x0), (xor y, 31)), (srl x1, y))
9064 // -> (fshr x0, x1, y)
9065 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
9066 if (sd_match(N: N0, P: m_Add(L: m_Value(N&: X), R: m_Deferred(V&: X))) &&
9067 sd_match(N: InnerPos,
9068 P: m_Xor(L: m_Specific(N: InnerNeg), R: m_SpecificInt(V: EltBits - 1))) &&
9069 TLI.isOperationLegalOrCustom(Op: ISD::FSHR, VT)) {
9070 return DAG.getNode(Opcode: ISD::FSHR, DL, VT, N1: X, N2: N1, N3: Neg);
9071 }
9072 }
9073
9074 return SDValue();
9075}
9076
9077// MatchRotate - Handle an 'or' or 'add' of two operands. If this is one of the
9078// many idioms for rotate, and if the target supports rotation instructions,
9079// generate a rot[lr]. This also matches funnel shift patterns, similar to
9080// rotation but with different shifted sources.
9081SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL,
9082 bool FromAdd) {
9083 EVT VT = LHS.getValueType();
9084
9085 // The target must have at least one rotate/funnel flavor.
9086 // We still try to match rotate by constant pre-legalization.
9087 // TODO: Support pre-legalization funnel-shift by constant.
9088 bool HasROTL = hasOperation(Opcode: ISD::ROTL, VT);
9089 bool HasROTR = hasOperation(Opcode: ISD::ROTR, VT);
9090 bool HasFSHL = hasOperation(Opcode: ISD::FSHL, VT);
9091 bool HasFSHR = hasOperation(Opcode: ISD::FSHR, VT);
9092
9093 // If the type is going to be promoted and the target has enabled custom
9094 // lowering for rotate, allow matching rotate by non-constants. Only allow
9095 // this for scalar types.
9096 if (VT.isScalarInteger() && TLI.getTypeAction(Context&: *DAG.getContext(), VT) ==
9097 TargetLowering::TypePromoteInteger) {
9098 HasROTL |= TLI.getOperationAction(Op: ISD::ROTL, VT) == TargetLowering::Custom;
9099 HasROTR |= TLI.getOperationAction(Op: ISD::ROTR, VT) == TargetLowering::Custom;
9100 }
9101
9102 if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
9103 return SDValue();
9104
9105 // Check for truncated rotate.
9106 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
9107 LHS.getOperand(i: 0).getValueType() == RHS.getOperand(i: 0).getValueType()) {
9108 assert(LHS.getValueType() == RHS.getValueType());
9109 if (SDValue Rot =
9110 MatchRotate(LHS: LHS.getOperand(i: 0), RHS: RHS.getOperand(i: 0), DL, FromAdd))
9111 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(LHS), VT: LHS.getValueType(), Operand: Rot);
9112 }
9113
9114 // Match "(X shl/srl V1) & V2" where V2 may not be present.
9115 SDValue LHSShift; // The shift.
9116 SDValue LHSMask; // AND value if any.
9117 matchRotateHalf(DAG, Op: LHS, Shift&: LHSShift, Mask&: LHSMask);
9118
9119 SDValue RHSShift; // The shift.
9120 SDValue RHSMask; // AND value if any.
9121 matchRotateHalf(DAG, Op: RHS, Shift&: RHSShift, Mask&: RHSMask);
9122
9123 // If neither side matched a rotate half, bail
9124 if (!LHSShift && !RHSShift)
9125 return SDValue();
9126
9127 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
9128 // side of the rotate, so try to handle that here. In all cases we need to
9129 // pass the matched shift from the opposite side to compute the opcode and
9130 // needed shift amount to extract. We still want to do this if both sides
9131 // matched a rotate half because one half may be a potential overshift that
9132 // can be broken down (ie if InstCombine merged two shl or srl ops into a
9133 // single one).
9134
9135 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
9136 if (LHSShift)
9137 if (SDValue NewRHSShift =
9138 extractShiftForRotate(DAG, OppShift: LHSShift, ExtractFrom: RHS, Mask&: RHSMask, DL))
9139 RHSShift = NewRHSShift;
9140 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
9141 if (RHSShift)
9142 if (SDValue NewLHSShift =
9143 extractShiftForRotate(DAG, OppShift: RHSShift, ExtractFrom: LHS, Mask&: LHSMask, DL))
9144 LHSShift = NewLHSShift;
9145
9146 // If a side is still missing, nothing else we can do.
9147 if (!RHSShift || !LHSShift)
9148 return SDValue();
9149
9150 // At this point we've matched or extracted a shift op on each side.
9151
9152 if (LHSShift.getOpcode() == RHSShift.getOpcode())
9153 return SDValue(); // Shifts must disagree.
9154
9155 // Canonicalize shl to left side in a shl/srl pair.
9156 if (RHSShift.getOpcode() == ISD::SHL) {
9157 std::swap(a&: LHS, b&: RHS);
9158 std::swap(a&: LHSShift, b&: RHSShift);
9159 std::swap(a&: LHSMask, b&: RHSMask);
9160 }
9161
9162 // Something has gone wrong - we've lost the shl/srl pair - bail.
9163 if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
9164 return SDValue();
9165
9166 unsigned EltSizeInBits = VT.getScalarSizeInBits();
9167 SDValue LHSShiftArg = LHSShift.getOperand(i: 0);
9168 SDValue LHSShiftAmt = LHSShift.getOperand(i: 1);
9169 SDValue RHSShiftArg = RHSShift.getOperand(i: 0);
9170 SDValue RHSShiftAmt = RHSShift.getOperand(i: 1);
9171
9172 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
9173 ConstantSDNode *RHS) {
9174 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
9175 };
9176
9177 auto ApplyMasks = [&](SDValue Res) {
9178 // If there is an AND of either shifted operand, apply it to the result.
9179 if (LHSMask.getNode() || RHSMask.getNode()) {
9180 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
9181 SDValue Mask = AllOnes;
9182
9183 if (LHSMask.getNode()) {
9184 SDValue RHSBits = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: AllOnes, N2: RHSShiftAmt);
9185 Mask = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Mask,
9186 N2: DAG.getNode(Opcode: ISD::OR, DL, VT, N1: LHSMask, N2: RHSBits));
9187 }
9188 if (RHSMask.getNode()) {
9189 SDValue LHSBits = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: AllOnes, N2: LHSShiftAmt);
9190 Mask = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Mask,
9191 N2: DAG.getNode(Opcode: ISD::OR, DL, VT, N1: RHSMask, N2: LHSBits));
9192 }
9193
9194 Res = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Res, N2: Mask);
9195 }
9196
9197 return Res;
9198 };
9199
9200 // TODO: Support pre-legalization funnel-shift by constant.
9201 bool IsRotate = LHSShiftArg == RHSShiftArg;
9202 if (!IsRotate && !(HasFSHL || HasFSHR)) {
9203 if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
9204 ISD::matchBinaryPredicate(LHS: LHSShiftAmt, RHS: RHSShiftAmt, Match: MatchRotateSum)) {
9205 // Look for a disguised rotate by constant.
9206 // The common shifted operand X may be hidden inside another 'or'.
9207 SDValue X, Y;
9208 auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
9209 if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
9210 return false;
9211 if (CommonOp == Or.getOperand(i: 0)) {
9212 X = CommonOp;
9213 Y = Or.getOperand(i: 1);
9214 return true;
9215 }
9216 if (CommonOp == Or.getOperand(i: 1)) {
9217 X = CommonOp;
9218 Y = Or.getOperand(i: 0);
9219 return true;
9220 }
9221 return false;
9222 };
9223
9224 SDValue Res;
9225 if (matchOr(LHSShiftArg, RHSShiftArg)) {
9226 // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
9227 SDValue RotX = DAG.getNode(Opcode: ISD::ROTL, DL, VT, N1: X, N2: LHSShiftAmt);
9228 SDValue ShlY = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Y, N2: LHSShiftAmt);
9229 Res = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: RotX, N2: ShlY);
9230 } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
9231 // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
9232 SDValue RotX = DAG.getNode(Opcode: ISD::ROTL, DL, VT, N1: X, N2: LHSShiftAmt);
9233 SDValue SrlY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: RHSShiftAmt);
9234 Res = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: RotX, N2: SrlY);
9235 } else {
9236 return SDValue();
9237 }
9238
9239 return ApplyMasks(Res);
9240 }
9241
9242 return SDValue(); // Requires funnel shift support.
9243 }
9244
9245 // fold (or/add (shl x, C1), (srl x, C2)) -> (rotl x, C1)
9246 // fold (or/add (shl x, C1), (srl x, C2)) -> (rotr x, C2)
9247 // fold (or/add (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
9248 // fold (or/add (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
9249 // iff C1+C2 == EltSizeInBits
9250 if (ISD::matchBinaryPredicate(LHS: LHSShiftAmt, RHS: RHSShiftAmt, Match: MatchRotateSum)) {
9251 SDValue Res;
9252 if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
9253 bool UseROTL = !LegalOperations || HasROTL;
9254 Res = DAG.getNode(Opcode: UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, N1: LHSShiftArg,
9255 N2: UseROTL ? LHSShiftAmt : RHSShiftAmt);
9256 } else {
9257 bool UseFSHL = !LegalOperations || HasFSHL;
9258 Res = DAG.getNode(Opcode: UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, N1: LHSShiftArg,
9259 N2: RHSShiftArg, N3: UseFSHL ? LHSShiftAmt : RHSShiftAmt);
9260 }
9261
9262 return ApplyMasks(Res);
9263 }
9264
9265 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
9266 // shift.
9267 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
9268 return SDValue();
9269
9270 // If there is a mask here, and we have a variable shift, we can't be sure
9271 // that we're masking out the right stuff.
9272 if (LHSMask.getNode() || RHSMask.getNode())
9273 return SDValue();
9274
9275 // If the shift amount is sign/zext/any-extended just peel it off.
9276 SDValue LExtOp0 = LHSShiftAmt;
9277 SDValue RExtOp0 = RHSShiftAmt;
9278 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
9279 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
9280 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
9281 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
9282 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
9283 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
9284 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
9285 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
9286 LExtOp0 = LHSShiftAmt.getOperand(i: 0);
9287 RExtOp0 = RHSShiftAmt.getOperand(i: 0);
9288 }
9289
9290 if (IsRotate && (HasROTL || HasROTR)) {
9291 if (SDValue TryL = MatchRotatePosNeg(Shifted: LHSShiftArg, Pos: LHSShiftAmt, Neg: RHSShiftAmt,
9292 InnerPos: LExtOp0, InnerNeg: RExtOp0, FromAdd, HasPos: HasROTL,
9293 PosOpcode: ISD::ROTL, NegOpcode: ISD::ROTR, DL))
9294 return TryL;
9295
9296 if (SDValue TryR = MatchRotatePosNeg(Shifted: RHSShiftArg, Pos: RHSShiftAmt, Neg: LHSShiftAmt,
9297 InnerPos: RExtOp0, InnerNeg: LExtOp0, FromAdd, HasPos: HasROTR,
9298 PosOpcode: ISD::ROTR, NegOpcode: ISD::ROTL, DL))
9299 return TryR;
9300 }
9301
9302 if (SDValue TryL = MatchFunnelPosNeg(N0: LHSShiftArg, N1: RHSShiftArg, Pos: LHSShiftAmt,
9303 Neg: RHSShiftAmt, InnerPos: LExtOp0, InnerNeg: RExtOp0, FromAdd,
9304 HasPos: HasFSHL, PosOpcode: ISD::FSHL, NegOpcode: ISD::FSHR, DL))
9305 return TryL;
9306
9307 if (SDValue TryR = MatchFunnelPosNeg(N0: LHSShiftArg, N1: RHSShiftArg, Pos: RHSShiftAmt,
9308 Neg: LHSShiftAmt, InnerPos: RExtOp0, InnerNeg: LExtOp0, FromAdd,
9309 HasPos: HasFSHR, PosOpcode: ISD::FSHR, NegOpcode: ISD::FSHL, DL))
9310 return TryR;
9311
9312 return SDValue();
9313}
9314
9315/// Recursively traverses the expression calculating the origin of the requested
9316/// byte of the given value. Returns std::nullopt if the provider can't be
9317/// calculated.
9318///
9319/// For all the values except the root of the expression, we verify that the
9320/// value has exactly one use and if not then return std::nullopt. This way if
9321/// the origin of the byte is returned it's guaranteed that the values which
9322/// contribute to the byte are not used outside of this expression.
9323///
9324/// However, there is a special case when dealing with vector loads -- we allow
9325/// more than one use if the load is a vector type. Since the values that
9326/// contribute to the byte ultimately come from the ExtractVectorElements of the
9327/// Load, we don't care if the Load has uses other than ExtractVectorElements,
9328/// because those operations are independent from the pattern to be combined.
9329/// For vector loads, we simply care that the ByteProviders are adjacent
9330/// positions of the same vector, and their index matches the byte that is being
9331/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
9332/// is the index used in an ExtractVectorElement, and \p StartingIndex is the
9333/// byte position we are trying to provide for the LoadCombine. If these do
9334/// not match, then we can not combine the vector loads. \p Index uses the
9335/// byte position we are trying to provide for and is matched against the
9336/// shl and load size. The \p Index algorithm ensures the requested byte is
9337/// provided for by the pattern, and the pattern does not over provide bytes.
9338///
9339///
9340/// The supported LoadCombine pattern for vector loads is as follows
9341/// or
9342/// / \
9343/// or shl
9344/// / \ |
9345/// or shl zext
9346/// / \ | |
9347/// shl zext zext EVE*
9348/// | | | |
9349/// zext EVE* EVE* LOAD
9350/// | | |
9351/// EVE* LOAD LOAD
9352/// |
9353/// LOAD
9354///
9355/// *ExtractVectorElement
9356using SDByteProvider = ByteProvider<SDNode *>;
9357
9358static std::optional<SDByteProvider>
9359calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
9360 std::optional<uint64_t> VectorIndex,
9361 unsigned StartingIndex = 0) {
9362
9363  // A typical i64-by-i8 pattern requires recursion up to 8 calls deep.
9364 if (Depth == 10)
9365 return std::nullopt;
9366
9367 // Only allow multiple uses if the instruction is a vector load (in which
9368 // case we will use the load for every ExtractVectorElement)
9369 if (Depth && !Op.hasOneUse() &&
9370 (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
9371 return std::nullopt;
9372
9373 // Fail to combine if we have encountered anything but a LOAD after handling
9374 // an ExtractVectorElement.
9375 if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
9376 return std::nullopt;
9377
9378 unsigned BitWidth = Op.getScalarValueSizeInBits();
9379 if (BitWidth % 8 != 0)
9380 return std::nullopt;
9381 unsigned ByteWidth = BitWidth / 8;
9382 assert(Index < ByteWidth && "invalid index requested");
9383 (void) ByteWidth;
9384
9385 switch (Op.getOpcode()) {
9386 case ISD::OR: {
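    // A byte of an OR is provided by whichever operand actually defines it;
    // the other operand must be known to contribute zero at that position.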
9387 auto LHS =
9388 calculateByteProvider(Op: Op->getOperand(Num: 0), Index, Depth: Depth + 1, VectorIndex);
9389 if (!LHS)
9390 return std::nullopt;
9391 auto RHS =
9392 calculateByteProvider(Op: Op->getOperand(Num: 1), Index, Depth: Depth + 1, VectorIndex);
9393 if (!RHS)
9394 return std::nullopt;
9395
9396 if (LHS->isConstantZero())
9397 return RHS;
9398 if (RHS->isConstantZero())
9399 return LHS;
9400 return std::nullopt;
9401 }
9402 case ISD::SHL: {
9403 auto ShiftOp = dyn_cast<ConstantSDNode>(Val: Op->getOperand(Num: 1));
9404 if (!ShiftOp)
9405 return std::nullopt;
9406
9407 uint64_t BitShift = ShiftOp->getZExtValue();
9408
9409 if (BitShift % 8 != 0)
9410 return std::nullopt;
9411 uint64_t ByteShift = BitShift / 8;
9412
9413 // If we are shifting by an amount greater than the index we are trying to
9414 // provide, then do not provide anything. Otherwise, subtract the index by
9415 // the amount we shifted by.
9416 return Index < ByteShift
9417 ? SDByteProvider::getConstantZero()
9418 : calculateByteProvider(Op: Op->getOperand(Num: 0), Index: Index - ByteShift,
9419 Depth: Depth + 1, VectorIndex, StartingIndex: Index);
9420 }
9421 case ISD::ANY_EXTEND:
9422 case ISD::SIGN_EXTEND:
9423 case ISD::ZERO_EXTEND: {
9424 SDValue NarrowOp = Op->getOperand(Num: 0);
9425 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
9426 if (NarrowBitWidth % 8 != 0)
9427 return std::nullopt;
9428 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9429
9430 if (Index >= NarrowByteWidth)
9431 return Op.getOpcode() == ISD::ZERO_EXTEND
9432 ? std::optional<SDByteProvider>(
9433 SDByteProvider::getConstantZero())
9434 : std::nullopt;
9435 return calculateByteProvider(Op: NarrowOp, Index, Depth: Depth + 1, VectorIndex,
9436 StartingIndex);
9437 }
9438 case ISD::BSWAP:
9439 return calculateByteProvider(Op: Op->getOperand(Num: 0), Index: ByteWidth - Index - 1,
9440 Depth: Depth + 1, VectorIndex, StartingIndex);
9441 case ISD::EXTRACT_VECTOR_ELT: {
9442 auto OffsetOp = dyn_cast<ConstantSDNode>(Val: Op->getOperand(Num: 1));
9443 if (!OffsetOp)
9444 return std::nullopt;
9445
9446 VectorIndex = OffsetOp->getZExtValue();
9447
9448 SDValue NarrowOp = Op->getOperand(Num: 0);
9449 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
9450 if (NarrowBitWidth % 8 != 0)
9451 return std::nullopt;
9452 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9453 // EXTRACT_VECTOR_ELT can extend the element type to the width of the return
9454 // type, leaving the high bits undefined.
9455 if (Index >= NarrowByteWidth)
9456 return std::nullopt;
9457
9458 // Check to see if the position of the element in the vector corresponds
9459 // with the byte we are trying to provide for. In the case of a vector of
9460 // i8, this simply means the VectorIndex == StartingIndex. For non i8 cases,
9461 // the element will provide a range of bytes. For example, if we have a
9462 // vector of i16s, each element provides two bytes (V[1] provides byte 2 and
9463 // 3).
9464 if (*VectorIndex * NarrowByteWidth > StartingIndex)
9465 return std::nullopt;
9466 if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
9467 return std::nullopt;
9468
9469 return calculateByteProvider(Op: Op->getOperand(Num: 0), Index, Depth: Depth + 1,
9470 VectorIndex, StartingIndex);
9471 }
9472 case ISD::LOAD: {
9473 auto L = cast<LoadSDNode>(Val: Op.getNode());
9474 if (!L->isSimple() || L->isIndexed())
9475 return std::nullopt;
9476
9477 unsigned NarrowBitWidth = L->getMemoryVT().getScalarSizeInBits();
9478 if (NarrowBitWidth % 8 != 0)
9479 return std::nullopt;
9480 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9481
9482    // If the width of the load does not reach the byte we are trying to provide
9483    // for and it is not a ZEXTLOAD, then the load does not provide the byte in
9484    // question.
9485 if (Index >= NarrowByteWidth)
9486 return L->getExtensionType() == ISD::ZEXTLOAD
9487 ? std::optional<SDByteProvider>(
9488 SDByteProvider::getConstantZero())
9489 : std::nullopt;
9490
9491 unsigned BPVectorIndex = VectorIndex.value_or(u: 0U);
9492 return SDByteProvider::getSrc(Val: L, ByteOffset: Index, VectorOffset: BPVectorIndex);
9493 }
9494 }
9495
9496 return std::nullopt;
9497}
9498
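// Map byte index i within a BW-byte wide value to its offset in memory for a
// little-endian or big-endian layout, respectively.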
9499static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
9500 return i;
9501}
9502
9503static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
9504 return BW - i - 1;
9505}
9506
9507// Check if the byte offsets we are looking at match either a big or little
9508// endian value being loaded. Return true for big endian, false for little
9509// endian, and std::nullopt if the match failed.
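// For example, offsets {0, 1, 2, 3} relative to FirstOffset indicate a
// little-endian layout, while {3, 2, 1, 0} indicate a big-endian layout.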
9510static std::optional<bool> isBigEndian(ArrayRef<int64_t> ByteOffsets,
9511 int64_t FirstOffset) {
9512 // The endian can be decided only when it is 2 bytes at least.
9513 unsigned Width = ByteOffsets.size();
9514 if (Width < 2)
9515 return std::nullopt;
9516
9517 bool BigEndian = true, LittleEndian = true;
9518 for (unsigned i = 0; i < Width; i++) {
9519 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
9520 LittleEndian &= CurrentByteOffset == littleEndianByteAt(BW: Width, i);
9521 BigEndian &= CurrentByteOffset == bigEndianByteAt(BW: Width, i);
9522 if (!BigEndian && !LittleEndian)
9523 return std::nullopt;
9524 }
9525
9526  assert((BigEndian != LittleEndian) &&
9527         "It should be either big endian or little endian");
9528 return BigEndian;
9529}
9530
9531// Look through one layer of truncate or extend.
9532static SDValue stripTruncAndExt(SDValue Value) {
9533 switch (Value.getOpcode()) {
9534 case ISD::TRUNCATE:
9535 case ISD::ZERO_EXTEND:
9536 case ISD::SIGN_EXTEND:
9537 case ISD::ANY_EXTEND:
9538 return Value.getOperand(i: 0);
9539 }
9540 return SDValue();
9541}
9542
9543/// Match a pattern where a wide type scalar value is stored by several narrow
9544/// stores. Fold it into a single store or a BSWAP and a store if the target
9545/// supports it.
9546///
9547/// Assuming little endian target:
9548/// i8 *p = ...
9549/// i32 val = ...
9550/// p[0] = (val >> 0) & 0xFF;
9551/// p[1] = (val >> 8) & 0xFF;
9552/// p[2] = (val >> 16) & 0xFF;
9553/// p[3] = (val >> 24) & 0xFF;
9554/// =>
9555/// *((i32)p) = val;
9556///
9557/// i8 *p = ...
9558/// i32 val = ...
9559/// p[0] = (val >> 24) & 0xFF;
9560/// p[1] = (val >> 16) & 0xFF;
9561/// p[2] = (val >> 8) & 0xFF;
9562/// p[3] = (val >> 0) & 0xFF;
9563/// =>
9564/// *((i32)p) = BSWAP(val);
9565SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
9566 // The matching looks for "store (trunc x)" patterns that appear early but are
9567 // likely to be replaced by truncating store nodes during combining.
9568 // TODO: If there is evidence that running this later would help, this
9569 // limitation could be removed. Legality checks may need to be added
9570 // for the created store and optional bswap/rotate.
9571 if (LegalOperations || OptLevel == CodeGenOptLevel::None)
9572 return SDValue();
9573
9574 // We only handle merging simple stores of 1-4 bytes.
9575 // TODO: Allow unordered atomics when wider type is legal (see D66309)
9576 EVT MemVT = N->getMemoryVT();
9577 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
9578 !N->isSimple() || N->isIndexed())
9579 return SDValue();
9580
9581  // Collect all of the stores in the chain, up to the maximum store width (i64).
9582 SDValue Chain = N->getChain();
9583 SmallVector<StoreSDNode *, 8> Stores = {N};
9584 unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
9585 unsigned MaxWideNumBits = 64;
9586 unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
9587 while (auto *Store = dyn_cast<StoreSDNode>(Val&: Chain)) {
9588 // All stores must be the same size to ensure that we are writing all of the
9589 // bytes in the wide value.
9590 // This store should have exactly one use as a chain operand for another
9591 // store in the merging set. If there are other chain uses, then the
9592 // transform may not be safe because order of loads/stores outside of this
9593 // set may not be preserved.
9594 // TODO: We could allow multiple sizes by tracking each stored byte.
9595 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
9596 Store->isIndexed() || !Store->hasOneUse())
9597 return SDValue();
9598 Stores.push_back(Elt: Store);
9599 Chain = Store->getChain();
9600 if (MaxStores < Stores.size())
9601 return SDValue();
9602 }
9603 // There is no reason to continue if we do not have at least a pair of stores.
9604 if (Stores.size() < 2)
9605 return SDValue();
9606
9607 // Handle simple types only.
9608 LLVMContext &Context = *DAG.getContext();
9609 unsigned NumStores = Stores.size();
9610 unsigned WideNumBits = NumStores * NarrowNumBits;
9611 if (WideNumBits != 16 && WideNumBits != 32 && WideNumBits != 64)
9612 return SDValue();
9613
9614 // Check if all bytes of the source value that we are looking at are stored
9615 // to the same base address. Collect offsets from Base address into OffsetMap.
9616 SDValue SourceValue;
9617 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
9618 int64_t FirstOffset = INT64_MAX;
9619 StoreSDNode *FirstStore = nullptr;
9620 std::optional<BaseIndexOffset> Base;
9621 for (auto *Store : Stores) {
9622 // All the stores store different parts of the CombinedValue. A truncate is
9623 // required to get the partial value.
9624 SDValue Trunc = Store->getValue();
9625 if (Trunc.getOpcode() != ISD::TRUNCATE)
9626 return SDValue();
9627 // Other than the first/last part, a shift operation is required to get the
9628 // offset.
9629 int64_t Offset = 0;
9630 SDValue WideVal = Trunc.getOperand(i: 0);
9631 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
9632 isa<ConstantSDNode>(Val: WideVal.getOperand(i: 1))) {
9633 // The shift amount must be a constant multiple of the narrow type.
9634 // It is translated to the offset address in the wide source value "y".
9635 //
9636 // x = srl y, ShiftAmtC
9637 // i8 z = trunc x
9638 // store z, ...
9639 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(i: 1);
9640 if (ShiftAmtC % NarrowNumBits != 0)
9641 return SDValue();
9642
9643 // Make sure we aren't reading bits that are shifted in.
9644 if (ShiftAmtC > WideVal.getScalarValueSizeInBits() - NarrowNumBits)
9645 return SDValue();
9646
9647 Offset = ShiftAmtC / NarrowNumBits;
9648 WideVal = WideVal.getOperand(i: 0);
9649 }
9650
9651 // Stores must share the same source value with different offsets.
9652 if (!SourceValue)
9653 SourceValue = WideVal;
9654 else if (SourceValue != WideVal) {
9655 // Truncate and extends can be stripped to see if the values are related.
9656 if (stripTruncAndExt(Value: SourceValue) != WideVal &&
9657 stripTruncAndExt(Value: WideVal) != SourceValue)
9658 return SDValue();
9659
9660 if (WideVal.getScalarValueSizeInBits() >
9661 SourceValue.getScalarValueSizeInBits())
9662 SourceValue = WideVal;
9663
9664 // Give up if the source value type is smaller than the store size.
9665 if (SourceValue.getScalarValueSizeInBits() < WideNumBits)
9666 return SDValue();
9667 }
9668
9669 // Stores must share the same base address.
9670 BaseIndexOffset Ptr = BaseIndexOffset::match(N: Store, DAG);
9671 int64_t ByteOffsetFromBase = 0;
9672 if (!Base)
9673 Base = Ptr;
9674 else if (!Base->equalBaseIndex(Other: Ptr, DAG, Off&: ByteOffsetFromBase))
9675 return SDValue();
9676
9677 // Remember the first store.
9678 if (ByteOffsetFromBase < FirstOffset) {
9679 FirstStore = Store;
9680 FirstOffset = ByteOffsetFromBase;
9681 }
9682 // Map the offset in the store and the offset in the combined value, and
9683 // early return if it has been set before.
9684 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
9685 return SDValue();
9686 OffsetMap[Offset] = ByteOffsetFromBase;
9687 }
9688
9689 EVT WideVT = EVT::getIntegerVT(Context, BitWidth: WideNumBits);
9690
9691 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9692 assert(FirstStore && "First store must be set");
9693
9694 // Check that a store of the wide type is both allowed and fast on the target
9695 const DataLayout &Layout = DAG.getDataLayout();
9696 unsigned Fast = 0;
9697 bool Allowed = TLI.allowsMemoryAccess(Context, DL: Layout, VT: WideVT,
9698 MMO: *FirstStore->getMemOperand(), Fast: &Fast);
9699 if (!Allowed || !Fast)
9700 return SDValue();
9701
9702 // Check if the pieces of the value are going to the expected places in memory
9703 // to merge the stores.
9704 auto checkOffsets = [&](bool MatchLittleEndian) {
9705 if (MatchLittleEndian) {
9706 for (unsigned i = 0; i != NumStores; ++i)
9707 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
9708 return false;
9709 } else { // MatchBigEndian by reversing loop counter.
9710 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
9711 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
9712 return false;
9713 }
9714 return true;
9715 };
9716
9717 // Check if the offsets line up for the native data layout of this target.
9718 bool NeedBswap = false;
9719 bool NeedRotate = false;
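// For example, on a little-endian target two i16 halves of an i32 that were
// stored in big-endian order can still be merged: rotating the combined i32
// value by 16 bits swaps the halves into place before the single wide store.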
9720 if (!checkOffsets(Layout.isLittleEndian())) {
9721 // Special-case: check if byte offsets line up for the opposite endian.
9722 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
9723 NeedBswap = true;
9724 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
9725 NeedRotate = true;
9726 else
9727 return SDValue();
9728 }
9729
9730 SDLoc DL(N);
9731 if (WideVT != SourceValue.getValueType()) {
9732 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
9733 "Unexpected store value to merge");
9734 SourceValue = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: WideVT, Operand: SourceValue);
9735 }
9736
9737 // Before legalize we can introduce illegal bswaps/rotates which will be later
9738 // converted to an explicit bswap sequence. This way we end up with a single
9739 // store and byte shuffling instead of several stores and byte shuffling.
9740 if (NeedBswap) {
9741 SourceValue = DAG.getNode(Opcode: ISD::BSWAP, DL, VT: WideVT, Operand: SourceValue);
9742 } else if (NeedRotate) {
9743 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
9744 SDValue RotAmt = DAG.getConstant(Val: WideNumBits / 2, DL, VT: WideVT);
9745 SourceValue = DAG.getNode(Opcode: ISD::ROTR, DL, VT: WideVT, N1: SourceValue, N2: RotAmt);
9746 }
9747
9748 SDValue NewStore =
9749 DAG.getStore(Chain, dl: DL, Val: SourceValue, Ptr: FirstStore->getBasePtr(),
9750 PtrInfo: FirstStore->getPointerInfo(), Alignment: FirstStore->getAlign());
9751
9752 // Rely on other DAG combine rules to remove the other individual stores.
9753 DAG.ReplaceAllUsesWith(From: N, To: NewStore.getNode());
9754 return NewStore;
9755}
9756
9757/// Match a pattern where a wide type scalar value is loaded by several narrow
9758/// loads and combined by shifts and ors. Fold it into a single load or a load
9759 /// and a BSWAP if the target supports it.
9760///
9761/// Assuming little endian target:
9762/// i8 *a = ...
9763/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
9764/// =>
9765/// i32 val = *((i32)a)
9766///
9767/// i8 *a = ...
9768/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
9769/// =>
9770/// i32 val = BSWAP(*((i32)a))
9771///
9772/// TODO: This rule matches complex patterns with OR node roots and doesn't
9773/// interact well with the worklist mechanism. When a part of the pattern is
9774 /// updated (e.g. one of the loads), its direct users are put into the worklist,
9775/// but the root node of the pattern which triggers the load combine is not
9776/// necessarily a direct user of the changed node. For example, once the address
9777 /// of the t28 load is reassociated, the load combine won't be triggered:
9778/// t25: i32 = add t4, Constant:i32<2>
9779/// t26: i64 = sign_extend t25
9780/// t27: i64 = add t2, t26
9781/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
9782/// t29: i32 = zero_extend t28
9783/// t32: i32 = shl t29, Constant:i8<8>
9784/// t33: i32 = or t23, t32
9785/// As a possible fix visitLoad can check if the load can be a part of a load
9786/// combine pattern and add corresponding OR roots to the worklist.
9787SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
9788 assert(N->getOpcode() == ISD::OR &&
9789 "Can only match load combining against OR nodes");
9790
9791 // Handles simple types only
9792 EVT VT = N->getValueType(ResNo: 0);
9793 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
9794 return SDValue();
9795 unsigned ByteWidth = VT.getSizeInBits() / 8;
9796
9797 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
9798 auto MemoryByteOffset = [&](SDByteProvider P) {
9799 assert(P.hasSrc() && "Must be a memory byte provider");
9800 auto *Load = cast<LoadSDNode>(Val: P.Src.value());
9801
9802 unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();
9803
9804 assert(LoadBitWidth % 8 == 0 &&
9805 "can only analyze providers for individual bytes not bit");
9806 unsigned LoadByteWidth = LoadBitWidth / 8;
9807 return IsBigEndianTarget ? bigEndianByteAt(BW: LoadByteWidth, i: P.DestOffset)
9808 : littleEndianByteAt(BW: LoadByteWidth, i: P.DestOffset);
9809 };
9810
9811 std::optional<BaseIndexOffset> Base;
9812 SDValue Chain;
9813
9814 SmallPtrSet<LoadSDNode *, 8> Loads;
9815 std::optional<SDByteProvider> FirstByteProvider;
9816 int64_t FirstOffset = INT64_MAX;
9817
9818 // Check if all the bytes of the OR we are looking at are loaded from the same
9819 // base address. Collect byte offsets from the Base address in ByteOffsets.
9820 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
9821 unsigned ZeroExtendedBytes = 0;
9822 for (int i = ByteWidth - 1; i >= 0; --i) {
9823 auto P =
9824 calculateByteProvider(Op: SDValue(N, 0), Index: i, Depth: 0, /*VectorIndex*/ std::nullopt,
9825 /*StartingIndex*/ i);
9826 if (!P)
9827 return SDValue();
9828
9829 if (P->isConstantZero()) {
9830 // It's OK for the N most significant bytes to be 0; we can just
9831 // zero-extend the load.
9832 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
9833 return SDValue();
9834 continue;
9835 }
9836 assert(P->hasSrc() && "provenance should either be memory or zero");
9837 auto *L = cast<LoadSDNode>(Val: P->Src.value());
9838
9839 // All loads must share the same chain
9840 SDValue LChain = L->getChain();
9841 if (!Chain)
9842 Chain = LChain;
9843 else if (Chain != LChain)
9844 return SDValue();
9845
9846 // Loads must share the same base address
9847 BaseIndexOffset Ptr = BaseIndexOffset::match(N: L, DAG);
9848 int64_t ByteOffsetFromBase = 0;
9849
9850 // For vector loads, the expected load combine pattern will have an
9851 // ExtractElement for each index in the vector. While each of these
9852 // ExtractElements will be accessing the same base address as determined
9853 // by the load instruction, the actual bytes they interact with will differ
9854 // due to different ExtractElement indices. To accurately determine the
9855 // byte position of an ExtractElement, we offset the base load ptr with
9856 // the index multiplied by the byte size of each element in the vector.
9857 if (L->getMemoryVT().isVector()) {
9858 unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
9859 if (LoadWidthInBit % 8 != 0)
9860 return SDValue();
9861 unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
9862 Ptr.addToOffset(VectorOff: ByteOffsetFromVector);
9863 }
9864
9865 if (!Base)
9866 Base = Ptr;
9867
9868 else if (!Base->equalBaseIndex(Other: Ptr, DAG, Off&: ByteOffsetFromBase))
9869 return SDValue();
9870
9871 // Calculate the offset of the current byte from the base address
9872 ByteOffsetFromBase += MemoryByteOffset(*P);
9873 ByteOffsets[i] = ByteOffsetFromBase;
9874
9875 // Remember the first byte load
9876 if (ByteOffsetFromBase < FirstOffset) {
9877 FirstByteProvider = P;
9878 FirstOffset = ByteOffsetFromBase;
9879 }
9880
9881 Loads.insert(Ptr: L);
9882 }
9883
9884 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
9885 "memory, so there must be at least one load which produces the value");
9886 assert(Base && "Base address of the accessed memory location must be set");
9887 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9888
9889 bool NeedsZext = ZeroExtendedBytes > 0;
9890
9891 EVT MemVT =
9892 EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: (ByteWidth - ZeroExtendedBytes) * 8);
9893
9894 if (!MemVT.isSimple())
9895 return SDValue();
9896
9897 // Before legalize we can introduce too wide illegal loads which will be later
9898 // split into legal sized loads. This enables us to combine i64 load by i8
9899 // patterns to a couple of i32 loads on 32 bit targets.
9900 if (LegalOperations &&
9901 !TLI.isLoadExtLegal(ExtType: NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, ValVT: VT,
9902 MemVT))
9903 return SDValue();
9904
9905 // Check if the bytes of the OR we are looking at match with either big or
9906 // little endian value load
9907 std::optional<bool> IsBigEndian = isBigEndian(
9908 ByteOffsets: ArrayRef(ByteOffsets).drop_back(N: ZeroExtendedBytes), FirstOffset);
9909 if (!IsBigEndian)
9910 return SDValue();
9911
9912 assert(FirstByteProvider && "must be set");
9913
9914 // Ensure that the first byte is loaded from offset zero of the first load,
9915 // so the combined value can be loaded from the first load's address.
9916 if (MemoryByteOffset(*FirstByteProvider) != 0)
9917 return SDValue();
9918 auto *FirstLoad = cast<LoadSDNode>(Val: FirstByteProvider->Src.value());
9919
9920 // The node we are looking at matches with the pattern, check if we can
9921 // replace it with a single (possibly zero-extended) load and bswap + shift if
9922 // needed.
9923
9924 // If the load needs byte swap check if the target supports it
9925 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
9926
9927 // Before legalize we can introduce illegal bswaps which will be later
9928 // converted to an explicit bswap sequence. This way we end up with a single
9929 // load and byte shuffling instead of several loads and byte shuffling.
9930 // We do not introduce illegal bswaps when zero-extending as this tends to
9931 // introduce too many arithmetic instructions.
9932 if (NeedsBswap && (LegalOperations || NeedsZext) &&
9933 !TLI.isOperationLegal(Op: ISD::BSWAP, VT))
9934 return SDValue();
9935
9936 // If we need to bswap and zero extend, we have to insert a shift. Check that
9937 // it is legal.
9938 if (NeedsBswap && NeedsZext && LegalOperations &&
9939 !TLI.isOperationLegal(Op: ISD::SHL, VT))
9940 return SDValue();
9941
9942 // Check that a load of the wide type is both allowed and fast on the target
9943 unsigned Fast = 0;
9944 bool Allowed =
9945 TLI.allowsMemoryAccess(Context&: *DAG.getContext(), DL: DAG.getDataLayout(), VT: MemVT,
9946 MMO: *FirstLoad->getMemOperand(), Fast: &Fast);
9947 if (!Allowed || !Fast)
9948 return SDValue();
9949
9950 SDValue NewLoad =
9951 DAG.getExtLoad(ExtType: NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, dl: SDLoc(N), VT,
9952 Chain, Ptr: FirstLoad->getBasePtr(),
9953 PtrInfo: FirstLoad->getPointerInfo(), MemVT, Alignment: FirstLoad->getAlign());
9954
9955 // Transfer chain users from old loads to the new load.
9956 for (LoadSDNode *L : Loads)
9957 DAG.makeEquivalentMemoryOrdering(OldLoad: L, NewMemOp: NewLoad);
9958
9959 if (!NeedsBswap)
9960 return NewLoad;
9961
9962 SDValue ShiftedLoad =
9963 NeedsZext ? DAG.getNode(Opcode: ISD::SHL, DL: SDLoc(N), VT, N1: NewLoad,
9964 N2: DAG.getShiftAmountConstant(Val: ZeroExtendedBytes * 8,
9965 VT, DL: SDLoc(N)))
9966 : NewLoad;
9967 return DAG.getNode(Opcode: ISD::BSWAP, DL: SDLoc(N), VT, Operand: ShiftedLoad);
9968}
9969
9970// If the target has andn, bsl, or a similar bit-select instruction,
9971// we want to unfold masked merge, with canonical pattern of:
9972 //   |     A      | |B|
9973 //   ((x ^ y) & m) ^ y
9974 //    |  D  |
9975// Into:
9976// (x & m) | (y & ~m)
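// For example, with m = 0x0F: ((x ^ y) & 0x0F) ^ y keeps the low nibble of x
// and the high nibble of y, i.e. (x & 0x0F) | (y & 0xF0).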
9977// If y is a constant, m is not a 'not', and the 'andn' does not work with
9978// immediates, we unfold into a different pattern:
9979// ~(~x & m) & (m | y)
9980// If x is a constant, m is a 'not', and the 'andn' does not work with
9981// immediates, we unfold into a different pattern:
9982// (x | ~m) & ~(~m & ~y)
9983// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
9984// the very least that breaks andnpd / andnps patterns, and because those
9985// patterns are simplified in IR and shouldn't be created in the DAG
9986SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
9987 assert(N->getOpcode() == ISD::XOR);
9988
9989 // Don't touch 'not' (i.e. where y = -1).
9990 if (isAllOnesOrAllOnesSplat(V: N->getOperand(Num: 1)))
9991 return SDValue();
9992
9993 EVT VT = N->getValueType(ResNo: 0);
9994
9995 // There are 3 commutable operators in the pattern,
9996 // so we have to deal with 8 possible variants of the basic pattern.
9997 SDValue X, Y, M;
9998 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
9999 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
10000 return false;
10001 SDValue Xor = And.getOperand(i: XorIdx);
10002 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
10003 return false;
10004 SDValue Xor0 = Xor.getOperand(i: 0);
10005 SDValue Xor1 = Xor.getOperand(i: 1);
10006 // Don't touch 'not' (i.e. where y = -1).
10007 if (isAllOnesOrAllOnesSplat(V: Xor1))
10008 return false;
10009 if (Other == Xor0)
10010 std::swap(a&: Xor0, b&: Xor1);
10011 if (Other != Xor1)
10012 return false;
10013 X = Xor0;
10014 Y = Xor1;
10015 M = And.getOperand(i: XorIdx ? 0 : 1);
10016 return true;
10017 };
10018
10019 SDValue N0 = N->getOperand(Num: 0);
10020 SDValue N1 = N->getOperand(Num: 1);
10021 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
10022 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
10023 return SDValue();
10024
10025 // Don't do anything if the mask is constant. This should not be reachable.
10026 // InstCombine should have already unfolded this pattern, and DAGCombiner
10027 // probably shouldn't produce it either.
10028 if (isa<ConstantSDNode>(Val: M.getNode()))
10029 return SDValue();
10030
10031 // We can transform if the target has AndNot
10032 if (!TLI.hasAndNot(X: M))
10033 return SDValue();
10034
10035 SDLoc DL(N);
10036
10037 // If Y is a constant, check that 'andn' works with immediates, unless M is
10038 // a bitwise not that would already allow ANDN to be used.
10039 if (!TLI.hasAndNot(X: Y) && !isBitwiseNot(V: M)) {
10040 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
10041 // If not, we need to do a bit more work to make sure andn is still used.
10042 SDValue NotX = DAG.getNOT(DL, Val: X, VT);
10043 SDValue LHS = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: NotX, N2: M);
10044 SDValue NotLHS = DAG.getNOT(DL, Val: LHS, VT);
10045 SDValue RHS = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: M, N2: Y);
10046 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: NotLHS, N2: RHS);
10047 }
10048
10049 // If X is a constant and M is a bitwise not, check that 'andn' works with
10050 // immediates.
10051 if (!TLI.hasAndNot(X) && isBitwiseNot(V: M)) {
10052 assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
10053 // If not, we need to do a bit more work to make sure andn is still used.
10054 SDValue NotM = M.getOperand(i: 0);
10055 SDValue LHS = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: X, N2: NotM);
10056 SDValue NotY = DAG.getNOT(DL, Val: Y, VT);
10057 SDValue RHS = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: NotM, N2: NotY);
10058 SDValue NotRHS = DAG.getNOT(DL, Val: RHS, VT);
10059 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: LHS, N2: NotRHS);
10060 }
10061
10062 SDValue LHS = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: X, N2: M);
10063 SDValue NotM = DAG.getNOT(DL, Val: M, VT);
10064 SDValue RHS = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Y, N2: NotM);
10065
10066 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: LHS, N2: RHS);
10067}
10068
10069SDValue DAGCombiner::visitXOR(SDNode *N) {
10070 SDValue N0 = N->getOperand(Num: 0);
10071 SDValue N1 = N->getOperand(Num: 1);
10072 EVT VT = N0.getValueType();
10073 SDLoc DL(N);
10074
10075 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
10076 if (N0.isUndef() && N1.isUndef())
10077 return DAG.getConstant(Val: 0, DL, VT);
10078
10079 // fold (xor x, undef) -> undef
10080 if (N0.isUndef())
10081 return N0;
10082 if (N1.isUndef())
10083 return N1;
10084
10085 // fold (xor c1, c2) -> c1^c2
10086 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::XOR, DL, VT, Ops: {N0, N1}))
10087 return C;
10088
10089 // canonicalize constant to RHS
10090 if (DAG.isConstantIntBuildVectorOrConstantInt(N: N0) &&
10091 !DAG.isConstantIntBuildVectorOrConstantInt(N: N1))
10092 return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1, N2: N0);
10093
10094 // fold vector ops
10095 if (VT.isVector()) {
10096 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10097 return FoldedVOp;
10098
10099 // fold (xor x, 0) -> x, vector edition
10100 if (ISD::isConstantSplatVectorAllZeros(N: N1.getNode()))
10101 return N0;
10102 }
10103
10104 // fold (xor x, 0) -> x
10105 if (isNullConstant(V: N1))
10106 return N0;
10107
10108 if (SDValue NewSel = foldBinOpIntoSelect(BO: N))
10109 return NewSel;
10110
10111 // reassociate xor
10112 if (SDValue RXOR = reassociateOps(Opc: ISD::XOR, DL, N0, N1, Flags: N->getFlags()))
10113 return RXOR;
10114
10115 // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
10116 if (SDValue SD =
10117 reassociateReduction(RedOpc: ISD::VECREDUCE_XOR, Opc: ISD::XOR, DL, VT, N0, N1))
10118 return SD;
10119
10120 // fold (a^b) -> (a|b) iff a and b share no bits.
10121 if ((!LegalOperations || TLI.isOperationLegal(Op: ISD::OR, VT)) &&
10122 DAG.haveNoCommonBitsSet(A: N0, B: N1))
10123 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: N0, N2: N1, Flags: SDNodeFlags::Disjoint);
10124
10125 // look for 'add-like' folds:
10126 // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
10127 if ((!LegalOperations || TLI.isOperationLegal(Op: ISD::ADD, VT)) &&
10128 isMinSignedConstant(V: N1))
10129 if (SDValue Combined = visitADDLike(N))
10130 return Combined;
10131
10132 // fold (not (setcc x, y, cc)) -> (setcc x, y, !cc)
10133 // Avoid breaking: and (not(setcc x, y, cc), z) -> andn for vec
10134 unsigned N0Opcode = N0.getOpcode();
10135 SDValue LHS, RHS, CC;
10136 if (TLI.isConstTrueVal(N: N1) &&
10137 isSetCCEquivalent(N: N0, LHS, RHS, CC, /*MatchStrict*/ true) &&
10138 !(VT.isVector() && TLI.hasAndNot(X: SDValue(N, 0)) && N->hasOneUse() &&
10139 N->use_begin()->getUser()->getOpcode() == ISD::AND)) {
10140 ISD::CondCode NotCC = ISD::getSetCCInverse(Operation: cast<CondCodeSDNode>(Val&: CC)->get(),
10141 Type: LHS.getValueType());
10142 if (!LegalOperations ||
10143 TLI.isCondCodeLegal(CC: NotCC, VT: LHS.getSimpleValueType())) {
10144 switch (N0Opcode) {
10145 default:
10146 llvm_unreachable("Unhandled SetCC Equivalent!");
10147 case ISD::SETCC:
10148 return DAG.getSetCC(DL: SDLoc(N0), VT, LHS, RHS, Cond: NotCC);
10149 case ISD::SELECT_CC:
10150 return DAG.getSelectCC(DL: SDLoc(N0), LHS, RHS, True: N0.getOperand(i: 2),
10151 False: N0.getOperand(i: 3), Cond: NotCC);
10152 case ISD::STRICT_FSETCC:
10153 case ISD::STRICT_FSETCCS: {
10154 if (N0.hasOneUse()) {
10155 // FIXME Can we handle multiple uses? Could we token factor the chain
10156 // results from the new/old setcc?
10157 SDValue SetCC =
10158 DAG.getSetCC(DL: SDLoc(N0), VT, LHS, RHS, Cond: NotCC,
10159 Chain: N0.getOperand(i: 0), IsSignaling: N0Opcode == ISD::STRICT_FSETCCS);
10160 CombineTo(N, Res: SetCC);
10161 DAG.ReplaceAllUsesOfValueWith(From: N0.getValue(R: 1), To: SetCC.getValue(R: 1));
10162 recursivelyDeleteUnusedNodes(N: N0.getNode());
10163 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10164 }
10165 break;
10166 }
10167 }
10168 }
10169 }
10170
10171 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
10172 if (isOneConstant(V: N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
10173 isSetCCEquivalent(N: N0.getOperand(i: 0), LHS, RHS, CC)){
10174 SDValue V = N0.getOperand(i: 0);
10175 SDLoc DL0(N0);
10176 V = DAG.getNode(Opcode: ISD::XOR, DL: DL0, VT: V.getValueType(), N1: V,
10177 N2: DAG.getConstant(Val: 1, DL: DL0, VT: V.getValueType()));
10178 AddToWorklist(N: V.getNode());
10179 return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT, Operand: V);
10180 }
10181
10182 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
10183 // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are setcc
10184 if (isOneConstant(V: N1) && VT == MVT::i1 && N0.hasOneUse() &&
10185 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
10186 SDValue N00 = N0.getOperand(i: 0), N01 = N0.getOperand(i: 1);
10187 if (isOneUseSetCC(N: N01) || isOneUseSetCC(N: N00)) {
10188 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
10189 N00 = DAG.getNode(Opcode: ISD::XOR, DL: SDLoc(N00), VT, N1: N00, N2: N1); // N00 = ~N00
10190 N01 = DAG.getNode(Opcode: ISD::XOR, DL: SDLoc(N01), VT, N1: N01, N2: N1); // N01 = ~N01
10191 AddToWorklist(N: N00.getNode()); AddToWorklist(N: N01.getNode());
10192 return DAG.getNode(Opcode: NewOpcode, DL, VT, N1: N00, N2: N01);
10193 }
10194 }
10195 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
10196 // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are constants
10197 if (isAllOnesConstant(V: N1) && N0.hasOneUse() &&
10198 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
10199 SDValue N00 = N0.getOperand(i: 0), N01 = N0.getOperand(i: 1);
10200 if (isa<ConstantSDNode>(Val: N01) || isa<ConstantSDNode>(Val: N00)) {
10201 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
10202 N00 = DAG.getNode(Opcode: ISD::XOR, DL: SDLoc(N00), VT, N1: N00, N2: N1); // N00 = ~N00
10203 N01 = DAG.getNode(Opcode: ISD::XOR, DL: SDLoc(N01), VT, N1: N01, N2: N1); // N01 = ~N01
10204 AddToWorklist(N: N00.getNode()); AddToWorklist(N: N01.getNode());
10205 return DAG.getNode(Opcode: NewOpcode, DL, VT, N1: N00, N2: N01);
10206 }
10207 }
10208
10209 // fold (not (sub Y, X)) -> (add X, ~Y) if Y is a constant
10210 if (N0.getOpcode() == ISD::SUB && isAllOnesConstant(V: N1)) {
10211 SDValue Y = N0.getOperand(i: 0);
10212 SDValue X = N0.getOperand(i: 1);
10213
10214 if (auto *YConst = dyn_cast<ConstantSDNode>(Val&: Y)) {
10215 APInt NotYValue = ~YConst->getAPIntValue();
10216 SDValue NotY = DAG.getConstant(Val: NotYValue, DL, VT);
10217 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: X, N2: NotY, Flags: N->getFlags());
10218 }
10219 }
10220
10221 // fold (not (add X, -1)) -> (neg X)
10222 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && isAllOnesConstant(V: N1) &&
10223 isAllOnesOrAllOnesSplat(V: N0.getOperand(i: 1))) {
10224 return DAG.getNegative(Val: N0.getOperand(i: 0), DL, VT);
10225 }
10226
10227 // fold (xor (and x, y), y) -> (and (not x), y)
10228 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(Num: 1) == N1) {
10229 SDValue X = N0.getOperand(i: 0);
10230 SDValue NotX = DAG.getNOT(DL: SDLoc(X), Val: X, VT);
10231 AddToWorklist(N: NotX.getNode());
10232 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: NotX, N2: N1);
10233 }
10234
10235 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
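// (Y is the sign mask of X, so (X + Y) ^ Y yields X for X >= 0 and -X for
// X < 0, i.e. the absolute value.)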
10236 if (!LegalOperations || hasOperation(Opcode: ISD::ABS, VT)) {
10237 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
10238 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
10239 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
10240 SDValue A0 = A.getOperand(i: 0), A1 = A.getOperand(i: 1);
10241 SDValue S0 = S.getOperand(i: 0);
10242 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
10243 if (ConstantSDNode *C = isConstOrConstSplat(N: S.getOperand(i: 1)))
10244 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
10245 return DAG.getNode(Opcode: ISD::ABS, DL, VT, Operand: S0);
10246 }
10247 }
10248
10249 // fold (xor x, x) -> 0
10250 if (N0 == N1)
10251 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
10252
10253 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
10254 // Here is a concrete example of this equivalence:
10255 // i16 x == 14
10256 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
10257 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
10258 //
10259 // =>
10260 //
10261 // i16 ~1 == 0b1111111111111110
10262 // i16 rol(~1, 14) == 0b1011111111111111
10263 //
10264 // Some additional tips to help conceptualize this transform:
10265 // - Try to see the operation as placing a single zero in a value of all ones.
10266 // - There exists no value for x which would allow the result to contain zero.
10267 // - Values of x larger than the bitwidth are undefined and do not require a
10268 // consistent result.
10269 // - Pushing the zero left requires shifting one bits in from the right.
10270 // A rotate left of ~1 is a nice way of achieving the desired result.
10271 if (TLI.isOperationLegalOrCustom(Op: ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
10272 isAllOnesConstant(V: N1) && isOneConstant(V: N0.getOperand(i: 0))) {
10273 return DAG.getNode(Opcode: ISD::ROTL, DL, VT, N1: DAG.getSignedConstant(Val: ~1, DL, VT),
10274 N2: N0.getOperand(i: 1));
10275 }
10276
10277 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
10278 if (N0Opcode == N1.getOpcode())
10279 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
10280 return V;
10281
10282 if (SDValue R = foldLogicOfShifts(N, LogicOp: N0, ShiftOp: N1, DAG))
10283 return R;
10284 if (SDValue R = foldLogicOfShifts(N, LogicOp: N1, ShiftOp: N0, DAG))
10285 return R;
10286 if (SDValue R = foldLogicTreeOfShifts(N, LeftHand: N0, RightHand: N1, DAG))
10287 return R;
10288
10289 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
10290 if (SDValue MM = unfoldMaskedMerge(N))
10291 return MM;
10292
10293 // Simplify the expression using non-local knowledge.
10294 if (SimplifyDemandedBits(Op: SDValue(N, 0)))
10295 return SDValue(N, 0);
10296
10297 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
10298 return Combined;
10299
10300 // fold (xor (smin(x, C), C)) -> select (x < C), xor(x, C), 0
10301 // fold (xor (smax(x, C), C)) -> select (x > C), xor(x, C), 0
10302 // fold (xor (umin(x, C), C)) -> select (x < C), xor(x, C), 0
10303 // fold (xor (umax(x, C), C)) -> select (x > C), xor(x, C), 0
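// (When x is clamped, the min/max result is C and xor(C, C) is 0; otherwise the
// result is x itself, giving xor(x, C).)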
10304 SDValue Op0;
10305 if (sd_match(N: N0, P: m_OneUse(P: m_AnyOf(preds: m_SMin(L: m_Value(N&: Op0), R: m_Specific(N: N1)),
10306 preds: m_SMax(L: m_Value(N&: Op0), R: m_Specific(N: N1)),
10307 preds: m_UMin(L: m_Value(N&: Op0), R: m_Specific(N: N1)),
10308 preds: m_UMax(L: m_Value(N&: Op0), R: m_Specific(N: N1)))))) {
10309
10310 if (isa<ConstantSDNode>(Val: N1) ||
10311 ISD::isBuildVectorOfConstantSDNodes(N: N1.getNode())) {
10312 // For vectors, only optimize when the constant is zero or all-ones to
10313 // avoid generating more instructions
10314 if (VT.isVector()) {
10315 ConstantSDNode *N1C = isConstOrConstSplat(N: N1);
10316 if (!N1C || (!N1C->isZero() && !N1C->isAllOnes()))
10317 return SDValue();
10318 }
10319
10320 // Avoid the fold if the minmax operation is legal and select is expensive
10321 if (TLI.isOperationLegal(Op: N0.getOpcode(), VT) &&
10322 TLI.isPredictableSelectExpensive())
10323 return SDValue();
10324
10325 EVT CCVT = getSetCCResultType(VT);
10326 ISD::CondCode CC;
10327 switch (N0.getOpcode()) {
10328 case ISD::SMIN:
10329 CC = ISD::SETLT;
10330 break;
10331 case ISD::SMAX:
10332 CC = ISD::SETGT;
10333 break;
10334 case ISD::UMIN:
10335 CC = ISD::SETULT;
10336 break;
10337 case ISD::UMAX:
10338 CC = ISD::SETUGT;
10339 break;
10340 }
10341 SDValue FN1 = DAG.getFreeze(V: N1);
10342 SDValue Cmp = DAG.getSetCC(DL, VT: CCVT, LHS: Op0, RHS: FN1, Cond: CC);
10343 SDValue XorXC = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Op0, N2: FN1);
10344 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
10345 return DAG.getSelect(DL, VT, Cond: Cmp, LHS: XorXC, RHS: Zero);
10346 }
10347 }
10348
10349 return SDValue();
10350}
10351
10352/// If we have a shift-by-constant of a bitwise logic op that itself has a
10353/// shift-by-constant operand with identical opcode, we may be able to convert
10354/// that into 2 independent shifts followed by the logic op. This is a
10355/// throughput improvement.
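/// For example: shl (and (shl X, 2), Y), 3 --> and (shl X, 5), (shl Y, 3)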
10356static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
10357 // Match a one-use bitwise logic op.
10358 SDValue LogicOp = Shift->getOperand(Num: 0);
10359 if (!LogicOp.hasOneUse())
10360 return SDValue();
10361
10362 unsigned LogicOpcode = LogicOp.getOpcode();
10363 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
10364 LogicOpcode != ISD::XOR)
10365 return SDValue();
10366
10367 // Find a matching one-use shift by constant.
10368 unsigned ShiftOpcode = Shift->getOpcode();
10369 SDValue C1 = Shift->getOperand(Num: 1);
10370 ConstantSDNode *C1Node = isConstOrConstSplat(N: C1);
10371 assert(C1Node && "Expected a shift with constant operand");
10372 const APInt &C1Val = C1Node->getAPIntValue();
10373 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
10374 const APInt *&ShiftAmtVal) {
10375 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
10376 return false;
10377
10378 ConstantSDNode *ShiftCNode = isConstOrConstSplat(N: V.getOperand(i: 1));
10379 if (!ShiftCNode)
10380 return false;
10381
10382 // Capture the shifted operand and shift amount value.
10383 ShiftOp = V.getOperand(i: 0);
10384 ShiftAmtVal = &ShiftCNode->getAPIntValue();
10385
10386 // Shift amount types do not have to match their operand type, so check that
10387 // the constants are the same width.
10388 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
10389 return false;
10390
10391 // The fold is not valid if the sum of the shift values doesn't fit in the
10392 // given shift amount type.
10393 bool Overflow = false;
10394 APInt NewShiftAmt = C1Val.uadd_ov(RHS: *ShiftAmtVal, Overflow);
10395 if (Overflow)
10396 return false;
10397
10398 // The fold is not valid if the sum of the shift values exceeds bitwidth.
10399 if (NewShiftAmt.uge(RHS: V.getScalarValueSizeInBits()))
10400 return false;
10401
10402 return true;
10403 };
10404
10405 // Logic ops are commutative, so check each operand for a match.
10406 SDValue X, Y;
10407 const APInt *C0Val;
10408 if (matchFirstShift(LogicOp.getOperand(i: 0), X, C0Val))
10409 Y = LogicOp.getOperand(i: 1);
10410 else if (matchFirstShift(LogicOp.getOperand(i: 1), X, C0Val))
10411 Y = LogicOp.getOperand(i: 0);
10412 else
10413 return SDValue();
10414
10415 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
10416 SDLoc DL(Shift);
10417 EVT VT = Shift->getValueType(ResNo: 0);
10418 EVT ShiftAmtVT = Shift->getOperand(Num: 1).getValueType();
10419 SDValue ShiftSumC = DAG.getConstant(Val: *C0Val + C1Val, DL, VT: ShiftAmtVT);
10420 SDValue NewShift1 = DAG.getNode(Opcode: ShiftOpcode, DL, VT, N1: X, N2: ShiftSumC);
10421 SDValue NewShift2 = DAG.getNode(Opcode: ShiftOpcode, DL, VT, N1: Y, N2: C1);
10422 return DAG.getNode(Opcode: LogicOpcode, DL, VT, N1: NewShift1, N2: NewShift2,
10423 Flags: LogicOp->getFlags());
10424}
10425
10426/// Handle transforms common to the three shifts, when the shift amount is a
10427/// constant.
10428/// We are looking for: (shift being one of shl/sra/srl)
10429/// shift (binop X, C0), C1
10430/// And want to transform into:
10431/// binop (shift X, C1), (shift C0, C1)
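/// For example: shl (or X, 0xF0), 8 --> or (shl X, 8), 0xF000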
10432SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
10433 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
10434
10435 // Do not turn a 'not' into a regular xor.
10436 if (isBitwiseNot(V: N->getOperand(Num: 0)))
10437 return SDValue();
10438
10439 // The inner binop must be one-use, since we want to replace it.
10440 SDValue LHS = N->getOperand(Num: 0);
10441 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
10442 return SDValue();
10443
10444 // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
10445 if (SDValue R = combineShiftOfShiftedLogic(Shift: N, DAG))
10446 return R;
10447
10448 // We want to pull some binops through shifts, so that we have (and (shift))
10449 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
10450 // thing happens with address calculations, so it's important to canonicalize
10451 // it.
10452 switch (LHS.getOpcode()) {
10453 default:
10454 return SDValue();
10455 case ISD::OR:
10456 case ISD::XOR:
10457 case ISD::AND:
10458 break;
10459 case ISD::ADD:
10460 if (N->getOpcode() != ISD::SHL)
10461 return SDValue(); // only shl(add) not sr[al](add).
10462 break;
10463 }
10464
10465 // FIXME: disable this unless the input to the binop is a shift by a constant
10466 // or is copy/select. Enable this in other cases when we figure out it's exactly
10467 // profitable.
10468 SDValue BinOpLHSVal = LHS.getOperand(i: 0);
10469 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
10470 BinOpLHSVal.getOpcode() == ISD::SRA ||
10471 BinOpLHSVal.getOpcode() == ISD::SRL) &&
10472 isa<ConstantSDNode>(Val: BinOpLHSVal.getOperand(i: 1));
10473 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
10474 BinOpLHSVal.getOpcode() == ISD::SELECT;
10475
10476 if (!IsShiftByConstant && !IsCopyOrSelect)
10477 return SDValue();
10478
10479 if (IsCopyOrSelect && N->hasOneUse())
10480 return SDValue();
10481
10482 // Attempt to fold the constants, shifting the binop RHS by the shift amount.
10483 SDLoc DL(N);
10484 EVT VT = N->getValueType(ResNo: 0);
10485 if (SDValue NewRHS = DAG.FoldConstantArithmetic(
10486 Opcode: N->getOpcode(), DL, VT, Ops: {LHS.getOperand(i: 1), N->getOperand(Num: 1)})) {
10487 SDValue NewShift = DAG.getNode(Opcode: N->getOpcode(), DL, VT, N1: LHS.getOperand(i: 0),
10488 N2: N->getOperand(Num: 1));
10489 return DAG.getNode(Opcode: LHS.getOpcode(), DL, VT, N1: NewShift, N2: NewRHS);
10490 }
10491
10492 return SDValue();
10493}
10494
10495SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
10496 assert(N->getOpcode() == ISD::TRUNCATE);
10497 assert(N->getOperand(0).getOpcode() == ISD::AND);
10498
10499 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
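// e.g. (truncate:i16 (and X:i32, 511)) -> (and (truncate:i16 X), 511)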
10500 EVT TruncVT = N->getValueType(ResNo: 0);
10501 if (N->hasOneUse() && N->getOperand(Num: 0).hasOneUse() &&
10502 TLI.isTypeDesirableForOp(ISD::AND, VT: TruncVT)) {
10503 SDValue N01 = N->getOperand(Num: 0).getOperand(i: 1);
10504 if (isConstantOrConstantVector(N: N01, /* NoOpaques */ true)) {
10505 SDLoc DL(N);
10506 SDValue N00 = N->getOperand(Num: 0).getOperand(i: 0);
10507 SDValue Trunc00 = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: TruncVT, Operand: N00);
10508 SDValue Trunc01 = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: TruncVT, Operand: N01);
10509 AddToWorklist(N: Trunc00.getNode());
10510 AddToWorklist(N: Trunc01.getNode());
10511 return DAG.getNode(Opcode: ISD::AND, DL, VT: TruncVT, N1: Trunc00, N2: Trunc01);
10512 }
10513 }
10514
10515 return SDValue();
10516}
10517
10518SDValue DAGCombiner::visitRotate(SDNode *N) {
10519 SDLoc dl(N);
10520 SDValue N0 = N->getOperand(Num: 0);
10521 SDValue N1 = N->getOperand(Num: 1);
10522 EVT VT = N->getValueType(ResNo: 0);
10523 unsigned Bitsize = VT.getScalarSizeInBits();
10524
10525 // fold (rot x, 0) -> x
10526 if (isNullOrNullSplat(V: N1))
10527 return N0;
10528
10529 // fold (rot x, c) -> x iff (c % BitSize) == 0
10530 if (isPowerOf2_32(Value: Bitsize) && Bitsize > 1) {
10531 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
10532 if (DAG.MaskedValueIsZero(Op: N1, Mask: ModuloMask))
10533 return N0;
10534 }
10535
10536 // fold (rot x, c) -> (rot x, c % BitSize)
10537 bool OutOfRange = false;
10538 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
10539 OutOfRange |= C->getAPIntValue().uge(RHS: Bitsize);
10540 return true;
10541 };
10542 if (ISD::matchUnaryPredicate(Op: N1, Match: MatchOutOfRange) && OutOfRange) {
10543 EVT AmtVT = N1.getValueType();
10544 SDValue Bits = DAG.getConstant(Val: Bitsize, DL: dl, VT: AmtVT);
10545 if (SDValue Amt =
10546 DAG.FoldConstantArithmetic(Opcode: ISD::UREM, DL: dl, VT: AmtVT, Ops: {N1, Bits}))
10547 return DAG.getNode(Opcode: N->getOpcode(), DL: dl, VT, N1: N0, N2: Amt);
10548 }
10549
10550 // rot i16 X, 8 --> bswap X
10551 auto *RotAmtC = isConstOrConstSplat(N: N1);
10552 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
10553 VT.getScalarSizeInBits() == 16 && hasOperation(Opcode: ISD::BSWAP, VT))
10554 return DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT, Operand: N0);
10555
10556 // Simplify the operands using demanded-bits information.
10557 if (SimplifyDemandedBits(Op: SDValue(N, 0)))
10558 return SDValue(N, 0);
10559
10560 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
10561 if (N1.getOpcode() == ISD::TRUNCATE &&
10562 N1.getOperand(i: 0).getOpcode() == ISD::AND) {
10563 if (SDValue NewOp1 = distributeTruncateThroughAnd(N: N1.getNode()))
10564 return DAG.getNode(Opcode: N->getOpcode(), DL: dl, VT, N1: N0, N2: NewOp1);
10565 }
10566
10567 unsigned NextOp = N0.getOpcode();
10568
10569 // fold (rot* (rot* x, c2), c1)
10570 // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
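// e.g. for i8: (rotl (rotr x, 3), 5) -> (rotl x, ((5 - 3) + 8) % 8) = (rotl x, 2)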
10571 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
10572 bool C1 = DAG.isConstantIntBuildVectorOrConstantInt(N: N1);
10573 bool C2 = DAG.isConstantIntBuildVectorOrConstantInt(N: N0.getOperand(i: 1));
10574 if (C1 && C2 && N1.getValueType() == N0.getOperand(i: 1).getValueType()) {
10575 EVT ShiftVT = N1.getValueType();
10576 bool SameSide = (N->getOpcode() == NextOp);
10577 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
10578 SDValue BitsizeC = DAG.getConstant(Val: Bitsize, DL: dl, VT: ShiftVT);
10579 SDValue Norm1 = DAG.FoldConstantArithmetic(Opcode: ISD::UREM, DL: dl, VT: ShiftVT,
10580 Ops: {N1, BitsizeC});
10581 SDValue Norm2 = DAG.FoldConstantArithmetic(Opcode: ISD::UREM, DL: dl, VT: ShiftVT,
10582 Ops: {N0.getOperand(i: 1), BitsizeC});
10583 if (Norm1 && Norm2)
10584 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
10585 Opcode: CombineOp, DL: dl, VT: ShiftVT, Ops: {Norm1, Norm2})) {
10586 CombinedShift = DAG.FoldConstantArithmetic(Opcode: ISD::ADD, DL: dl, VT: ShiftVT,
10587 Ops: {CombinedShift, BitsizeC});
10588 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
10589 Opcode: ISD::UREM, DL: dl, VT: ShiftVT, Ops: {CombinedShift, BitsizeC});
10590 return DAG.getNode(Opcode: N->getOpcode(), DL: dl, VT, N1: N0->getOperand(Num: 0),
10591 N2: CombinedShiftNorm);
10592 }
10593 }
10594 }
10595 return SDValue();
10596}
10597
10598SDValue DAGCombiner::visitSHL(SDNode *N) {
10599 SDValue N0 = N->getOperand(Num: 0);
10600 SDValue N1 = N->getOperand(Num: 1);
10601 if (SDValue V = DAG.simplifyShift(X: N0, Y: N1))
10602 return V;
10603
10604 SDLoc DL(N);
10605 EVT VT = N0.getValueType();
10606 EVT ShiftVT = N1.getValueType();
10607 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10608
10609 // fold (shl c1, c2) -> c1<<c2
10610 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::SHL, DL, VT, Ops: {N0, N1}))
10611 return C;
10612
10613 // fold vector ops
10614 if (VT.isVector()) {
10615 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10616 return FoldedVOp;
10617
10618 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(Val&: N1);
10619 // If setcc produces all-one true value then:
10620 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
10621 if (N1CV && N1CV->isConstant()) {
10622 if (N0.getOpcode() == ISD::AND) {
10623 SDValue N00 = N0->getOperand(Num: 0);
10624 SDValue N01 = N0->getOperand(Num: 1);
10625 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(Val&: N01);
10626
10627 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
10628 TLI.getBooleanContents(Type: N00.getOperand(i: 0).getValueType()) ==
10629 TargetLowering::ZeroOrNegativeOneBooleanContent) {
10630 if (SDValue C =
10631 DAG.FoldConstantArithmetic(Opcode: ISD::SHL, DL, VT, Ops: {N01, N1}))
10632 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: N00, N2: C);
10633 }
10634 }
10635 }
10636 }
10637
10638 if (SDValue NewSel = foldBinOpIntoSelect(BO: N))
10639 return NewSel;
10640
10641 // if (shl x, c) is known to be zero, return 0
10642 if (DAG.MaskedValueIsZero(Op: SDValue(N, 0), Mask: APInt::getAllOnes(numBits: OpSizeInBits)))
10643 return DAG.getConstant(Val: 0, DL, VT);
10644
10645 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
10646 if (N1.getOpcode() == ISD::TRUNCATE &&
10647 N1.getOperand(i: 0).getOpcode() == ISD::AND) {
10648 if (SDValue NewOp1 = distributeTruncateThroughAnd(N: N1.getNode()))
10649 return DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: N0, N2: NewOp1);
10650 }
10651
10652 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
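// e.g. for i32: (shl (shl x, 5), 7) -> (shl x, 12); (shl (shl x, 20), 20) -> 0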
10653 if (N0.getOpcode() == ISD::SHL) {
10654 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10655 ConstantSDNode *RHS) {
10656 APInt c1 = LHS->getAPIntValue();
10657 APInt c2 = RHS->getAPIntValue();
10658 zeroExtendToMatch(LHS&: c1, RHS&: c2, Offset: 1 /* Overflow Bit */);
10659 return (c1 + c2).uge(RHS: OpSizeInBits);
10660 };
10661 if (ISD::matchBinaryPredicate(LHS: N1, RHS: N0.getOperand(i: 1), Match: MatchOutOfRange))
10662 return DAG.getConstant(Val: 0, DL, VT);
10663
10664 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10665 ConstantSDNode *RHS) {
10666 APInt c1 = LHS->getAPIntValue();
10667 APInt c2 = RHS->getAPIntValue();
10668 zeroExtendToMatch(LHS&: c1, RHS&: c2, Offset: 1 /* Overflow Bit */);
10669 return (c1 + c2).ult(RHS: OpSizeInBits);
10670 };
10671 if (ISD::matchBinaryPredicate(LHS: N1, RHS: N0.getOperand(i: 1), Match: MatchInRange)) {
10672 SDValue Sum = DAG.getNode(Opcode: ISD::ADD, DL, VT: ShiftVT, N1, N2: N0.getOperand(i: 1));
10673 return DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: N0.getOperand(i: 0), N2: Sum);
10674 }
10675 }
10676
10677 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
10678 // For this to be valid, the second form must not preserve any of the bits
10679 // that are shifted out by the inner shift in the first form. This means
10680 // the outer shift size must be >= the number of bits added by the ext.
10681 // As a corollary, we don't care what kind of ext it is.
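// e.g. (shl (zext (shl x:i16, 4) to i32), 20) -> (shl (zext x to i32), 24),
// since the outer shift amount (20) covers the 16 bits added by the extend.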
10682 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
10683 N0.getOpcode() == ISD::ANY_EXTEND ||
10684 N0.getOpcode() == ISD::SIGN_EXTEND) &&
10685 N0.getOperand(i: 0).getOpcode() == ISD::SHL) {
10686 SDValue N0Op0 = N0.getOperand(i: 0);
10687 SDValue InnerShiftAmt = N0Op0.getOperand(i: 1);
10688 EVT InnerVT = N0Op0.getValueType();
10689 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
10690
10691 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
10692 ConstantSDNode *RHS) {
10693 APInt c1 = LHS->getAPIntValue();
10694 APInt c2 = RHS->getAPIntValue();
10695 zeroExtendToMatch(LHS&: c1, RHS&: c2, Offset: 1 /* Overflow Bit */);
10696 return c2.uge(RHS: OpSizeInBits - InnerBitwidth) &&
10697 (c1 + c2).uge(RHS: OpSizeInBits);
10698 };
10699 if (ISD::matchBinaryPredicate(LHS: InnerShiftAmt, RHS: N1, Match: MatchOutOfRange,
10700 /*AllowUndefs*/ false,
10701 /*AllowTypeMismatch*/ true))
10702 return DAG.getConstant(Val: 0, DL, VT);
10703
10704 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
10705 ConstantSDNode *RHS) {
10706 APInt c1 = LHS->getAPIntValue();
10707 APInt c2 = RHS->getAPIntValue();
10708 zeroExtendToMatch(LHS&: c1, RHS&: c2, Offset: 1 /* Overflow Bit */);
10709 return c2.uge(RHS: OpSizeInBits - InnerBitwidth) &&
10710 (c1 + c2).ult(RHS: OpSizeInBits);
10711 };
10712 if (ISD::matchBinaryPredicate(LHS: InnerShiftAmt, RHS: N1, Match: MatchInRange,
10713 /*AllowUndefs*/ false,
10714 /*AllowTypeMismatch*/ true)) {
10715 SDValue Ext = DAG.getNode(Opcode: N0.getOpcode(), DL, VT, Operand: N0Op0.getOperand(i: 0));
10716 SDValue Sum = DAG.getZExtOrTrunc(Op: InnerShiftAmt, DL, VT: ShiftVT);
10717 Sum = DAG.getNode(Opcode: ISD::ADD, DL, VT: ShiftVT, N1: Sum, N2: N1);
10718 return DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Ext, N2: Sum);
10719 }
10720 }
10721
10722 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
10723 // Only fold this if the inner zext has no other uses to avoid increasing
10724 // the total number of instructions.
10725 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
10726 N0.getOperand(i: 0).getOpcode() == ISD::SRL) {
10727 SDValue N0Op0 = N0.getOperand(i: 0);
10728 SDValue InnerShiftAmt = N0Op0.getOperand(i: 1);
10729
10730 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10731 APInt c1 = LHS->getAPIntValue();
10732 APInt c2 = RHS->getAPIntValue();
10733 zeroExtendToMatch(LHS&: c1, RHS&: c2);
10734 return c1.ult(RHS: VT.getScalarSizeInBits()) && (c1 == c2);
10735 };
10736 if (ISD::matchBinaryPredicate(LHS: InnerShiftAmt, RHS: N1, Match: MatchEqual,
10737 /*AllowUndefs*/ false,
10738 /*AllowTypeMismatch*/ true)) {
10739 EVT InnerShiftAmtVT = N0Op0.getOperand(i: 1).getValueType();
10740 SDValue NewSHL = DAG.getZExtOrTrunc(Op: N1, DL, VT: InnerShiftAmtVT);
10741 NewSHL = DAG.getNode(Opcode: ISD::SHL, DL, VT: N0Op0.getValueType(), N1: N0Op0, N2: NewSHL);
10742 AddToWorklist(N: NewSHL.getNode());
10743 return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SDLoc(N0), VT, Operand: NewSHL);
10744 }
10745 }
10746
10747 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
10748 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10749 ConstantSDNode *RHS) {
10750 const APInt &LHSC = LHS->getAPIntValue();
10751 const APInt &RHSC = RHS->getAPIntValue();
10752 return LHSC.ult(RHS: OpSizeInBits) && RHSC.ult(RHS: OpSizeInBits) &&
10753 LHSC.getZExtValue() <= RHSC.getZExtValue();
10754 };
10755
10756 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
10757 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C1-C2)) if C1 >= C2
10758 if (N0->getFlags().hasExact()) {
10759 if (ISD::matchBinaryPredicate(LHS: N0.getOperand(i: 1), RHS: N1, Match: MatchShiftAmount,
10760 /*AllowUndefs*/ false,
10761 /*AllowTypeMismatch*/ true)) {
10762 SDValue N01 = DAG.getZExtOrTrunc(Op: N0.getOperand(i: 1), DL, VT: ShiftVT);
10763 SDValue Diff = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShiftVT, N1, N2: N01);
10764 return DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: N0.getOperand(i: 0), N2: Diff);
10765 }
10766 if (ISD::matchBinaryPredicate(LHS: N1, RHS: N0.getOperand(i: 1), Match: MatchShiftAmount,
10767 /*AllowUndefs*/ false,
10768 /*AllowTypeMismatch*/ true)) {
10769 SDValue N01 = DAG.getZExtOrTrunc(Op: N0.getOperand(i: 1), DL, VT: ShiftVT);
10770 SDValue Diff = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShiftVT, N1: N01, N2: N1);
10771 return DAG.getNode(Opcode: N0.getOpcode(), DL, VT, N1: N0.getOperand(i: 0), N2: Diff);
10772 }
10773 }
10774
10775 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
10776 // (and (srl x, (sub c1, c2)), MASK)
10777 // Only fold this if the inner shift has no other uses -- if it does,
10778 // folding this will increase the total number of instructions.
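// e.g. for i8: (shl (srl x, 3), 5) -> (and (shl x, 2), 0xE0)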
10779 if (N0.getOpcode() == ISD::SRL &&
10780 (N0.getOperand(i: 1) == N1 || N0.hasOneUse()) &&
10781 TLI.shouldFoldConstantShiftPairToMask(N)) {
10782 if (ISD::matchBinaryPredicate(LHS: N1, RHS: N0.getOperand(i: 1), Match: MatchShiftAmount,
10783 /*AllowUndefs*/ false,
10784 /*AllowTypeMismatch*/ true)) {
10785 SDValue N01 = DAG.getZExtOrTrunc(Op: N0.getOperand(i: 1), DL, VT: ShiftVT);
10786 SDValue Diff = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShiftVT, N1: N01, N2: N1);
10787 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10788 Mask = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Mask, N2: N01);
10789 Mask = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Mask, N2: Diff);
10790 SDValue Shift = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: N0.getOperand(i: 0), N2: Diff);
10791 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Shift, N2: Mask);
10792 }
10793 if (ISD::matchBinaryPredicate(LHS: N0.getOperand(i: 1), RHS: N1, Match: MatchShiftAmount,
10794 /*AllowUndefs*/ false,
10795 /*AllowTypeMismatch*/ true)) {
10796 SDValue N01 = DAG.getZExtOrTrunc(Op: N0.getOperand(i: 1), DL, VT: ShiftVT);
10797 SDValue Diff = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShiftVT, N1, N2: N01);
10798 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10799 Mask = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Mask, N2: N1);
10800 SDValue Shift = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: N0.getOperand(i: 0), N2: Diff);
10801 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Shift, N2: Mask);
10802 }
10803 }
10804 }
10805
10806 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
10807 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(i: 1) &&
10808 isConstantOrConstantVector(N: N1, /* No Opaques */ NoOpaques: true)) {
10809 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
10810 SDValue HiBitsMask = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: AllBits, N2: N1);
10811 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: N0.getOperand(i: 0), N2: HiBitsMask);
10812 }
10813
10814 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
10815 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
10816 // This is a variant of the fold done on multiply, except that a mul by a power
10817 // of 2 is turned into a shift.
10818 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
10819 TLI.isDesirableToCommuteWithShift(N, Level)) {
10820 SDValue N01 = N0.getOperand(i: 1);
10821 if (SDValue Shl1 =
10822 DAG.FoldConstantArithmetic(Opcode: ISD::SHL, DL: SDLoc(N1), VT, Ops: {N01, N1})) {
10823 SDValue Shl0 = DAG.getNode(Opcode: ISD::SHL, DL: SDLoc(N0), VT, N1: N0.getOperand(i: 0), N2: N1);
10824 AddToWorklist(N: Shl0.getNode());
10825 SDNodeFlags Flags;
10826 // Preserve the disjoint flag for Or.
10827 if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
10828 Flags |= SDNodeFlags::Disjoint;
10829 return DAG.getNode(Opcode: N0.getOpcode(), DL, VT, N1: Shl0, N2: Shl1, Flags);
10830 }
10831 }
10832
10833 // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
10834 // TODO: Add zext/add_nuw variant with suitable test coverage
10835 // TODO: Should we limit this with isLegalAddImmediate?
10836 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
10837 N0.getOperand(i: 0).getOpcode() == ISD::ADD &&
10838 N0.getOperand(i: 0)->getFlags().hasNoSignedWrap() &&
10839 TLI.isDesirableToCommuteWithShift(N, Level)) {
10840 SDValue Add = N0.getOperand(i: 0);
10841 SDLoc DL(N0);
10842 if (SDValue ExtC = DAG.FoldConstantArithmetic(Opcode: N0.getOpcode(), DL, VT,
10843 Ops: {Add.getOperand(i: 1)})) {
10844 if (SDValue ShlC =
10845 DAG.FoldConstantArithmetic(Opcode: ISD::SHL, DL, VT, Ops: {ExtC, N1})) {
10846 SDValue ExtX = DAG.getNode(Opcode: N0.getOpcode(), DL, VT, Operand: Add.getOperand(i: 0));
10847 SDValue ShlX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ExtX, N2: N1);
10848 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: ShlX, N2: ShlC);
10849 }
10850 }
10851 }
10852
10853 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
10854 if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
10855 SDValue N01 = N0.getOperand(i: 1);
10856 if (SDValue Shl =
10857 DAG.FoldConstantArithmetic(Opcode: ISD::SHL, DL: SDLoc(N1), VT, Ops: {N01, N1}))
10858 return DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N0.getOperand(i: 0), N2: Shl);
10859 }
10860
10861 ConstantSDNode *N1C = isConstOrConstSplat(N: N1);
10862 if (N1C && !N1C->isOpaque())
10863 if (SDValue NewSHL = visitShiftByConstant(N))
10864 return NewSHL;
10865
10866 // fold (shl X, cttz(Y)) -> (mul (Y & -Y), X) if cttz is unsupported on the
10867 // target.
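// ((Y & -Y) isolates the lowest set bit of Y, which equals (1 << cttz(Y)) for
// non-zero Y; for Y == 0 the original shift amount is out of range anyway.)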
10868 if (((N1.getOpcode() == ISD::CTTZ &&
10869 VT.getScalarSizeInBits() <= ShiftVT.getScalarSizeInBits()) ||
10870 N1.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
10871 N1.hasOneUse() && !TLI.isOperationLegalOrCustom(Op: ISD::CTTZ, VT: ShiftVT) &&
10872 TLI.isOperationLegalOrCustom(Op: ISD::MUL, VT)) {
10873 SDValue Y = N1.getOperand(i: 0);
10874 SDLoc DL(N);
10875 SDValue NegY = DAG.getNegative(Val: Y, DL, VT: ShiftVT);
10876 SDValue And =
10877 DAG.getZExtOrTrunc(Op: DAG.getNode(Opcode: ISD::AND, DL, VT: ShiftVT, N1: Y, N2: NegY), DL, VT);
10878 return DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: And, N2: N0);
10879 }
10880
10881 if (SimplifyDemandedBits(Op: SDValue(N, 0)))
10882 return SDValue(N, 0);
10883
10884 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
10885 if (N0.getOpcode() == ISD::VSCALE && N1C) {
10886 const APInt &C0 = N0.getConstantOperandAPInt(i: 0);
10887 const APInt &C1 = N1C->getAPIntValue();
10888 return DAG.getVScale(DL, VT, MulImm: C0 << C1);
10889 }
10890
10891 SDValue X;
10892 APInt VS0;
10893
10894 // fold (shl (X * vscale(VS0)), C1) -> (X * vscale(VS0 << C1))
10895 if (N1C && sd_match(N: N0, P: m_Mul(L: m_Value(N&: X), R: m_VScale(Op: m_ConstInt(V&: VS0))))) {
10896 SDNodeFlags Flags;
10897 Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap() &&
10898 N0->getFlags().hasNoUnsignedWrap());
10899
10900 SDValue VScale = DAG.getVScale(DL, VT, MulImm: VS0 << N1C->getAPIntValue());
10901 return DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: X, N2: VScale, Flags);
10902 }
10903
10904 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
10905 APInt ShlVal;
10906 if (N0.getOpcode() == ISD::STEP_VECTOR &&
10907 ISD::isConstantSplatVector(N: N1.getNode(), SplatValue&: ShlVal)) {
10908 const APInt &C0 = N0.getConstantOperandAPInt(i: 0);
10909 if (ShlVal.ult(RHS: C0.getBitWidth())) {
10910 APInt NewStep = C0 << ShlVal;
10911 return DAG.getStepVector(DL, ResVT: VT, StepVal: NewStep);
10912 }
10913 }
10914
10915 return SDValue();
10916}
10917
10918// Transform a right shift of a multiply into a multiply-high.
10919// Examples:
10920 // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
10921 // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
10922static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
10923 const TargetLowering &TLI) {
10924 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
10925 "SRL or SRA node is required here!");
10926
10927 // Check the shift amount. Proceed with the transformation if the shift
10928 // amount is constant.
10929 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N: N->getOperand(Num: 1));
10930 if (!ShiftAmtSrc)
10931 return SDValue();
10932
10933 // The operation feeding into the shift must be a multiply.
10934 SDValue ShiftOperand = N->getOperand(Num: 0);
10935 if (ShiftOperand.getOpcode() != ISD::MUL)
10936 return SDValue();
10937
10938 // Both operands must be equivalent extend nodes.
10939 SDValue LeftOp = ShiftOperand.getOperand(i: 0);
10940 SDValue RightOp = ShiftOperand.getOperand(i: 1);
10941
10942 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
10943 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
10944
10945 if (!IsSignExt && !IsZeroExt)
10946 return SDValue();
10947
10948 EVT NarrowVT = LeftOp.getOperand(i: 0).getValueType();
10949 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
10950
10951 // return true if U may use the lower bits of its operands
10952 auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
10953 if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
10954 return true;
10955 }
10956 ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(N: U->getOperand(Num: 1));
10957 if (!UShiftAmtSrc) {
10958 return true;
10959 }
10960 unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
10961 return UShiftAmt < NarrowVTSize;
10962 };
10963
10964 // If the lower half of the MUL is also used and MUL_LOHI is supported, do not
10965 // introduce the MULH; keep the MUL so it can become a MUL_LOHI instead.
10966 unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10967 if (!ShiftOperand.hasOneUse() &&
10968 TLI.isOperationLegalOrCustom(Op: MulLoHiOp, VT: NarrowVT) &&
10969 llvm::any_of(Range: ShiftOperand->users(), P: UserOfLowerBits)) {
10970 return SDValue();
10971 }
10972
10973 SDValue MulhRightOp;
10974 if (ConstantSDNode *Constant = isConstOrConstSplat(N: RightOp)) {
10975 unsigned ActiveBits = IsSignExt
10976 ? Constant->getAPIntValue().getSignificantBits()
10977 : Constant->getAPIntValue().getActiveBits();
10978 if (ActiveBits > NarrowVTSize)
10979 return SDValue();
10980 MulhRightOp = DAG.getConstant(
10981 Val: Constant->getAPIntValue().trunc(width: NarrowVT.getScalarSizeInBits()), DL,
10982 VT: NarrowVT);
10983 } else {
10984 if (LeftOp.getOpcode() != RightOp.getOpcode())
10985 return SDValue();
10986 // Check that the two extend nodes are the same type.
10987 if (NarrowVT != RightOp.getOperand(i: 0).getValueType())
10988 return SDValue();
10989 MulhRightOp = RightOp.getOperand(i: 0);
10990 }
10991
10992 EVT WideVT = LeftOp.getValueType();
10993 // Proceed with the transformation if the wide types match.
10994 assert((WideVT == RightOp.getValueType()) &&
10995 "Cannot have a multiply node with two different operand types.");
10996
10997 // Proceed with the transformation if the wide type is twice as large
10998 // as the narrow type.
10999 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
11000 return SDValue();
11001
11002 // Check the shift amount with the narrow type size.
11003 // Proceed with the transformation if the shift amount is the width
11004 // of the narrow type.
11005 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
11006 if (ShiftAmt != NarrowVTSize)
11007 return SDValue();
11008
11009 // If the operation feeding into the MUL is a sign extend (sext),
11010 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
11011 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
11012
11013 // Combine to mulh if mulh is legal/custom for the narrow type on the target.
11014 // For vector types we instead check the type legalization would transform to,
11015 // and rely on legalization to split/combine the result.
11016 EVT TransformVT = NarrowVT;
11017 if (NarrowVT.isVector()) {
11018 TransformVT = TLI.getLegalTypeToTransformTo(Context&: *DAG.getContext(), VT: NarrowVT);
11019 if (TransformVT.getScalarType() != NarrowVT.getScalarType())
11020 return SDValue();
11021 }
11022 if (!TLI.isOperationLegalOrCustom(Op: MulhOpcode, VT: TransformVT))
11023 return SDValue();
11024
11025 SDValue Result =
11026 DAG.getNode(Opcode: MulhOpcode, DL, VT: NarrowVT, N1: LeftOp.getOperand(i: 0), N2: MulhRightOp);
11027 bool IsSigned = N->getOpcode() == ISD::SRA;
11028 return DAG.getExtOrTrunc(IsSigned, Op: Result, DL, VT: WideVT);
11029}
11030
11031// fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
// This helper accepts SDNodes with opcode ISD::BSWAP or ISD::BITREVERSE.
11033static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) {
11034 unsigned Opcode = N->getOpcode();
11035 if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
11036 return SDValue();
11037
11038 SDValue N0 = N->getOperand(Num: 0);
11039 EVT VT = N->getValueType(ResNo: 0);
11040 SDLoc DL(N);
11041 SDValue X, Y;
11042
  // If both operands are bswap/bitreverse, allow them to have multiple uses.
11044 if (sd_match(N: N0, P: m_OneUse(P: m_BitwiseLogic(L: m_UnaryOp(Opc: Opcode, Op: m_Value(N&: X)),
11045 R: m_UnaryOp(Opc: Opcode, Op: m_Value(N&: Y))))))
11046 return DAG.getNode(Opcode: N0.getOpcode(), DL, VT, N1: X, N2: Y);
11047
  // Otherwise we need to ensure the logic_op and bswap/bitreverse(x) each have
  // one use.
11049 if (sd_match(N: N0, P: m_OneUse(P: m_BitwiseLogic(
11050 L: m_OneUse(P: m_UnaryOp(Opc: Opcode, Op: m_Value(N&: X))), R: m_Value(N&: Y))))) {
11051 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, Operand: Y);
11052 return DAG.getNode(Opcode: N0.getOpcode(), DL, VT, N1: X, N2: NewBitReorder);
11053 }
11054
11055 return SDValue();
11056}
11057
11058SDValue DAGCombiner::visitSRA(SDNode *N) {
11059 SDValue N0 = N->getOperand(Num: 0);
11060 SDValue N1 = N->getOperand(Num: 1);
11061 if (SDValue V = DAG.simplifyShift(X: N0, Y: N1))
11062 return V;
11063
11064 SDLoc DL(N);
11065 EVT VT = N0.getValueType();
11066 unsigned OpSizeInBits = VT.getScalarSizeInBits();
11067
  // fold (sra c1, c2) -> c1 >>s c2
11069 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::SRA, DL, VT, Ops: {N0, N1}))
11070 return C;
11071
11072 // Arithmetic shifting an all-sign-bit value is a no-op.
11073 // fold (sra 0, x) -> 0
11074 // fold (sra -1, x) -> -1
11075 if (DAG.ComputeNumSignBits(Op: N0) == OpSizeInBits)
11076 return N0;
11077
11078 // fold vector ops
11079 if (VT.isVector())
11080 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
11081 return FoldedVOp;
11082
11083 if (SDValue NewSel = foldBinOpIntoSelect(BO: N))
11084 return NewSel;
11085
11086 ConstantSDNode *N1C = isConstOrConstSplat(N: N1);
11087
11088 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
11089 // clamp (add c1, c2) to max shift.
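  // e.g. (sra (sra x, 3), 5) --> (sra x, 8). If the sum exceeds the bit width
  // it is clamped to bitwidth - 1, since arithmetic shifts beyond that point
  // only replicate the sign bit.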
11090 if (N0.getOpcode() == ISD::SRA) {
11091 EVT ShiftVT = N1.getValueType();
11092 EVT ShiftSVT = ShiftVT.getScalarType();
11093 SmallVector<SDValue, 16> ShiftValues;
11094
11095 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
11096 APInt c1 = LHS->getAPIntValue();
11097 APInt c2 = RHS->getAPIntValue();
11098 zeroExtendToMatch(LHS&: c1, RHS&: c2, Offset: 1 /* Overflow Bit */);
11099 APInt Sum = c1 + c2;
11100 unsigned ShiftSum =
11101 Sum.uge(RHS: OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
11102 ShiftValues.push_back(Elt: DAG.getConstant(Val: ShiftSum, DL, VT: ShiftSVT));
11103 return true;
11104 };
11105 if (ISD::matchBinaryPredicate(LHS: N1, RHS: N0.getOperand(i: 1), Match: SumOfShifts)) {
11106 SDValue ShiftValue;
11107 if (N1.getOpcode() == ISD::BUILD_VECTOR)
11108 ShiftValue = DAG.getBuildVector(VT: ShiftVT, DL, Ops: ShiftValues);
11109 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
11110 assert(ShiftValues.size() == 1 &&
11111 "Expected matchBinaryPredicate to return one element for "
11112 "SPLAT_VECTORs");
11113 ShiftValue = DAG.getSplatVector(VT: ShiftVT, DL, Op: ShiftValues[0]);
11114 } else
11115 ShiftValue = ShiftValues[0];
11116 return DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: N0.getOperand(i: 0), N2: ShiftValue);
11117 }
11118 }
11119
11120 // fold (sra (xor (sra x, c1), -1), c2) -> (xor (sra x, c3), -1)
11121 // This allows merging two arithmetic shifts even when there's a NOT in
11122 // between.
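  // This is valid because (sra (xor v, -1), c) == (xor (sra v, c), -1), so the
  // outer shift can be moved inside the NOT and merged with the inner one.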
11123 SDValue X;
11124 APInt C1;
11125 if (N1C && sd_match(N: N0, P: m_OneUse(P: m_Not(
11126 V: m_OneUse(P: m_Sra(L: m_Value(N&: X), R: m_ConstInt(V&: C1))))))) {
11127 APInt C2 = N1C->getAPIntValue();
11128 zeroExtendToMatch(LHS&: C1, RHS&: C2, Offset: 1 /* Overflow Bit */);
11129 APInt Sum = C1 + C2;
11130 unsigned ShiftSum = Sum.getLimitedValue(Limit: OpSizeInBits - 1);
11131 SDValue NewShift = DAG.getNode(
11132 Opcode: ISD::SRA, DL, VT, N1: X, N2: DAG.getShiftAmountConstant(Val: ShiftSum, VT, DL));
11133 return DAG.getNOT(DL, Val: NewShift, VT);
11134 }
11135
  // fold (sra (shl X, m), (sub result_size, n))
  //   -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
  //   result_size - n != m.
  // If truncate is free for the target, this sext(trunc(srl)) sequence is
  // likely to result in better code.
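  // e.g. for i32 X: (sra (shl X, 8), 16) yields bits 8..23 of X sign-extended
  // to i32, which is (sext (trunc (srl X, 8) to i16)).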
11141 if (N0.getOpcode() == ISD::SHL && N1C) {
11142 // Get the two constants of the shifts, CN0 = m, CN = n.
11143 const ConstantSDNode *N01C = isConstOrConstSplat(N: N0.getOperand(i: 1));
11144 if (N01C) {
11145 LLVMContext &Ctx = *DAG.getContext();
11146 // Determine what the truncate's result bitsize and type would be.
11147 EVT TruncVT = VT.changeElementType(
11148 Context&: Ctx, EltVT: EVT::getIntegerVT(Context&: Ctx, BitWidth: OpSizeInBits - N1C->getZExtValue()));
11149
11150 // Determine the residual right-shift amount.
11151 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
11152
      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated-to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
11157 if ((ShiftAmt > 0) &&
11158 TLI.isOperationLegalOrCustom(Op: ISD::SIGN_EXTEND, VT: TruncVT) &&
11159 TLI.isOperationLegalOrCustom(Op: ISD::TRUNCATE, VT) &&
11160 TLI.isTruncateFree(FromVT: VT, ToVT: TruncVT)) {
11161 SDValue Amt = DAG.getShiftAmountConstant(Val: ShiftAmt, VT, DL);
11162 SDValue Shift = DAG.getNode(Opcode: ISD::SRL, DL, VT,
11163 N1: N0.getOperand(i: 0), N2: Amt);
11164 SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: TruncVT,
11165 Operand: Shift);
11166 return DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL,
11167 VT: N->getValueType(ResNo: 0), Operand: Trunc);
11168 }
11169 }
11170 }
11171
11172 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
11173 // sra (add (shl X, N1C), AddC), N1C -->
11174 // sext (add (trunc X to (width - N1C)), AddC')
11175 // sra (sub AddC, (shl X, N1C)), N1C -->
  // sext (sub AddC', (trunc X to (width - N1C)))
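  // e.g. i32: (sra (add (shl X, 16), 0xA0000), 16)
  //   --> (sext (add (trunc X to i16), 0xA))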
11177 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
11178 N0.hasOneUse()) {
11179 bool IsAdd = N0.getOpcode() == ISD::ADD;
11180 SDValue Shl = N0.getOperand(i: IsAdd ? 0 : 1);
11181 if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(i: 1) == N1 &&
11182 Shl.hasOneUse()) {
11183 // TODO: AddC does not need to be a splat.
11184 if (ConstantSDNode *AddC =
11185 isConstOrConstSplat(N: N0.getOperand(i: IsAdd ? 1 : 0))) {
11186 // Determine what the truncate's type would be and ask the target if
11187 // that is a free operation.
11188 LLVMContext &Ctx = *DAG.getContext();
11189 unsigned ShiftAmt = N1C->getZExtValue();
11190 EVT TruncVT = VT.changeElementType(
11191 Context&: Ctx, EltVT: EVT::getIntegerVT(Context&: Ctx, BitWidth: OpSizeInBits - ShiftAmt));
11192
11193 // TODO: The simple type check probably belongs in the default hook
11194 // implementation and/or target-specific overrides (because
11195 // non-simple types likely require masking when legalized), but
11196 // that restriction may conflict with other transforms.
11197 if (TruncVT.isSimple() && isTypeLegal(VT: TruncVT) &&
11198 TLI.isTruncateFree(FromVT: VT, ToVT: TruncVT)) {
11199 SDValue Trunc = DAG.getZExtOrTrunc(Op: Shl.getOperand(i: 0), DL, VT: TruncVT);
11200 SDValue ShiftC =
11201 DAG.getConstant(Val: AddC->getAPIntValue().lshr(shiftAmt: ShiftAmt).trunc(
11202 width: TruncVT.getScalarSizeInBits()),
11203 DL, VT: TruncVT);
11204 SDValue Add;
11205 if (IsAdd)
11206 Add = DAG.getNode(Opcode: ISD::ADD, DL, VT: TruncVT, N1: Trunc, N2: ShiftC);
11207 else
11208 Add = DAG.getNode(Opcode: ISD::SUB, DL, VT: TruncVT, N1: ShiftC, N2: Trunc);
11209 return DAG.getSExtOrTrunc(Op: Add, DL, VT);
11210 }
11211 }
11212 }
11213 }
11214
11215 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
11216 if (N1.getOpcode() == ISD::TRUNCATE &&
11217 N1.getOperand(i: 0).getOpcode() == ISD::AND) {
11218 if (SDValue NewOp1 = distributeTruncateThroughAnd(N: N1.getNode()))
11219 return DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: N0, N2: NewOp1);
11220 }
11221
11222 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
11223 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
11224 // if c1 is equal to the number of bits the trunc removes
11225 // TODO - support non-uniform vector shift amounts.
11226 if (N0.getOpcode() == ISD::TRUNCATE &&
11227 (N0.getOperand(i: 0).getOpcode() == ISD::SRL ||
11228 N0.getOperand(i: 0).getOpcode() == ISD::SRA) &&
11229 N0.getOperand(i: 0).hasOneUse() &&
11230 N0.getOperand(i: 0).getOperand(i: 1).hasOneUse() && N1C) {
11231 SDValue N0Op0 = N0.getOperand(i: 0);
11232 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N: N0Op0.getOperand(i: 1))) {
11233 EVT LargeVT = N0Op0.getValueType();
11234 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
11235 if (LargeShift->getAPIntValue() == TruncBits) {
11236 EVT LargeShiftVT = getShiftAmountTy(LHSTy: LargeVT);
11237 SDValue Amt = DAG.getZExtOrTrunc(Op: N1, DL, VT: LargeShiftVT);
11238 Amt = DAG.getNode(Opcode: ISD::ADD, DL, VT: LargeShiftVT, N1: Amt,
11239 N2: DAG.getConstant(Val: TruncBits, DL, VT: LargeShiftVT));
11240 SDValue SRA =
11241 DAG.getNode(Opcode: ISD::SRA, DL, VT: LargeVT, N1: N0Op0.getOperand(i: 0), N2: Amt);
11242 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: SRA);
11243 }
11244 }
11245 }
11246
11247 // Simplify, based on bits shifted out of the LHS.
11248 if (SimplifyDemandedBits(Op: SDValue(N, 0)))
11249 return SDValue(N, 0);
11250
11251 // If the sign bit is known to be zero, switch this to a SRL.
11252 if (DAG.SignBitIsZero(Op: N0))
11253 return DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: N0, N2: N1);
11254
11255 if (N1C && !N1C->isOpaque())
11256 if (SDValue NewSRA = visitShiftByConstant(N))
11257 return NewSRA;
11258
11259 // Try to transform this shift into a multiply-high if
11260 // it matches the appropriate pattern detected in combineShiftToMULH.
11261 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
11262 return MULH;
11263
11264 // Attempt to convert a sra of a load into a narrower sign-extending load.
11265 if (SDValue NarrowLoad = reduceLoadWidth(N))
11266 return NarrowLoad;
11267
11268 if (SDValue AVG = foldShiftToAvg(N, DL))
11269 return AVG;
11270
11271 return SDValue();
11272}
11273
11274SDValue DAGCombiner::visitSRL(SDNode *N) {
11275 SDValue N0 = N->getOperand(Num: 0);
11276 SDValue N1 = N->getOperand(Num: 1);
11277 if (SDValue V = DAG.simplifyShift(X: N0, Y: N1))
11278 return V;
11279
11280 SDLoc DL(N);
11281 EVT VT = N0.getValueType();
11282 EVT ShiftVT = N1.getValueType();
11283 unsigned OpSizeInBits = VT.getScalarSizeInBits();
11284
11285 // fold (srl c1, c2) -> c1 >>u c2
11286 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::SRL, DL, VT, Ops: {N0, N1}))
11287 return C;
11288
11289 // fold vector ops
11290 if (VT.isVector())
11291 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
11292 return FoldedVOp;
11293
11294 if (SDValue NewSel = foldBinOpIntoSelect(BO: N))
11295 return NewSel;
11296
11297 // if (srl x, c) is known to be zero, return 0
11298 ConstantSDNode *N1C = isConstOrConstSplat(N: N1);
11299 if (N1C &&
11300 DAG.MaskedValueIsZero(Op: SDValue(N, 0), Mask: APInt::getAllOnes(numBits: OpSizeInBits)))
11301 return DAG.getConstant(Val: 0, DL, VT);
11302
11303 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
11304 if (N0.getOpcode() == ISD::SRL) {
11305 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
11306 ConstantSDNode *RHS) {
11307 APInt c1 = LHS->getAPIntValue();
11308 APInt c2 = RHS->getAPIntValue();
11309 zeroExtendToMatch(LHS&: c1, RHS&: c2, Offset: 1 /* Overflow Bit */);
11310 return (c1 + c2).uge(RHS: OpSizeInBits);
11311 };
11312 if (ISD::matchBinaryPredicate(LHS: N1, RHS: N0.getOperand(i: 1), Match: MatchOutOfRange))
11313 return DAG.getConstant(Val: 0, DL, VT);
11314
11315 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
11316 ConstantSDNode *RHS) {
11317 APInt c1 = LHS->getAPIntValue();
11318 APInt c2 = RHS->getAPIntValue();
11319 zeroExtendToMatch(LHS&: c1, RHS&: c2, Offset: 1 /* Overflow Bit */);
11320 return (c1 + c2).ult(RHS: OpSizeInBits);
11321 };
11322 if (ISD::matchBinaryPredicate(LHS: N1, RHS: N0.getOperand(i: 1), Match: MatchInRange)) {
11323 SDValue Sum = DAG.getNode(Opcode: ISD::ADD, DL, VT: ShiftVT, N1, N2: N0.getOperand(i: 1));
11324 return DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: N0.getOperand(i: 0), N2: Sum);
11325 }
11326 }
11327
11328 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
11329 N0.getOperand(i: 0).getOpcode() == ISD::SRL) {
11330 SDValue InnerShift = N0.getOperand(i: 0);
11331 // TODO - support non-uniform vector shift amounts.
11332 if (auto *N001C = isConstOrConstSplat(N: InnerShift.getOperand(i: 1))) {
11333 uint64_t c1 = N001C->getZExtValue();
11334 uint64_t c2 = N1C->getZExtValue();
11335 EVT InnerShiftVT = InnerShift.getValueType();
11336 EVT ShiftAmtVT = InnerShift.getOperand(i: 1).getValueType();
11337 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
11338 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
      // This is only valid if OpSizeInBits + c1 equals the size of the inner
      // shift.
11340 if (c1 + OpSizeInBits == InnerShiftSize) {
11341 if (c1 + c2 >= InnerShiftSize)
11342 return DAG.getConstant(Val: 0, DL, VT);
11343 SDValue NewShiftAmt = DAG.getConstant(Val: c1 + c2, DL, VT: ShiftAmtVT);
11344 SDValue NewShift = DAG.getNode(Opcode: ISD::SRL, DL, VT: InnerShiftVT,
11345 N1: InnerShift.getOperand(i: 0), N2: NewShiftAmt);
11346 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: NewShift);
11347 }
11348 // In the more general case, we can clear the high bits after the shift:
11349 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
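      // e.g. x:i64, c1=8, c2=4, VT=i32:
      //   srl (trunc (srl x, 8)), 4 --> trunc (and (srl x, 12), 0x0FFFFFFF)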
11350 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
11351 c1 + c2 < InnerShiftSize) {
11352 SDValue NewShiftAmt = DAG.getConstant(Val: c1 + c2, DL, VT: ShiftAmtVT);
11353 SDValue NewShift = DAG.getNode(Opcode: ISD::SRL, DL, VT: InnerShiftVT,
11354 N1: InnerShift.getOperand(i: 0), N2: NewShiftAmt);
11355 SDValue Mask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: InnerShiftSize,
11356 loBitsSet: OpSizeInBits - c2),
11357 DL, VT: InnerShiftVT);
11358 SDValue And = DAG.getNode(Opcode: ISD::AND, DL, VT: InnerShiftVT, N1: NewShift, N2: Mask);
11359 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: And);
11360 }
11361 }
11362 }
11363
11364 if (N0.getOpcode() == ISD::SHL) {
11365 // fold (srl (shl nuw x, c), c) -> x
11366 if (N0.getOperand(i: 1) == N1 && N0->getFlags().hasNoUnsignedWrap())
11367 return N0.getOperand(i: 0);
11368
    // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2)), MASK) or
    //                               (and (srl x, (sub c2, c1)), MASK)
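    // e.g. i32: (srl (shl x, 6), 2) --> (and (shl x, 4), 0x3FFFFFF0)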
11371 if ((N0.getOperand(i: 1) == N1 || N0->hasOneUse()) &&
11372 TLI.shouldFoldConstantShiftPairToMask(N)) {
11373 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
11374 ConstantSDNode *RHS) {
11375 const APInt &LHSC = LHS->getAPIntValue();
11376 const APInt &RHSC = RHS->getAPIntValue();
11377 return LHSC.ult(RHS: OpSizeInBits) && RHSC.ult(RHS: OpSizeInBits) &&
11378 LHSC.getZExtValue() <= RHSC.getZExtValue();
11379 };
11380 if (ISD::matchBinaryPredicate(LHS: N1, RHS: N0.getOperand(i: 1), Match: MatchShiftAmount,
11381 /*AllowUndefs*/ false,
11382 /*AllowTypeMismatch*/ true)) {
11383 SDValue N01 = DAG.getZExtOrTrunc(Op: N0.getOperand(i: 1), DL, VT: ShiftVT);
11384 SDValue Diff = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShiftVT, N1: N01, N2: N1);
11385 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
11386 Mask = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Mask, N2: N01);
11387 Mask = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Mask, N2: Diff);
11388 SDValue Shift = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: N0.getOperand(i: 0), N2: Diff);
11389 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Shift, N2: Mask);
11390 }
11391 if (ISD::matchBinaryPredicate(LHS: N0.getOperand(i: 1), RHS: N1, Match: MatchShiftAmount,
11392 /*AllowUndefs*/ false,
11393 /*AllowTypeMismatch*/ true)) {
11394 SDValue N01 = DAG.getZExtOrTrunc(Op: N0.getOperand(i: 1), DL, VT: ShiftVT);
11395 SDValue Diff = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShiftVT, N1, N2: N01);
11396 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
11397 Mask = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Mask, N2: N1);
11398 SDValue Shift = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: N0.getOperand(i: 0), N2: Diff);
11399 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Shift, N2: Mask);
11400 }
11401 }
11402 }
11403
11404 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
11405 // TODO - support non-uniform vector shift amounts.
11406 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
11407 // Shifting in all undef bits?
11408 EVT SmallVT = N0.getOperand(i: 0).getValueType();
11409 unsigned BitSize = SmallVT.getScalarSizeInBits();
11410 if (N1C->getAPIntValue().uge(RHS: BitSize))
11411 return DAG.getUNDEF(VT);
11412
11413 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, VT: SmallVT)) {
11414 uint64_t ShiftAmt = N1C->getZExtValue();
11415 SDLoc DL0(N0);
11416 SDValue SmallShift =
11417 DAG.getNode(Opcode: ISD::SRL, DL: DL0, VT: SmallVT, N1: N0.getOperand(i: 0),
11418 N2: DAG.getShiftAmountConstant(Val: ShiftAmt, VT: SmallVT, DL: DL0));
11419 AddToWorklist(N: SmallShift.getNode());
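      // Mask off the top ShiftAmt bits, which the original wide srl guarantees
      // to be zero.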
11420 APInt Mask = APInt::getLowBitsSet(numBits: OpSizeInBits, loBitsSet: OpSizeInBits - ShiftAmt);
11421 return DAG.getNode(Opcode: ISD::AND, DL, VT,
11422 N1: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT, Operand: SmallShift),
11423 N2: DAG.getConstant(Val: Mask, DL, VT));
11424 }
11425 }
11426
11427 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
11428 // bit, which is unmodified by sra.
11429 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
11430 if (N0.getOpcode() == ISD::SRA)
11431 return DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: N0.getOperand(i: 0), N2: N1);
11432 }
11433
  // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and x
  // has a power-of-two bitwidth. The "5" represents (log2 (bitwidth x)).
11436 if (N1C && N0.getOpcode() == ISD::CTLZ &&
11437 isPowerOf2_32(Value: OpSizeInBits) &&
11438 N1C->getAPIntValue() == Log2_32(Value: OpSizeInBits)) {
11439 KnownBits Known = DAG.computeKnownBits(Op: N0.getOperand(i: 0));
11440
11441 // If any of the input bits are KnownOne, then the input couldn't be all
11442 // zeros, thus the result of the srl will always be zero.
11443 if (Known.One.getBoolValue()) return DAG.getConstant(Val: 0, DL: SDLoc(N0), VT);
11444
    // If all of the bits input to the ctlz node are known to be zero, then
    // the result of the ctlz is the bitwidth and the result of the shift is
    // one.
11447 APInt UnknownBits = ~Known.Zero;
11448 if (UnknownBits == 0) return DAG.getConstant(Val: 1, DL: SDLoc(N0), VT);
11449
11450 // Otherwise, check to see if there is exactly one bit input to the ctlz.
11451 if (UnknownBits.isPowerOf2()) {
      // Okay, we know that only the single bit specified by UnknownBits
11453 // could be set on input to the CTLZ node. If this bit is set, the SRL
11454 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
11455 // to an SRL/XOR pair, which is likely to simplify more.
11456 unsigned ShAmt = UnknownBits.countr_zero();
11457 SDValue Op = N0.getOperand(i: 0);
11458
11459 if (ShAmt) {
11460 SDLoc DL(N0);
11461 Op = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Op,
11462 N2: DAG.getShiftAmountConstant(Val: ShAmt, VT, DL));
11463 AddToWorklist(N: Op.getNode());
11464 }
11465 return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Op, N2: DAG.getConstant(Val: 1, DL, VT));
11466 }
11467 }
11468
11469 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
11470 if (N1.getOpcode() == ISD::TRUNCATE &&
11471 N1.getOperand(i: 0).getOpcode() == ISD::AND) {
11472 if (SDValue NewOp1 = distributeTruncateThroughAnd(N: N1.getNode()))
11473 return DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: N0, N2: NewOp1);
11474 }
11475
11476 // fold (srl (logic_op x, (shl (zext y), c1)), c1)
11477 // -> (logic_op (srl x, c1), (zext y))
11478 // c1 <= leadingzeros(zext(y))
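  // e.g. (srl (or x, (shl (zext i8 y to i32), 24)), 24)
  //   --> (or (srl x, 24), (zext i8 y to i32))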
11479 SDValue X, ZExtY;
11480 if (N1C && sd_match(N: N0, P: m_OneUse(P: m_BitwiseLogic(
11481 L: m_Value(N&: X),
11482 R: m_OneUse(P: m_Shl(L: m_AllOf(preds: m_Value(N&: ZExtY),
11483 preds: m_Opc(Opcode: ISD::ZERO_EXTEND)),
11484 R: m_Specific(N: N1))))))) {
11485 unsigned NumLeadingZeros = ZExtY.getScalarValueSizeInBits() -
11486 ZExtY.getOperand(i: 0).getScalarValueSizeInBits();
11487 if (N1C->getZExtValue() <= NumLeadingZeros)
11488 return DAG.getNode(Opcode: N0.getOpcode(), DL: SDLoc(N0), VT,
11489 N1: DAG.getNode(Opcode: ISD::SRL, DL: SDLoc(N0), VT, N1: X, N2: N1), N2: ZExtY);
11490 }
11491
11492 // fold operands of srl based on knowledge that the low bits are not
11493 // demanded.
11494 if (SimplifyDemandedBits(Op: SDValue(N, 0)))
11495 return SDValue(N, 0);
11496
11497 if (N1C && !N1C->isOpaque())
11498 if (SDValue NewSRL = visitShiftByConstant(N))
11499 return NewSRL;
11500
11501 // Attempt to convert a srl of a load into a narrower zero-extending load.
11502 if (SDValue NarrowLoad = reduceLoadWidth(N))
11503 return NarrowLoad;
11504
11505 // Here is a common situation. We want to optimize:
11506 //
11507 // %a = ...
11508 // %b = and i32 %a, 2
11509 // %c = srl i32 %b, 1
11510 // brcond i32 %c ...
11511 //
11512 // into
11513 //
11514 // %a = ...
11515 // %b = and %a, 2
11516 // %c = setcc eq %b, 0
11517 // brcond %c ...
11518 //
  // However, after the source operand of the SRL is optimized into an AND, the
  // SRL itself may not be optimized further. Look for it and add the BRCOND to
  // the worklist.
  //
  // This also tends to happen for binary operations when SimplifyDemandedBits
11524 // is involved.
11525 //
  // FIXME: This is unnecessary if we process the DAG in topological order,
11527 // which we plan to do. This workaround can be removed once the DAG is
11528 // processed in topological order.
11529 if (N->hasOneUse()) {
11530 SDNode *User = *N->user_begin();
11531
    // Look past the truncate.
11533 if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse())
11534 User = *User->user_begin();
11535
11536 if (User->getOpcode() == ISD::BRCOND || User->getOpcode() == ISD::AND ||
11537 User->getOpcode() == ISD::OR || User->getOpcode() == ISD::XOR)
11538 AddToWorklist(N: User);
11539 }
11540
11541 // Try to transform this shift into a multiply-high if
11542 // it matches the appropriate pattern detected in combineShiftToMULH.
11543 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
11544 return MULH;
11545
11546 if (SDValue AVG = foldShiftToAvg(N, DL))
11547 return AVG;
11548
11549 SDValue Y;
11550 if (VT.getScalarSizeInBits() % 2 == 0 && N1C) {
    // Fold clmul(zext(x), zext(y)) >> (HalfBW - 1) -> clmulr(x, y) and
    // clmul(zext(x), zext(y)) >> HalfBW -> clmulh(x, y), where x and y are
    // HalfBW bits wide.
11552 unsigned HalfBW = VT.getScalarSizeInBits() / 2;
11553 if (sd_match(N: N0, P: m_Clmul(L: m_ZExt(Op: m_Value(N&: X)), R: m_ZExt(Op: m_Value(N&: Y)))) &&
11554 X.getScalarValueSizeInBits() == HalfBW &&
11555 Y.getScalarValueSizeInBits() == HalfBW) {
11556 if (N1C->getZExtValue() == HalfBW - 1 &&
11557 (!LegalOperations ||
11558 TLI.isOperationLegalOrCustom(Op: ISD::CLMULR, VT: X.getValueType())))
11559 return DAG.getNode(
11560 Opcode: ISD::ZERO_EXTEND, DL, VT,
11561 Operand: DAG.getNode(Opcode: ISD::CLMULR, DL, VT: X.getValueType(), N1: X, N2: Y));
11562 if (N1C->getZExtValue() == HalfBW &&
11563 (!LegalOperations ||
11564 TLI.isOperationLegalOrCustom(Op: ISD::CLMULH, VT: X.getValueType())))
11565 return DAG.getNode(
11566 Opcode: ISD::ZERO_EXTEND, DL, VT,
11567 Operand: DAG.getNode(Opcode: ISD::CLMULH, DL, VT: X.getValueType(), N1: X, N2: Y));
11568 }
11569 }
11570
11571 // Fold bitreverse(clmul(bitreverse(x), bitreverse(y))) >> 1 ->
11572 // clmulh(x, y).
11573 if (N1C && N1C->getZExtValue() == 1 &&
11574 sd_match(N: N0, P: m_BitReverse(Op: m_Clmul(L: m_BitReverse(Op: m_Value(N&: X)),
11575 R: m_BitReverse(Op: m_Value(N&: Y))))))
11576 return DAG.getNode(Opcode: ISD::CLMULH, DL, VT, N1: X, N2: Y);
11577
11578 return SDValue();
11579}
11580
11581SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
11582 EVT VT = N->getValueType(ResNo: 0);
11583 SDValue N0 = N->getOperand(Num: 0);
11584 SDValue N1 = N->getOperand(Num: 1);
11585 SDValue N2 = N->getOperand(Num: 2);
11586 bool IsFSHL = N->getOpcode() == ISD::FSHL;
11587 unsigned BitWidth = VT.getScalarSizeInBits();
11588 SDLoc DL(N);
11589
11590 // fold (fshl/fshr C0, C1, C2) -> C3
11591 if (SDValue C =
11592 DAG.FoldConstantArithmetic(Opcode: N->getOpcode(), DL, VT, Ops: {N0, N1, N2}))
11593 return C;
11594
11595 // fold (fshl N0, N1, 0) -> N0
11596 // fold (fshr N0, N1, 0) -> N1
11597 if (isPowerOf2_32(Value: BitWidth))
11598 if (DAG.MaskedValueIsZero(
11599 Op: N2, Mask: APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
11600 return IsFSHL ? N0 : N1;
11601
11602 auto IsUndefOrZero = [](SDValue V) {
11603 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
11604 };
11605
11606 // TODO - support non-uniform vector shift amounts.
11607 if (ConstantSDNode *Cst = isConstOrConstSplat(N: N2)) {
11608 EVT ShAmtTy = N2.getValueType();
11609
11610 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
11611 if (Cst->getAPIntValue().uge(RHS: BitWidth)) {
11612 uint64_t RotAmt = Cst->getAPIntValue().urem(RHS: BitWidth);
11613 return DAG.getNode(Opcode: N->getOpcode(), DL, VT, N1: N0, N2: N1,
11614 N3: DAG.getConstant(Val: RotAmt, DL, VT: ShAmtTy));
11615 }
11616
11617 unsigned ShAmt = Cst->getZExtValue();
11618 if (ShAmt == 0)
11619 return IsFSHL ? N0 : N1;
11620
11621 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
11622 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
11623 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
11624 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
11625 if (IsUndefOrZero(N0))
11626 return DAG.getNode(
11627 Opcode: ISD::SRL, DL, VT, N1,
11628 N2: DAG.getConstant(Val: IsFSHL ? BitWidth - ShAmt : ShAmt, DL, VT: ShAmtTy));
11629 if (IsUndefOrZero(N1))
11630 return DAG.getNode(
11631 Opcode: ISD::SHL, DL, VT, N1: N0,
11632 N2: DAG.getConstant(Val: IsFSHL ? ShAmt : BitWidth - ShAmt, DL, VT: ShAmtTy));
11633
11634 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
11635 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
11636 // TODO - bigendian support once we have test coverage.
    // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
11638 // TODO - permit LHS EXTLOAD if extensions are shifted out.
11639 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
11640 !DAG.getDataLayout().isBigEndian()) {
11641 auto *LHS = dyn_cast<LoadSDNode>(Val&: N0);
11642 auto *RHS = dyn_cast<LoadSDNode>(Val&: N1);
11643 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
11644 LHS->getAddressSpace() == RHS->getAddressSpace() &&
11645 (LHS->hasNUsesOfValue(NUses: 1, Value: 0) || RHS->hasNUsesOfValue(NUses: 1, Value: 0)) &&
11646 ISD::isNON_EXTLoad(N: RHS) && ISD::isNON_EXTLoad(N: LHS)) {
11647 if (DAG.areNonVolatileConsecutiveLoads(LD: LHS, Base: RHS, Bytes: BitWidth / 8, Dist: 1)) {
11648 SDLoc DL(RHS);
11649 uint64_t PtrOff =
11650 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
11651 Align NewAlign = commonAlignment(A: RHS->getAlign(), Offset: PtrOff);
11652 unsigned Fast = 0;
11653 if (TLI.allowsMemoryAccess(Context&: *DAG.getContext(), DL: DAG.getDataLayout(), VT,
11654 AddrSpace: RHS->getAddressSpace(), Alignment: NewAlign,
11655 Flags: RHS->getMemOperand()->getFlags(), Fast: &Fast) &&
11656 Fast) {
11657 SDValue NewPtr = DAG.getMemBasePlusOffset(
11658 Base: RHS->getBasePtr(), Offset: TypeSize::getFixed(ExactSize: PtrOff), DL);
11659 AddToWorklist(N: NewPtr.getNode());
11660 SDValue Load = DAG.getLoad(
11661 VT, dl: DL, Chain: RHS->getChain(), Ptr: NewPtr,
11662 PtrInfo: RHS->getPointerInfo().getWithOffset(O: PtrOff), Alignment: NewAlign,
11663 MMOFlags: RHS->getMemOperand()->getFlags(), AAInfo: RHS->getAAInfo());
11664 DAG.makeEquivalentMemoryOrdering(OldLoad: LHS, NewMemOp: Load.getValue(R: 1));
11665 DAG.makeEquivalentMemoryOrdering(OldLoad: RHS, NewMemOp: Load.getValue(R: 1));
11666 return Load;
11667 }
11668 }
11669 }
11670 }
11671 }
11672
11673 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
11674 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
  // iff we know the shift amount is in range.
11676 // TODO: when is it worth doing SUB(BW, N2) as well?
11677 if (isPowerOf2_32(Value: BitWidth)) {
11678 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
11679 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(Op: N2, Mask: ~ModuloBits))
11680 return DAG.getNode(Opcode: ISD::SRL, DL, VT, N1, N2);
11681 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(Op: N2, Mask: ~ModuloBits))
11682 return DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: N0, N2);
11683 }
11684
11685 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
11686 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
11687 // TODO: Investigate flipping this rotate if only one is legal.
  // If funnel shift is legal as well, we might be better off avoiding a
  // non-constant (BW - N2).
11690 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
11691 if (N0 == N1 && hasOperation(Opcode: RotOpc, VT))
11692 return DAG.getNode(Opcode: RotOpc, DL, VT, N1: N0, N2);
11693
11694 // Simplify, based on bits shifted out of N0/N1.
11695 if (SimplifyDemandedBits(Op: SDValue(N, 0)))
11696 return SDValue(N, 0);
11697
11698 return SDValue();
11699}
11700
11701SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
11702 SDValue N0 = N->getOperand(Num: 0);
11703 SDValue N1 = N->getOperand(Num: 1);
11704 if (SDValue V = DAG.simplifyShift(X: N0, Y: N1))
11705 return V;
11706
11707 SDLoc DL(N);
11708 EVT VT = N0.getValueType();
11709
11710 // fold (*shlsat c1, c2) -> c1<<c2
11711 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: N->getOpcode(), DL, VT, Ops: {N0, N1}))
11712 return C;
11713
11714 ConstantSDNode *N1C = isConstOrConstSplat(N: N1);
11715
11716 if (!LegalOperations || TLI.isOperationLegalOrCustom(Op: ISD::SHL, VT)) {
11717 // fold (sshlsat x, c) -> (shl x, c)
11718 if (N->getOpcode() == ISD::SSHLSAT && N1C &&
11719 N1C->getAPIntValue().ult(RHS: DAG.ComputeNumSignBits(Op: N0)))
11720 return DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: N0, N2: N1);
11721
11722 // fold (ushlsat x, c) -> (shl x, c)
11723 if (N->getOpcode() == ISD::USHLSAT && N1C &&
11724 N1C->getAPIntValue().ule(
11725 RHS: DAG.computeKnownBits(Op: N0).countMinLeadingZeros()))
11726 return DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: N0, N2: N1);
11727 }
11728
11729 return SDValue();
11730}
11731
// Given an ABS node, detect the following patterns:
// (ABS (SUB (EXTEND a), (EXTEND b))).
// (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
// Generates a UABD/SABD instruction.
11736SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
11737 EVT SrcVT = N->getValueType(ResNo: 0);
11738
11739 if (N->getOpcode() == ISD::TRUNCATE)
11740 N = N->getOperand(Num: 0).getNode();
11741
11742 EVT VT = N->getValueType(ResNo: 0);
11743 SDValue Op0, Op1;
11744
11745 if (!sd_match(N, P: m_Abs(Op: m_AnyOf(preds: m_Sub(L: m_Value(N&: Op0), R: m_Value(N&: Op1)),
11746 preds: m_Add(L: m_Value(N&: Op0), R: m_Value(N&: Op1))))))
11747 return SDValue();
11748
11749 SDValue AbsOp0 = N->getOperand(Num: 0);
11750 bool IsAdd = AbsOp0.getOpcode() == ISD::ADD;
  // For the add form, make sure negating Op1 cannot overflow (no element is
  // the minimum signed value).
11752 if (IsAdd) {
11753 // Elements of Op1 must be constant and != VT.minSignedValue() (or undef)
11754 std::function<bool(ConstantSDNode *)> IsNotMinSignedInt =
11755 [VT](ConstantSDNode *C) {
11756 if (C == nullptr)
11757 return true;
11758 return !C->getAPIntValue()
11759 .trunc(width: VT.getScalarSizeInBits())
11760 .isMinSignedValue();
11761 };
11762
11763 if (!ISD::matchUnaryPredicate(Op: Op1, Match: IsNotMinSignedInt, /*AllowUndefs=*/true,
11764 /*AllowTruncation=*/true))
11765 return SDValue();
11766 }
11767
11768 unsigned Opc0 = Op0.getOpcode();
11769
  // Check if the operands of the sub are (zero|sign)-extended; otherwise, fall
  // back to ValueTracking.
11772 if (Opc0 != Op1.getOpcode() ||
11773 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
11774 Opc0 != ISD::SIGN_EXTEND_INREG)) {
11775 // fold (abs (sub nsw x, y)) -> abds(x, y)
11776 // fold (abs (add nsw x, -y)) -> abds(x, y)
11777 bool AbsOpWillNSW =
11778 AbsOp0->getFlags().hasNoSignedWrap() ||
11779 (IsAdd ? DAG.willNotOverflowAdd(/*IsSigned=*/true, N0: Op0, N1: Op1)
11780 : DAG.willNotOverflowSub(/*IsSigned=*/true, N0: Op0, N1: Op1));
11781
11782 // Don't fold this for unsupported types as we lose the NSW handling.
11783 if (hasOperation(Opcode: ISD::ABDS, VT) && TLI.preferABDSToABSWithNSW(VT) &&
11784 AbsOpWillNSW) {
11785 if (IsAdd)
11786 Op1 = DAG.getNegative(Val: Op1, DL: SDLoc(Op1), VT);
11787 SDValue ABD = DAG.getNode(Opcode: ISD::ABDS, DL, VT, N1: Op0, N2: Op1);
11788 return DAG.getZExtOrTrunc(Op: ABD, DL, VT: SrcVT);
11789 }
11790 // fold (abs (sub x, y)) -> abdu(x, y)
11791 if (hasOperation(Opcode: ISD::ABDU, VT) && DAG.SignBitIsZero(Op: Op0) &&
11792 DAG.SignBitIsZero(Op: Op1)) {
11793 if (IsAdd)
11794 Op1 = DAG.getNegative(Val: Op1, DL: SDLoc(Op1), VT);
11795 SDValue ABD = DAG.getNode(Opcode: ISD::ABDU, DL, VT, N1: Op0, N2: Op1);
11796 return DAG.getZExtOrTrunc(Op: ABD, DL, VT: SrcVT);
11797 }
11798 return SDValue();
11799 }
11800
  // The IsAdd case explicitly checks for a constant/bv-of-constant RHS, which
  // implies either Opc0 != Op1.getOpcode() or Opc0 is not one of
  // {zext/sext/sign_ext_inreg}, so it was already handled by the above if
  // statement.
11804 assert(!IsAdd && "Unexpected abs(add(x,y)) pattern");
11805
11806 EVT VT0, VT1;
11807 if (Opc0 == ISD::SIGN_EXTEND_INREG) {
11808 VT0 = cast<VTSDNode>(Val: Op0.getOperand(i: 1))->getVT();
11809 VT1 = cast<VTSDNode>(Val: Op1.getOperand(i: 1))->getVT();
11810 } else {
11811 VT0 = Op0.getOperand(i: 0).getValueType();
11812 VT1 = Op1.getOperand(i: 0).getValueType();
11813 }
11814 unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;
11815
11816 // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
11817 // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
11818 EVT MaxVT = VT0.bitsGT(VT: VT1) ? VT0 : VT1;
11819 if ((VT0 == MaxVT || Op0->hasOneUse()) &&
11820 (VT1 == MaxVT || Op1->hasOneUse()) &&
11821 (!LegalTypes || hasOperation(Opcode: ABDOpcode, VT: MaxVT))) {
11822 SDValue ABD = DAG.getNode(Opcode: ABDOpcode, DL, VT: MaxVT,
11823 N1: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MaxVT, Operand: Op0),
11824 N2: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MaxVT, Operand: Op1));
11825 ABD = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT, Operand: ABD);
11826 return DAG.getZExtOrTrunc(Op: ABD, DL, VT: SrcVT);
11827 }
11828
11829 // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
11830 // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
11831 if (!LegalOperations || hasOperation(Opcode: ABDOpcode, VT)) {
11832 SDValue ABD = DAG.getNode(Opcode: ABDOpcode, DL, VT, N1: Op0, N2: Op1);
11833 return DAG.getZExtOrTrunc(Op: ABD, DL, VT: SrcVT);
11834 }
11835
11836 return SDValue();
11837}
11838
11839SDValue DAGCombiner::visitABS(SDNode *N) {
11840 SDValue N0 = N->getOperand(Num: 0);
11841 EVT VT = N->getValueType(ResNo: 0);
11842 SDLoc DL(N);
11843
11844 // fold (abs c1) -> c2
11845 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::ABS, DL, VT, Ops: {N0}))
11846 return C;
11847 // fold (abs (abs x)) -> (abs x)
11848 if (N0.getOpcode() == ISD::ABS)
11849 return N0;
11850 // fold (abs x) -> x iff not-negative
11851 if (DAG.SignBitIsZero(Op: N0))
11852 return N0;
11853
11854 if (SDValue ABD = foldABSToABD(N, DL))
11855 return ABD;
11856
11857 // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
11858 // iff zero_extend/truncate are free.
11859 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
11860 EVT ExtVT = cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT();
11861 if (TLI.isTruncateFree(FromVT: VT, ToVT: ExtVT) && TLI.isZExtFree(FromTy: ExtVT, ToTy: VT) &&
11862 TLI.isTypeDesirableForOp(ISD::ABS, VT: ExtVT) &&
11863 hasOperation(Opcode: ISD::ABS, VT: ExtVT)) {
11864 return DAG.getNode(
11865 Opcode: ISD::ZERO_EXTEND, DL, VT,
11866 Operand: DAG.getNode(Opcode: ISD::ABS, DL, VT: ExtVT,
11867 Operand: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: ExtVT, Operand: N0.getOperand(i: 0))));
11868 }
11869 }
11870
11871 return SDValue();
11872}
11873
11874SDValue DAGCombiner::visitCLMUL(SDNode *N) {
11875 unsigned Opcode = N->getOpcode();
11876 SDValue N0 = N->getOperand(Num: 0);
11877 SDValue N1 = N->getOperand(Num: 1);
11878 EVT VT = N->getValueType(ResNo: 0);
11879 SDLoc DL(N);
11880
11881 // fold (clmul c1, c2)
11882 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, Ops: {N0, N1}))
11883 return C;
11884
11885 // canonicalize constant to RHS
11886 if (DAG.isConstantIntBuildVectorOrConstantInt(N: N0) &&
11887 !DAG.isConstantIntBuildVectorOrConstantInt(N: N1))
11888 return DAG.getNode(Opcode, DL, VT, N1, N2: N0);
11889
11890 // fold (clmul x, 0) -> 0
11891 if (isNullConstant(V: N1) || ISD::isConstantSplatVectorAllZeros(N: N1.getNode()))
11892 return DAG.getConstant(Val: 0, DL, VT);
11893
11894 return SDValue();
11895}
11896
11897SDValue DAGCombiner::visitBSWAP(SDNode *N) {
11898 SDValue N0 = N->getOperand(Num: 0);
11899 EVT VT = N->getValueType(ResNo: 0);
11900 SDLoc DL(N);
11901
11902 // fold (bswap c1) -> c2
11903 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::BSWAP, DL, VT, Ops: {N0}))
11904 return C;
11905 // fold (bswap (bswap x)) -> x
11906 if (N0.getOpcode() == ISD::BSWAP)
11907 return N0.getOperand(i: 0);
11908
11909 // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
11910 // isn't supported, it will be expanded to bswap followed by a manual reversal
11911 // of bits in each byte. By placing bswaps before bitreverse, we can remove
11912 // the two bswaps if the bitreverse gets expanded.
11913 if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
11914 SDValue BSwap = DAG.getNode(Opcode: ISD::BSWAP, DL, VT, Operand: N0.getOperand(i: 0));
11915 return DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT, Operand: BSwap);
11916 }
11917
  // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
  // iff c >= bw/2 (i.e. the lower half is known zero)
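  // e.g. i64: (bswap (shl x, 48)) --> (zext (bswap (trunc i32 (shl x, 16))))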
11920 unsigned BW = VT.getScalarSizeInBits();
11921 if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
11922 auto *ShAmt = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
11923 EVT HalfVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: BW / 2);
11924 if (ShAmt && ShAmt->getAPIntValue().ult(RHS: BW) &&
11925 ShAmt->getZExtValue() >= (BW / 2) &&
11926 (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(VT: HalfVT) &&
11927 TLI.isTruncateFree(FromVT: VT, ToVT: HalfVT) &&
11928 (!LegalOperations || hasOperation(Opcode: ISD::BSWAP, VT: HalfVT))) {
11929 SDValue Res = N0.getOperand(i: 0);
11930 if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
11931 Res = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Res,
11932 N2: DAG.getShiftAmountConstant(Val: NewShAmt, VT, DL));
11933 Res = DAG.getZExtOrTrunc(Op: Res, DL, VT: HalfVT);
11934 Res = DAG.getNode(Opcode: ISD::BSWAP, DL, VT: HalfVT, Operand: Res);
11935 return DAG.getZExtOrTrunc(Op: Res, DL, VT);
11936 }
11937 }
11938
11939 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
11940 // inverse-shift-of-bswap:
11941 // bswap (X u<< C) --> (bswap X) u>> C
11942 // bswap (X u>> C) --> (bswap X) u<< C
11943 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11944 N0.hasOneUse()) {
11945 auto *ShAmt = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
11946 if (ShAmt && ShAmt->getAPIntValue().ult(RHS: BW) &&
11947 ShAmt->getZExtValue() % 8 == 0) {
11948 SDValue NewSwap = DAG.getNode(Opcode: ISD::BSWAP, DL, VT, Operand: N0.getOperand(i: 0));
11949 unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
11950 return DAG.getNode(Opcode: InverseShift, DL, VT, N1: NewSwap, N2: N0.getOperand(i: 1));
11951 }
11952 }
11953
11954 if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
11955 return V;
11956
11957 return SDValue();
11958}
11959
11960SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
11961 SDValue N0 = N->getOperand(Num: 0);
11962 EVT VT = N->getValueType(ResNo: 0);
11963 SDLoc DL(N);
11964
11965 // fold (bitreverse c1) -> c2
11966 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::BITREVERSE, DL, VT, Ops: {N0}))
11967 return C;
11968
11969 // fold (bitreverse (bitreverse x)) -> x
11970 if (N0.getOpcode() == ISD::BITREVERSE)
11971 return N0.getOperand(i: 0);
11972
11973 SDValue X, Y;
11974
11975 // fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y)
11976 if ((!LegalOperations || TLI.isOperationLegal(Op: ISD::SHL, VT)) &&
11977 sd_match(N: N0, P: m_Srl(L: m_BitReverse(Op: m_Value(N&: X)), R: m_Value(N&: Y))))
11978 return DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: Y);
11979
11980 // fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y)
11981 if ((!LegalOperations || TLI.isOperationLegal(Op: ISD::SRL, VT)) &&
11982 sd_match(N: N0, P: m_Shl(L: m_BitReverse(Op: m_Value(N&: X)), R: m_Value(N&: Y))))
11983 return DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: X, N2: Y);
11984
11985 // fold bitreverse(clmul(bitreverse(x), bitreverse(y))) -> clmulr(x, y)
11986 if (sd_match(N: N0, P: m_Clmul(L: m_BitReverse(Op: m_Value(N&: X)), R: m_BitReverse(Op: m_Value(N&: Y)))))
11987 return DAG.getNode(Opcode: ISD::CLMULR, DL, VT, N1: X, N2: Y);
11988
11989 return SDValue();
11990}
11991
11992// Fold (ctlz (xor x, (sra x, bitwidth-1))) -> (add (ctls x), 1).
// Fold (ctlz (or (shl (xor x, (sra x, bitwidth-1)), 1), 1)) -> (ctls x).
11994SDValue DAGCombiner::foldCTLZToCTLS(SDValue Src, const SDLoc &DL) {
11995 EVT VT = Src.getValueType();
11996
11997 auto LK = TLI.getTypeConversion(Context&: *DAG.getContext(), VT);
11998 if ((LK.first != TargetLoweringBase::TypeLegal &&
11999 LK.first != TargetLoweringBase::TypePromoteInteger) ||
12000 !TLI.isOperationLegalOrCustom(Op: ISD::CTLS, VT: LK.second))
12001 return SDValue();
12002
12003 unsigned BitWidth = VT.getScalarSizeInBits();
12004
12005 bool NeedAdd = true;
12006
12007 SDValue X;
12008 if (sd_match(N: Src, P: m_OneUse(P: m_Or(L: m_OneUse(P: m_Shl(L: m_Value(N&: X), R: m_SpecificInt(V: 1))),
12009 R: m_SpecificInt(V: 1))))) {
12010 NeedAdd = false;
12011 Src = X;
12012 }
12013
12014 if (!sd_match(N: Src,
12015 P: m_OneUse(P: m_Xor(L: m_Value(N&: X),
12016 R: m_OneUse(P: m_Sra(L: m_Deferred(V&: X),
12017 R: m_SpecificInt(V: BitWidth - 1)))))))
12018 return SDValue();
12019
12020 SDValue Res = DAG.getNode(Opcode: ISD::CTLS, DL, VT, Operand: X);
12021 if (!NeedAdd)
12022 return Res;
12023
12024 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Res, N2: DAG.getConstant(Val: 1, DL, VT));
12025}
12026
12027SDValue DAGCombiner::visitCTLZ(SDNode *N) {
12028 SDValue N0 = N->getOperand(Num: 0);
12029 EVT VT = N->getValueType(ResNo: 0);
12030 SDLoc DL(N);
12031
12032 // fold (ctlz c1) -> c2
12033 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::CTLZ, DL, VT, Ops: {N0}))
12034 return C;
12035
12036 // If the value is known never to be zero, switch to the undef version.
12037 if (!LegalOperations || TLI.isOperationLegal(Op: ISD::CTLZ_ZERO_UNDEF, VT))
12038 if (DAG.isKnownNeverZero(Op: N0))
12039 return DAG.getNode(Opcode: ISD::CTLZ_ZERO_UNDEF, DL, VT, Operand: N0);
12040
12041 if (SDValue V = foldCTLZToCTLS(Src: N0, DL))
12042 return V;
12043
12044 return SDValue();
12045}
12046
12047SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
12048 SDValue N0 = N->getOperand(Num: 0);
12049 EVT VT = N->getValueType(ResNo: 0);
12050 SDLoc DL(N);
12051
12052 // fold (ctlz_zero_undef c1) -> c2
12053 if (SDValue C =
12054 DAG.FoldConstantArithmetic(Opcode: ISD::CTLZ_ZERO_UNDEF, DL, VT, Ops: {N0}))
12055 return C;
12056
12057 if (SDValue V = foldCTLZToCTLS(Src: N0, DL))
12058 return V;
12059
12060 return SDValue();
12061}
12062
12063SDValue DAGCombiner::visitCTTZ(SDNode *N) {
12064 SDValue N0 = N->getOperand(Num: 0);
12065 EVT VT = N->getValueType(ResNo: 0);
12066 SDLoc DL(N);
12067
12068 // fold (cttz c1) -> c2
12069 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::CTTZ, DL, VT, Ops: {N0}))
12070 return C;
12071
12072 // If the value is known never to be zero, switch to the undef version.
12073 if (!LegalOperations || TLI.isOperationLegal(Op: ISD::CTTZ_ZERO_UNDEF, VT))
12074 if (DAG.isKnownNeverZero(Op: N0))
12075 return DAG.getNode(Opcode: ISD::CTTZ_ZERO_UNDEF, DL, VT, Operand: N0);
12076
12077 return SDValue();
12078}
12079
12080SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
12081 SDValue N0 = N->getOperand(Num: 0);
12082 EVT VT = N->getValueType(ResNo: 0);
12083 SDLoc DL(N);
12084
12085 // fold (cttz_zero_undef c1) -> c2
12086 if (SDValue C =
12087 DAG.FoldConstantArithmetic(Opcode: ISD::CTTZ_ZERO_UNDEF, DL, VT, Ops: {N0}))
12088 return C;
12089 return SDValue();
12090}
12091
12092SDValue DAGCombiner::visitCTPOP(SDNode *N) {
12093 SDValue N0 = N->getOperand(Num: 0);
12094 EVT VT = N->getValueType(ResNo: 0);
12095 unsigned NumBits = VT.getScalarSizeInBits();
12096 SDLoc DL(N);
12097
12098 // fold (ctpop c1) -> c2
12099 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::CTPOP, DL, VT, Ops: {N0}))
12100 return C;
12101
  // If the source is being shifted, but the shift doesn't affect any active
  // bits, then we can call CTPOP on the shift source directly.
12104 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SHL) {
12105 if (ConstantSDNode *AmtC = isConstOrConstSplat(N: N0.getOperand(i: 1))) {
12106 const APInt &Amt = AmtC->getAPIntValue();
12107 if (Amt.ult(RHS: NumBits)) {
12108 KnownBits KnownSrc = DAG.computeKnownBits(Op: N0.getOperand(i: 0));
12109 if ((N0.getOpcode() == ISD::SRL &&
12110 Amt.ule(RHS: KnownSrc.countMinTrailingZeros())) ||
12111 (N0.getOpcode() == ISD::SHL &&
12112 Amt.ule(RHS: KnownSrc.countMinLeadingZeros()))) {
12113 return DAG.getNode(Opcode: ISD::CTPOP, DL, VT, Operand: N0.getOperand(i: 0));
12114 }
12115 }
12116 }
12117 }
12118
  // If the upper bits are known to be zero, then see if it's profitable to
  // only count the lower bits.
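  // e.g. for an i64 ctpop whose top 32 bits are known zero:
  //   (ctpop x) --> (zext (ctpop (trunc i32 x)))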
12121 if (VT.isScalarInteger() && NumBits > 8 && (NumBits & 1) == 0) {
12122 EVT HalfVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumBits / 2);
12123 if (hasOperation(Opcode: ISD::CTPOP, VT: HalfVT) &&
12124 TLI.isTypeDesirableForOp(ISD::CTPOP, VT: HalfVT) &&
12125 TLI.isTruncateFree(Val: N0, VT2: HalfVT) && TLI.isZExtFree(FromTy: HalfVT, ToTy: VT)) {
12126 APInt UpperBits = APInt::getHighBitsSet(numBits: NumBits, hiBitsSet: NumBits / 2);
12127 if (DAG.MaskedValueIsZero(Op: N0, Mask: UpperBits)) {
12128 SDValue PopCnt = DAG.getNode(Opcode: ISD::CTPOP, DL, VT: HalfVT,
12129 Operand: DAG.getZExtOrTrunc(Op: N0, DL, VT: HalfVT));
12130 return DAG.getZExtOrTrunc(Op: PopCnt, DL, VT);
12131 }
12132 }
12133 }
12134
12135 return SDValue();
12136}
12137
12138static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
12139 SDValue RHS, const SDNodeFlags Flags,
12140 const TargetLowering &TLI) {
12141 EVT VT = LHS.getValueType();
12142 if (!VT.isFloatingPoint())
12143 return false;
12144
12145 return Flags.hasNoSignedZeros() &&
12146 TLI.isProfitableToCombineMinNumMaxNum(VT) &&
12147 (Flags.hasNoNaNs() ||
12148 (DAG.isKnownNeverNaN(Op: RHS) && DAG.isKnownNeverNaN(Op: LHS)));
12149}
12150
12151static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
12152 SDValue RHS, SDValue True, SDValue False,
12153 ISD::CondCode CC,
12154 const TargetLowering &TLI,
12155 SelectionDAG &DAG) {
12156 EVT TransformVT = TLI.getTypeToTransformTo(Context&: *DAG.getContext(), VT);
12157 switch (CC) {
12158 case ISD::SETOLT:
12159 case ISD::SETOLE:
12160 case ISD::SETLT:
12161 case ISD::SETLE:
12162 case ISD::SETULT:
12163 case ISD::SETULE: {
    // Since the operands are already known never to be NaN here, either fminnum
    // or fminnum_ieee is OK. Try the IEEE version first, since fminnum is
    // expanded in terms of it.
12167 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
12168 if (TLI.isOperationLegalOrCustom(Op: IEEEOpcode, VT))
12169 return DAG.getNode(Opcode: IEEEOpcode, DL, VT, N1: LHS, N2: RHS);
12170
12171 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
12172 if (TLI.isOperationLegalOrCustom(Op: Opcode, VT: TransformVT))
12173 return DAG.getNode(Opcode, DL, VT, N1: LHS, N2: RHS);
12174 return SDValue();
12175 }
12176 case ISD::SETOGT:
12177 case ISD::SETOGE:
12178 case ISD::SETGT:
12179 case ISD::SETGE:
12180 case ISD::SETUGT:
12181 case ISD::SETUGE: {
12182 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
12183 if (TLI.isOperationLegalOrCustom(Op: IEEEOpcode, VT))
12184 return DAG.getNode(Opcode: IEEEOpcode, DL, VT, N1: LHS, N2: RHS);
12185
12186 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
12187 if (TLI.isOperationLegalOrCustom(Op: Opcode, VT: TransformVT))
12188 return DAG.getNode(Opcode, DL, VT, N1: LHS, N2: RHS);
12189 return SDValue();
12190 }
12191 default:
12192 return SDValue();
12193 }
12194}
12195
// Convert (sr[al] (add n[su]w x, y), 1) -> (avgfloor[su] x, y)
12197SDValue DAGCombiner::foldShiftToAvg(SDNode *N, const SDLoc &DL) {
12198 const unsigned Opcode = N->getOpcode();
12199 if (Opcode != ISD::SRA && Opcode != ISD::SRL)
12200 return SDValue();
12201
12202 EVT VT = N->getValueType(ResNo: 0);
12203 bool IsUnsigned = Opcode == ISD::SRL;
12204
12205 // Captured values.
12206 SDValue A, B;
12207
  // Match the floor average, as it is common to both floor/ceil avgs, and make
  // sure the add doesn't wrap.
12210 SDNodeFlags Flags =
12211 IsUnsigned ? SDNodeFlags::NoUnsignedWrap : SDNodeFlags::NoSignedWrap;
12212 if (sd_match(N, P: m_BinOp(Opc: Opcode,
12213 L: m_c_BinOp(Opc: ISD::ADD, L: m_Value(N&: A), R: m_Value(N&: B), Flgs: Flags),
12214 R: m_One()))) {
12215 // Decide whether signed or unsigned.
12216 unsigned FloorISD = IsUnsigned ? ISD::AVGFLOORU : ISD::AVGFLOORS;
12217 if (hasOperation(Opcode: FloorISD, VT))
12218 return DAG.getNode(Opcode: FloorISD, DL, VT, Ops: {A, B});
12219 }
12220
12221 return SDValue();
12222}
12223
12224SDValue DAGCombiner::foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT) {
12225 unsigned Opc = N->getOpcode();
12226 SDValue X, Y, Z;
12227 if (sd_match(
12228 N, P: m_BitwiseLogic(L: m_Value(N&: X), R: m_Add(L: m_Not(V: m_Value(N&: Y)), R: m_Value(N&: Z)))))
12229 return DAG.getNode(Opcode: Opc, DL, VT, N1: X,
12230 N2: DAG.getNOT(DL, Val: DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Y, N2: Z), VT));
12231
12232 if (sd_match(N, P: m_BitwiseLogic(L: m_Value(N&: X), R: m_Sub(L: m_OneUse(P: m_Not(V: m_Value(N&: Y))),
12233 R: m_Value(N&: Z)))))
12234 return DAG.getNode(Opcode: Opc, DL, VT, N1: X,
12235 N2: DAG.getNOT(DL, Val: DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Y, N2: Z), VT));
12236
12237 return SDValue();
12238}
12239
12240/// Generate Min/Max node
12241SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
12242 SDValue RHS, SDValue True,
12243 SDValue False, ISD::CondCode CC) {
12244 if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
12245 return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
12246
12247 // If we can't directly match this, try to see if we can pull an fneg out of
12248 // the select.
12249 SDValue NegTrue = TLI.getCheaperOrNeutralNegatedExpression(
12250 Op: True, DAG, LegalOps: LegalOperations, OptForSize: ForCodeSize);
12251 if (!NegTrue)
12252 return SDValue();
12253
12254 HandleSDNode NegTrueHandle(NegTrue);
12255
12256 // Try to unfold an fneg from the select if we are comparing the negated
12257 // constant.
12258 //
12259 // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
12260 //
12261 // TODO: Handle fabs
12262 if (LHS == NegTrue) {
    // See if we can also pull an fneg out of the RHS comparison constant.
12265 SDValue NegRHS = TLI.getCheaperOrNeutralNegatedExpression(
12266 Op: RHS, DAG, LegalOps: LegalOperations, OptForSize: ForCodeSize);
12267 if (NegRHS) {
12268 HandleSDNode NegRHSHandle(NegRHS);
12269 if (NegRHS == False) {
12270 SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True: NegTrue,
12271 False, CC, TLI, DAG);
12272 if (Combined)
12273 return DAG.getNode(Opcode: ISD::FNEG, DL, VT, Operand: Combined);
12274 }
12275 }
12276 }
12277
12278 return SDValue();
12279}
12280
12281/// If a (v)select has a condition value that is a sign-bit test, try to smear
12282/// the condition operand sign-bit across the value width and use it as a mask.
12283static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL,
12284 SelectionDAG &DAG) {
12285 SDValue Cond = N->getOperand(Num: 0);
12286 SDValue C1 = N->getOperand(Num: 1);
12287 SDValue C2 = N->getOperand(Num: 2);
12288 if (!isConstantOrConstantVector(N: C1) || !isConstantOrConstantVector(N: C2))
12289 return SDValue();
12290
12291 EVT VT = N->getValueType(ResNo: 0);
12292 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
12293 VT != Cond.getOperand(i: 0).getValueType())
12294 return SDValue();
12295
12296 // The inverted-condition + commuted-select variants of these patterns are
12297 // canonicalized to these forms in IR.
12298 SDValue X = Cond.getOperand(i: 0);
12299 SDValue CondC = Cond.getOperand(i: 1);
12300 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Cond.getOperand(i: 2))->get();
12301 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(V: CondC) &&
12302 isAllOnesOrAllOnesSplat(V: C2)) {
12303 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
12304 SDValue ShAmtC = DAG.getConstant(Val: X.getScalarValueSizeInBits() - 1, DL, VT);
12305 SDValue Sra = DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: X, N2: ShAmtC);
12306 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Sra, N2: C1);
12307 }
12308 if (CC == ISD::SETLT && isNullOrNullSplat(V: CondC) && isNullOrNullSplat(V: C2)) {
12309 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
12310 SDValue ShAmtC = DAG.getConstant(Val: X.getScalarValueSizeInBits() - 1, DL, VT);
12311 SDValue Sra = DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: X, N2: ShAmtC);
12312 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Sra, N2: C1);
12313 }
12314 return SDValue();
12315}
12316
12317static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT,
12318 const TargetLowering &TLI) {
12319 if (!TLI.convertSelectOfConstantsToMath(VT))
12320 return false;
12321
12322 if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
12323 return true;
12324 if (!TLI.isOperationLegalOrCustom(Op: ISD::SELECT_CC, VT))
12325 return true;
12326
12327 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Cond.getOperand(i: 2))->get();
12328 if (CC == ISD::SETLT && isNullOrNullSplat(V: Cond.getOperand(i: 1)))
12329 return true;
12330 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(V: Cond.getOperand(i: 1)))
12331 return true;
12332
12333 return false;
12334}
12335
12336SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
12337 SDValue Cond = N->getOperand(Num: 0);
12338 SDValue N1 = N->getOperand(Num: 1);
12339 SDValue N2 = N->getOperand(Num: 2);
12340 EVT VT = N->getValueType(ResNo: 0);
12341 EVT CondVT = Cond.getValueType();
12342 SDLoc DL(N);
12343
12344 if (!VT.isInteger())
12345 return SDValue();
12346
12347 auto *C1 = dyn_cast<ConstantSDNode>(Val&: N1);
12348 auto *C2 = dyn_cast<ConstantSDNode>(Val&: N2);
12349 if (!C1 || !C2)
12350 return SDValue();
12351
12352 if (CondVT != MVT::i1 || LegalOperations) {
12353 // fold (select Cond, 0, 1) -> (xor Cond, 1)
    // We can't do this reliably if integer-based booleans have different
    // contents from floating-point-based booleans. This is because we can't
    // tell whether we
12356 // have an integer-based boolean or a floating-point-based boolean unless we
12357 // can find the SETCC that produced it and inspect its operands. This is
12358 // fairly easy if C is the SETCC node, but it can potentially be
12359 // undiscoverable (or not reasonably discoverable). For example, it could be
12360 // in another basic block or it could require searching a complicated
12361 // expression.
12362 if (CondVT.isInteger() &&
12363 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
12364 TargetLowering::ZeroOrOneBooleanContent &&
12365 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
12366 TargetLowering::ZeroOrOneBooleanContent &&
12367 C1->isZero() && C2->isOne()) {
12368 SDValue NotCond =
12369 DAG.getNode(Opcode: ISD::XOR, DL, VT: CondVT, N1: Cond, N2: DAG.getConstant(Val: 1, DL, VT: CondVT));
12370 if (VT.bitsEq(VT: CondVT))
12371 return NotCond;
12372 return DAG.getZExtOrTrunc(Op: NotCond, DL, VT);
12373 }
12374
12375 return SDValue();
12376 }
12377
12378 // Only do this before legalization to avoid conflicting with target-specific
12379 // transforms in the other direction (create a select from a zext/sext). There
12380 // is also a target-independent combine here in DAGCombiner in the other
12381 // direction for (select Cond, -1, 0) when the condition is not i1.
12382 assert(CondVT == MVT::i1 && !LegalOperations);
12383
12384 // select Cond, 1, 0 --> zext (Cond)
12385 if (C1->isOne() && C2->isZero())
12386 return DAG.getZExtOrTrunc(Op: Cond, DL, VT);
12387
12388 // select Cond, -1, 0 --> sext (Cond)
12389 if (C1->isAllOnes() && C2->isZero())
12390 return DAG.getSExtOrTrunc(Op: Cond, DL, VT);
12391
12392 // select Cond, 0, 1 --> zext (!Cond)
12393 if (C1->isZero() && C2->isOne()) {
12394 SDValue NotCond = DAG.getNOT(DL, Val: Cond, VT: MVT::i1);
12395 NotCond = DAG.getZExtOrTrunc(Op: NotCond, DL, VT);
12396 return NotCond;
12397 }
12398
12399 // select Cond, 0, -1 --> sext (!Cond)
12400 if (C1->isZero() && C2->isAllOnes()) {
12401 SDValue NotCond = DAG.getNOT(DL, Val: Cond, VT: MVT::i1);
12402 NotCond = DAG.getSExtOrTrunc(Op: NotCond, DL, VT);
12403 return NotCond;
12404 }
12405
12406 // Use a target hook because some targets may prefer to transform in the
12407 // other direction.
12408 if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI))
12409 return SDValue();
12410
12411 // For any constants that differ by 1, we can transform the select into
12412 // an extend and add.
12413 const APInt &C1Val = C1->getAPIntValue();
12414 const APInt &C2Val = C2->getAPIntValue();
12415
12416 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
12417 if (C1Val - 1 == C2Val) {
12418 Cond = DAG.getZExtOrTrunc(Op: Cond, DL, VT);
12419 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Cond, N2);
12420 }
12421
12422 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
12423 if (C1Val + 1 == C2Val) {
12424 Cond = DAG.getSExtOrTrunc(Op: Cond, DL, VT);
12425 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Cond, N2);
12426 }
12427
12428 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
12429 if (C1Val.isPowerOf2() && C2Val.isZero()) {
12430 Cond = DAG.getZExtOrTrunc(Op: Cond, DL, VT);
12431 SDValue ShAmtC =
12432 DAG.getShiftAmountConstant(Val: C1Val.exactLogBase2(), VT, DL);
12433 return DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Cond, N2: ShAmtC);
12434 }
12435
12436 // select Cond, -1, C --> or (sext Cond), C
12437 if (C1->isAllOnes()) {
12438 Cond = DAG.getSExtOrTrunc(Op: Cond, DL, VT);
12439 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Cond, N2);
12440 }
12441
12442 // select Cond, C, -1 --> or (sext (not Cond)), C
12443 if (C2->isAllOnes()) {
12444 SDValue NotCond = DAG.getNOT(DL, Val: Cond, VT: MVT::i1);
12445 NotCond = DAG.getSExtOrTrunc(Op: NotCond, DL, VT);
12446 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: NotCond, N2: N1);
12447 }
12448
12449 if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
12450 return V;
12451
12452 return SDValue();
12453}
12454
12455template <class MatchContextClass>
12456static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL,
12457 SelectionDAG &DAG) {
12458 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT ||
12459 N->getOpcode() == ISD::VP_SELECT) &&
12460 "Expected a (v)(vp.)select");
12461 SDValue Cond = N->getOperand(Num: 0);
12462 SDValue T = N->getOperand(Num: 1), F = N->getOperand(Num: 2);
12463 EVT VT = N->getValueType(ResNo: 0);
12464 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12465 MatchContextClass matcher(DAG, TLI, N);
12466
12467 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
12468 return SDValue();
12469
12470 // select Cond, Cond, F --> or Cond, freeze(F)
12471 // select Cond, 1, F --> or Cond, freeze(F)
12472 if (Cond == T || isOneOrOneSplat(V: T, /* AllowUndefs */ true))
12473 return matcher.getNode(ISD::OR, DL, VT, Cond, DAG.getFreeze(V: F));
12474
12475 // select Cond, T, Cond --> and Cond, freeze(T)
12476 // select Cond, T, 0 --> and Cond, freeze(T)
12477 if (Cond == F || isNullOrNullSplat(V: F, /* AllowUndefs */ true))
12478 return matcher.getNode(ISD::AND, DL, VT, Cond, DAG.getFreeze(V: T));
12479
12480 // select Cond, T, 1 --> or (not Cond), freeze(T)
12481 if (isOneOrOneSplat(V: F, /* AllowUndefs */ true)) {
12482 SDValue NotCond =
12483 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
12484 return matcher.getNode(ISD::OR, DL, VT, NotCond, DAG.getFreeze(V: T));
12485 }
12486
12487 // select Cond, 0, F --> and (not Cond), freeze(F)
12488 if (isNullOrNullSplat(V: T, /* AllowUndefs */ true)) {
12489 SDValue NotCond =
12490 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
12491 return matcher.getNode(ISD::AND, DL, VT, NotCond, DAG.getFreeze(V: F));
12492 }
12493
12494 return SDValue();
12495}
12496
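/// If a vselect's condition is a sign-bit test of a value with the same type
/// as the select, lower the select to a sign-splat mask: smear the sign bit
/// across the element (sra by BW-1) and combine it with AND/OR instead of
/// emitting a blend.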
12497static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
12498 SDValue N0 = N->getOperand(Num: 0);
12499 SDValue N1 = N->getOperand(Num: 1);
12500 SDValue N2 = N->getOperand(Num: 2);
12501 EVT VT = N->getValueType(ResNo: 0);
12502 unsigned EltSizeInBits = VT.getScalarSizeInBits();
12503
12504 SDValue Cond0, Cond1;
12505 ISD::CondCode CC;
12506 if (!sd_match(N: N0, P: m_OneUse(P: m_SetCC(LHS: m_Value(N&: Cond0), RHS: m_Value(N&: Cond1),
12507 CC: m_CondCode(CC)))) ||
12508 VT != Cond0.getValueType())
12509 return SDValue();
12510
12511 // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
12512 // compare is inverted from that pattern ("Cond0 s> -1").
12513 if (CC == ISD::SETLT && isNullOrNullSplat(V: Cond1))
12514 ; // This is the pattern we are looking for.
12515 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(V: Cond1))
12516 std::swap(a&: N1, b&: N2);
12517 else
12518 return SDValue();
12519
12520 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & freeze(N1)
12521 if (isNullOrNullSplat(V: N2)) {
12522 SDLoc DL(N);
12523 SDValue ShiftAmt = DAG.getShiftAmountConstant(Val: EltSizeInBits - 1, VT, DL);
12524 SDValue Sra = DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Cond0, N2: ShiftAmt);
12525 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Sra, N2: DAG.getFreeze(V: N1));
12526 }
12527
12528 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | freeze(N2)
12529 if (isAllOnesOrAllOnesSplat(V: N1)) {
12530 SDLoc DL(N);
12531 SDValue ShiftAmt = DAG.getShiftAmountConstant(Val: EltSizeInBits - 1, VT, DL);
12532 SDValue Sra = DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Cond0, N2: ShiftAmt);
12533 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Sra, N2: DAG.getFreeze(V: N2));
12534 }
12535
12536 // If we have to invert the sign bit mask, only do that transform if the
12537 // target has a bitwise 'and not' instruction (the invert is free).
12538  // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & freeze(N2)
12539 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12540 if (isNullOrNullSplat(V: N1) && TLI.hasAndNot(X: N1)) {
12541 SDLoc DL(N);
12542 SDValue ShiftAmt = DAG.getShiftAmountConstant(Val: EltSizeInBits - 1, VT, DL);
12543 SDValue Sra = DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Cond0, N2: ShiftAmt);
12544 SDValue Not = DAG.getNOT(DL, Val: Sra, VT);
12545 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Not, N2: DAG.getFreeze(V: N2));
12546 }
12547
12548 // TODO: There's another pattern in this family, but it may require
12549 // implementing hasOrNot() to check for profitability:
12550 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | freeze(N2)
12551
12552 return SDValue();
12553}
12554
12555// Match SELECTs with absolute difference patterns.
12556// (select (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
12557// (select (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
12558// (select (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
12559// (select (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
12560SDValue DAGCombiner::foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
12561 SDValue False, ISD::CondCode CC,
12562 const SDLoc &DL) {
12563 bool IsSigned = isSignedIntSetCC(Code: CC);
12564 unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
12565 EVT VT = LHS.getValueType();
12566
12567 if (LegalOperations && !hasOperation(Opcode: ABDOpc, VT))
12568 return SDValue();
12569
12570 switch (CC) {
12571 case ISD::SETGT:
12572 case ISD::SETGE:
12573 case ISD::SETUGT:
12574 case ISD::SETUGE:
12575 if (sd_match(N: True, P: m_AnyOf(preds: m_Sub(L: m_Specific(N: LHS), R: m_Specific(N: RHS)),
12576 preds: m_Add(L: m_Specific(N: LHS), R: m_SpecificNeg(V: RHS)))) &&
12577 sd_match(N: False, P: m_AnyOf(preds: m_Sub(L: m_Specific(N: RHS), R: m_Specific(N: LHS)),
12578 preds: m_Add(L: m_Specific(N: RHS), R: m_SpecificNeg(V: LHS)))))
12579 return DAG.getNode(Opcode: ABDOpc, DL, VT, N1: LHS, N2: RHS);
12580 if (sd_match(N: True, P: m_AnyOf(preds: m_Sub(L: m_Specific(N: RHS), R: m_Specific(N: LHS)),
12581 preds: m_Add(L: m_Specific(N: RHS), R: m_SpecificNeg(V: LHS)))) &&
12582 sd_match(N: False, P: m_AnyOf(preds: m_Sub(L: m_Specific(N: LHS), R: m_Specific(N: RHS)),
12583 preds: m_Add(L: m_Specific(N: LHS), R: m_SpecificNeg(V: RHS)))) &&
12584 hasOperation(Opcode: ABDOpc, VT))
12585 return DAG.getNegative(Val: DAG.getNode(Opcode: ABDOpc, DL, VT, N1: LHS, N2: RHS), DL, VT);
12586 break;
12587 case ISD::SETLT:
12588 case ISD::SETLE:
12589 case ISD::SETULT:
12590 case ISD::SETULE:
12591 if (sd_match(N: True, P: m_AnyOf(preds: m_Sub(L: m_Specific(N: RHS), R: m_Specific(N: LHS)),
12592 preds: m_Add(L: m_Specific(N: RHS), R: m_SpecificNeg(V: LHS)))) &&
12593 sd_match(N: False, P: m_AnyOf(preds: m_Sub(L: m_Specific(N: LHS), R: m_Specific(N: RHS)),
12594 preds: m_Add(L: m_Specific(N: LHS), R: m_SpecificNeg(V: RHS)))))
12595 return DAG.getNode(Opcode: ABDOpc, DL, VT, N1: LHS, N2: RHS);
12596 if (sd_match(N: True, P: m_AnyOf(preds: m_Sub(L: m_Specific(N: LHS), R: m_Specific(N: RHS)),
12597 preds: m_Add(L: m_Specific(N: LHS), R: m_SpecificNeg(V: RHS)))) &&
12598 sd_match(N: False, P: m_AnyOf(preds: m_Sub(L: m_Specific(N: RHS), R: m_Specific(N: LHS)),
12599 preds: m_Add(L: m_Specific(N: RHS), R: m_SpecificNeg(V: LHS)))) &&
12600 hasOperation(Opcode: ABDOpc, VT))
12601 return DAG.getNegative(Val: DAG.getNode(Opcode: ABDOpc, DL, VT, N1: LHS, N2: RHS), DL, VT);
12602 break;
12603 default:
12604 break;
12605 }
12606
12607 return SDValue();
12608}
12609
12610// ([v]select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
12611// ([v]select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
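// For example, with i8 elements and C == 230 (so ~C == 25):
//   (select (ugt x, 230), (add x, 25), x) -> (umin (add x, 25), x)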
12612SDValue DAGCombiner::foldSelectToUMin(SDValue LHS, SDValue RHS, SDValue True,
12613 SDValue False, ISD::CondCode CC,
12614 const SDLoc &DL) {
12615 APInt C;
12616 EVT VT = True.getValueType();
12617 if (sd_match(N: RHS, P: m_ConstInt(V&: C)) && hasUMin(VT)) {
12618 if (CC == ISD::SETUGT && LHS == False &&
12619 sd_match(N: True, P: m_Add(L: m_Specific(N: False), R: m_SpecificInt(V: ~C)))) {
12620 SDValue AddC = DAG.getConstant(Val: ~C, DL, VT);
12621 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: False, N2: AddC);
12622 return DAG.getNode(Opcode: ISD::UMIN, DL, VT, N1: Add, N2: False);
12623 }
12624 if (CC == ISD::SETULT && LHS == True &&
12625 sd_match(N: False, P: m_Add(L: m_Specific(N: True), R: m_SpecificInt(V: -C)))) {
12626 SDValue AddC = DAG.getConstant(Val: -C, DL, VT);
12627 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: True, N2: AddC);
12628 return DAG.getNode(Opcode: ISD::UMIN, DL, VT, N1: True, N2: Add);
12629 }
12630 }
12631 return SDValue();
12632}
12633
12634SDValue DAGCombiner::visitSELECT(SDNode *N) {
12635 SDValue N0 = N->getOperand(Num: 0);
12636 SDValue N1 = N->getOperand(Num: 1);
12637 SDValue N2 = N->getOperand(Num: 2);
12638 EVT VT = N->getValueType(ResNo: 0);
12639 EVT VT0 = N0.getValueType();
12640 SDLoc DL(N);
12641 SDNodeFlags Flags = N->getFlags();
12642
12643 if (SDValue V = DAG.simplifySelect(Cond: N0, TVal: N1, FVal: N2))
12644 return V;
12645
12646 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG))
12647 return V;
12648
12649 // select (not Cond), N1, N2 -> select Cond, N2, N1
12650 if (SDValue F = extractBooleanFlip(V: N0, DAG, TLI, Force: false))
12651 return DAG.getSelect(DL, VT, Cond: F, LHS: N2, RHS: N1, Flags);
12652
12653 if (SDValue V = foldSelectOfConstants(N))
12654 return V;
12655
12656 // If we can fold this based on the true/false value, do so.
12657 if (SimplifySelectOps(SELECT: N, LHS: N1, RHS: N2))
12658 return SDValue(N, 0); // Don't revisit N.
12659
12660 if (VT0 == MVT::i1) {
12661 // The code in this block deals with the following 2 equivalences:
12662 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
12663 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
12664 // The target can specify its preferred form with the
12665    // shouldNormalizeToSelectSequence() callback. However, we always transform
12666    // to the right side if the inner select already exists in the DAG, and we
12667    // always transform to the left side if we know that we can further
12668 // optimize the combination of the conditions.
12669 bool normalizeToSequence =
12670 TLI.shouldNormalizeToSelectSequence(Context&: *DAG.getContext(), VT);
12671 // select (and Cond0, Cond1), X, Y
12672 // -> select Cond0, (select Cond1, X, Y), Y
12673 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
12674 SDValue Cond0 = N0->getOperand(Num: 0);
12675 SDValue Cond1 = N0->getOperand(Num: 1);
12676 SDValue InnerSelect =
12677 DAG.getNode(Opcode: ISD::SELECT, DL, VT: N1.getValueType(), N1: Cond1, N2: N1, N3: N2, Flags);
12678 if (normalizeToSequence || !InnerSelect.use_empty())
12679 return DAG.getNode(Opcode: ISD::SELECT, DL, VT: N1.getValueType(), N1: Cond0,
12680 N2: InnerSelect, N3: N2, Flags);
12681 // Cleanup on failure.
12682 if (InnerSelect.use_empty())
12683 recursivelyDeleteUnusedNodes(N: InnerSelect.getNode());
12684 }
12685 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
12686 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
12687 SDValue Cond0 = N0->getOperand(Num: 0);
12688 SDValue Cond1 = N0->getOperand(Num: 1);
12689 SDValue InnerSelect = DAG.getNode(Opcode: ISD::SELECT, DL, VT: N1.getValueType(),
12690 N1: Cond1, N2: N1, N3: N2, Flags);
12691 if (normalizeToSequence || !InnerSelect.use_empty())
12692 return DAG.getNode(Opcode: ISD::SELECT, DL, VT: N1.getValueType(), N1: Cond0, N2: N1,
12693 N3: InnerSelect, Flags);
12694 // Cleanup on failure.
12695 if (InnerSelect.use_empty())
12696 recursivelyDeleteUnusedNodes(N: InnerSelect.getNode());
12697 }
12698
12699 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
12700 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
12701 SDValue N1_0 = N1->getOperand(Num: 0);
12702 SDValue N1_1 = N1->getOperand(Num: 1);
12703 SDValue N1_2 = N1->getOperand(Num: 2);
12704 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
12705 // Create the actual and node if we can generate good code for it.
12706 if (!normalizeToSequence) {
12707 SDValue And = DAG.getNode(Opcode: ISD::AND, DL, VT: N0.getValueType(), N1: N0, N2: N1_0);
12708 return DAG.getNode(Opcode: ISD::SELECT, DL, VT: N1.getValueType(), N1: And, N2: N1_1,
12709 N3: N2, Flags);
12710 }
12711 // Otherwise see if we can optimize the "and" to a better pattern.
12712 if (SDValue Combined = visitANDLike(N0, N1: N1_0, N)) {
12713 return DAG.getNode(Opcode: ISD::SELECT, DL, VT: N1.getValueType(), N1: Combined, N2: N1_1,
12714 N3: N2, Flags);
12715 }
12716 }
12717 }
12718 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
12719 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
12720 SDValue N2_0 = N2->getOperand(Num: 0);
12721 SDValue N2_1 = N2->getOperand(Num: 1);
12722 SDValue N2_2 = N2->getOperand(Num: 2);
12723 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
12724 // Create the actual or node if we can generate good code for it.
12725 if (!normalizeToSequence) {
12726 SDValue Or = DAG.getNode(Opcode: ISD::OR, DL, VT: N0.getValueType(), N1: N0, N2: N2_0);
12727 return DAG.getNode(Opcode: ISD::SELECT, DL, VT: N1.getValueType(), N1: Or, N2: N1,
12728 N3: N2_2, Flags);
12729 }
12730 // Otherwise see if we can optimize to a better pattern.
12731 if (SDValue Combined = visitORLike(N0, N1: N2_0, DL))
12732 return DAG.getNode(Opcode: ISD::SELECT, DL, VT: N1.getValueType(), N1: Combined, N2: N1,
12733 N3: N2_2, Flags);
12734 }
12735 }
12736
12737 // select usubo(x, y).overflow, (sub y, x), (usubo x, y) -> abdu(x, y)
12738 if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
12739 N2.getNode() == N0.getNode() && N2.getResNo() == 0 &&
12740 N1.getOpcode() == ISD::SUB && N2.getOperand(i: 0) == N1.getOperand(i: 1) &&
12741 N2.getOperand(i: 1) == N1.getOperand(i: 0) &&
12742 (!LegalOperations || TLI.isOperationLegal(Op: ISD::ABDU, VT)))
12743 return DAG.getNode(Opcode: ISD::ABDU, DL, VT, N1: N0.getOperand(i: 0), N2: N0.getOperand(i: 1));
12744
12745 // select usubo(x, y).overflow, (usubo x, y), (sub y, x) -> neg (abdu x, y)
12746 if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
12747 N1.getNode() == N0.getNode() && N1.getResNo() == 0 &&
12748 N2.getOpcode() == ISD::SUB && N2.getOperand(i: 0) == N1.getOperand(i: 1) &&
12749 N2.getOperand(i: 1) == N1.getOperand(i: 0) &&
12750 (!LegalOperations || TLI.isOperationLegal(Op: ISD::ABDU, VT)))
12751 return DAG.getNegative(
12752 Val: DAG.getNode(Opcode: ISD::ABDU, DL, VT, N1: N0.getOperand(i: 0), N2: N0.getOperand(i: 1)),
12753 DL, VT);
12754 }
12755
12756 // Fold selects based on a setcc into other things, such as min/max/abs.
12757 if (N0.getOpcode() == ISD::SETCC) {
12758 SDValue Cond0 = N0.getOperand(i: 0), Cond1 = N0.getOperand(i: 1);
12759 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N0.getOperand(i: 2))->get();
12760
12761 // select (fcmp lt x, y), x, y -> fminnum x, y
12762 // select (fcmp gt x, y), x, y -> fmaxnum x, y
12763 //
12764 // This is OK if we don't care what happens if either operand is a NaN.
12765 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS: N1, RHS: N2, Flags, TLI))
12766 if (SDValue FMinMax =
12767 combineMinNumMaxNum(DL, VT, LHS: Cond0, RHS: Cond1, True: N1, False: N2, CC))
12768 return FMinMax;
12769
12770 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
12771 // This is conservatively limited to pre-legal-operations to give targets
12772 // a chance to reverse the transform if they want to do that. Also, it is
12773 // unlikely that the pattern would be formed late, so it's probably not
12774 // worth going through the other checks.
12775 if (!LegalOperations && TLI.isOperationLegalOrCustom(Op: ISD::UADDO, VT) &&
12776 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(V: N1) &&
12777 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(i: 0)) {
12778 auto *C = dyn_cast<ConstantSDNode>(Val: N2.getOperand(i: 1));
12779 auto *NotC = dyn_cast<ConstantSDNode>(Val&: Cond1);
12780 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
12781 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
12782 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
12783 //
12784 // The IR equivalent of this transform would have this form:
12785 // %a = add %x, C
12786 // %c = icmp ugt %x, ~C
12787 // %r = select %c, -1, %a
12788 // =>
12789 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
12790 // %u0 = extractvalue %u, 0
12791 // %u1 = extractvalue %u, 1
12792 // %r = select %u1, -1, %u0
12793 SDVTList VTs = DAG.getVTList(VT1: VT, VT2: VT0);
12794 SDValue UAO = DAG.getNode(Opcode: ISD::UADDO, DL, VTList: VTs, N1: Cond0, N2: N2.getOperand(i: 1));
12795 return DAG.getSelect(DL, VT, Cond: UAO.getValue(R: 1), LHS: N1, RHS: UAO.getValue(R: 0));
12796 }
12797 }
12798
12799 if (TLI.isOperationLegal(Op: ISD::SELECT_CC, VT) ||
12800 (!LegalOperations &&
12801 TLI.isOperationLegalOrCustom(Op: ISD::SELECT_CC, VT))) {
12802 // Any flags available in a select/setcc fold will be on the setcc as they
12803 // migrated from fcmp
12804 return DAG.getNode(Opcode: ISD::SELECT_CC, DL, VT, N1: Cond0, N2: Cond1, N3: N1, N4: N2,
12805 N5: N0.getOperand(i: 2), Flags: N0->getFlags());
12806 }
12807
12808 if (SDValue ABD = foldSelectToABD(LHS: Cond0, RHS: Cond1, True: N1, False: N2, CC, DL))
12809 return ABD;
12810
12811 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
12812 return NewSel;
12813
12814 // (select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
12815 // (select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
12816 if (SDValue UMin = foldSelectToUMin(LHS: Cond0, RHS: Cond1, True: N1, False: N2, CC, DL))
12817 return UMin;
12818 }
12819
12820 if (!VT.isVector())
12821 if (SDValue BinOp = foldSelectOfBinops(N))
12822 return BinOp;
12823
12824 if (SDValue R = combineSelectAsExtAnd(Cond: N0, T: N1, F: N2, DL, DAG))
12825 return R;
12826
12827 return SDValue();
12828}
12829
12830// This function assumes both value operands of the vselect are CONCAT_VECTORS
12831// nodes and that the condition is a BUILD_VECTOR of ConstantSDNodes (or undefs).
12832static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
12833 SDLoc DL(N);
12834 SDValue Cond = N->getOperand(Num: 0);
12835 SDValue LHS = N->getOperand(Num: 1);
12836 SDValue RHS = N->getOperand(Num: 2);
12837 EVT VT = N->getValueType(ResNo: 0);
12838 int NumElems = VT.getVectorNumElements();
12839 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
12840 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
12841 Cond.getOpcode() == ISD::BUILD_VECTOR);
12842
12843  // CONCAT_VECTORS can take an arbitrary number of arguments. We only care
12844  // about the two-operand form here.
12845 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
12846 return SDValue();
12847
12848 // We're sure we have an even number of elements due to the
12849 // concat_vectors we have as arguments to vselect.
12850  // Skip BUILD_VECTOR elements until we find one that's not an UNDEF.
12851  // After finding the first non-UNDEF element, keep looping up to half the
12852  // length of the BUILD_VECTOR and check that all non-undef elements match it.
12853 ConstantSDNode *BottomHalf = nullptr;
12854 for (int i = 0; i < NumElems / 2; ++i) {
12855 if (Cond->getOperand(Num: i)->isUndef())
12856 continue;
12857
12858 if (BottomHalf == nullptr)
12859 BottomHalf = cast<ConstantSDNode>(Val: Cond.getOperand(i));
12860 else if (Cond->getOperand(Num: i).getNode() != BottomHalf)
12861 return SDValue();
12862 }
12863
12864 // Do the same for the second half of the BuildVector
12865 ConstantSDNode *TopHalf = nullptr;
12866 for (int i = NumElems / 2; i < NumElems; ++i) {
12867 if (Cond->getOperand(Num: i)->isUndef())
12868 continue;
12869
12870 if (TopHalf == nullptr)
12871 TopHalf = cast<ConstantSDNode>(Val: Cond.getOperand(i));
12872 else if (Cond->getOperand(Num: i).getNode() != TopHalf)
12873 return SDValue();
12874 }
12875
12876 assert(TopHalf && BottomHalf &&
12877 "One half of the selector was all UNDEFs and the other was all the "
12878 "same value. This should have been addressed before this function.");
12879 return DAG.getNode(
12880 Opcode: ISD::CONCAT_VECTORS, DL, VT,
12881 N1: BottomHalf->isZero() ? RHS->getOperand(Num: 0) : LHS->getOperand(Num: 0),
12882 N2: TopHalf->isZero() ? RHS->getOperand(Num: 1) : LHS->getOperand(Num: 1));
12883}
12884
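// Try to fold a uniform (splat) component of a gather/scatter index into the
// scalar base pointer so the remaining index is cheaper:
//   Index = splat(x)         --> BasePtr += x, Index = splat(0)
//   Index = add(splat(x), y) --> BasePtr += x, Index = y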
12885bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
12886 SelectionDAG &DAG, const SDLoc &DL) {
12887
12888 // Only perform the transformation when existing operands can be reused.
12889 if (IndexIsScaled)
12890 return false;
12891
12892 if (!isNullConstant(V: BasePtr) && !Index.hasOneUse())
12893 return false;
12894
12895 EVT VT = BasePtr.getValueType();
12896
12897 if (SDValue SplatVal = DAG.getSplatValue(V: Index);
12898 SplatVal && !isNullConstant(V: SplatVal) &&
12899 SplatVal.getValueType() == VT) {
12900 BasePtr = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: BasePtr, N2: SplatVal);
12901 Index = DAG.getSplat(VT: Index.getValueType(), DL, Op: DAG.getConstant(Val: 0, DL, VT));
12902 return true;
12903 }
12904
12905 if (Index.getOpcode() != ISD::ADD)
12906 return false;
12907
12908 if (SDValue SplatVal = DAG.getSplatValue(V: Index.getOperand(i: 0));
12909 SplatVal && SplatVal.getValueType() == VT) {
12910 BasePtr = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: BasePtr, N2: SplatVal);
12911 Index = Index.getOperand(i: 1);
12912 return true;
12913 }
12914 if (SDValue SplatVal = DAG.getSplatValue(V: Index.getOperand(i: 1));
12915 SplatVal && SplatVal.getValueType() == VT) {
12916 BasePtr = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: BasePtr, N2: SplatVal);
12917 Index = Index.getOperand(i: 0);
12918 return true;
12919 }
12920 return false;
12921}
12922
12923// Fold sext/zext of index into index type.
12924bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
12925 SelectionDAG &DAG) {
12926 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12927
12928 // It's always safe to look through zero extends.
12929 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
12930 if (TLI.shouldRemoveExtendFromGSIndex(Extend: Index, DataVT)) {
12931 IndexType = ISD::UNSIGNED_SCALED;
12932 Index = Index.getOperand(i: 0);
12933 return true;
12934 }
12935 if (ISD::isIndexTypeSigned(IndexType)) {
12936 IndexType = ISD::UNSIGNED_SCALED;
12937 return true;
12938 }
12939 }
12940
12941 // It's only safe to look through sign extends when Index is signed.
12942 if (Index.getOpcode() == ISD::SIGN_EXTEND &&
12943 ISD::isIndexTypeSigned(IndexType) &&
12944 TLI.shouldRemoveExtendFromGSIndex(Extend: Index, DataVT)) {
12945 Index = Index.getOperand(i: 0);
12946 return true;
12947 }
12948
12949 return false;
12950}
12951
12952SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
12953 VPScatterSDNode *MSC = cast<VPScatterSDNode>(Val: N);
12954 SDValue Mask = MSC->getMask();
12955 SDValue Chain = MSC->getChain();
12956 SDValue Index = MSC->getIndex();
12957 SDValue Scale = MSC->getScale();
12958 SDValue StoreVal = MSC->getValue();
12959 SDValue BasePtr = MSC->getBasePtr();
12960 SDValue VL = MSC->getVectorLength();
12961 ISD::MemIndexType IndexType = MSC->getIndexType();
12962 SDLoc DL(N);
12963
12964 // Zap scatters with a zero mask.
12965 if (ISD::isConstantSplatVectorAllZeros(N: Mask.getNode()))
12966 return Chain;
12967
12968 if (refineUniformBase(BasePtr, Index, IndexIsScaled: MSC->isIndexScaled(), DAG, DL)) {
12969 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
12970 return DAG.getScatterVP(VTs: DAG.getVTList(VT: MVT::Other), VT: MSC->getMemoryVT(),
12971 dl: DL, Ops, MMO: MSC->getMemOperand(), IndexType);
12972 }
12973
12974 if (refineIndexType(Index, IndexType, DataVT: StoreVal.getValueType(), DAG)) {
12975 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
12976 return DAG.getScatterVP(VTs: DAG.getVTList(VT: MVT::Other), VT: MSC->getMemoryVT(),
12977 dl: DL, Ops, MMO: MSC->getMemOperand(), IndexType);
12978 }
12979
12980 return SDValue();
12981}
12982
12983SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
12984 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(Val: N);
12985 SDValue Mask = MSC->getMask();
12986 SDValue Chain = MSC->getChain();
12987 SDValue Index = MSC->getIndex();
12988 SDValue Scale = MSC->getScale();
12989 SDValue StoreVal = MSC->getValue();
12990 SDValue BasePtr = MSC->getBasePtr();
12991 ISD::MemIndexType IndexType = MSC->getIndexType();
12992 SDLoc DL(N);
12993
12994 // Zap scatters with a zero mask.
12995 if (ISD::isConstantSplatVectorAllZeros(N: Mask.getNode()))
12996 return Chain;
12997
12998 if (refineUniformBase(BasePtr, Index, IndexIsScaled: MSC->isIndexScaled(), DAG, DL)) {
12999 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
13000 return DAG.getMaskedScatter(VTs: DAG.getVTList(VT: MVT::Other), MemVT: MSC->getMemoryVT(),
13001 dl: DL, Ops, MMO: MSC->getMemOperand(), IndexType,
13002 IsTruncating: MSC->isTruncatingStore());
13003 }
13004
13005 if (refineIndexType(Index, IndexType, DataVT: StoreVal.getValueType(), DAG)) {
13006 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
13007 return DAG.getMaskedScatter(VTs: DAG.getVTList(VT: MVT::Other), MemVT: MSC->getMemoryVT(),
13008 dl: DL, Ops, MMO: MSC->getMemOperand(), IndexType,
13009 IsTruncating: MSC->isTruncatingStore());
13010 }
13011
13012 return SDValue();
13013}
13014
13015SDValue DAGCombiner::visitMSTORE(SDNode *N) {
13016 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(Val: N);
13017 SDValue Mask = MST->getMask();
13018 SDValue Chain = MST->getChain();
13019 SDValue Value = MST->getValue();
13020 SDValue Ptr = MST->getBasePtr();
13021
13022 // Zap masked stores with a zero mask.
13023 if (ISD::isConstantSplatVectorAllZeros(N: Mask.getNode()))
13024 return Chain;
13025
13026  // Remove a dead masked store if this store overwrites it: the base pointers
  // match and either the masks and store sizes are equal, or this store has an
  // all-ones mask and is at least as wide.
13027 if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Val&: Chain)) {
13028 if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
13029 MST1->isSimple() && MST1->getBasePtr() == Ptr &&
13030 !MST->getBasePtr().isUndef() &&
13031 ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
13032 MST1->getMemoryVT().getStoreSize()) ||
13033 ISD::isConstantSplatVectorAllOnes(N: Mask.getNode())) &&
13034 TypeSize::isKnownLE(LHS: MST1->getMemoryVT().getStoreSize(),
13035 RHS: MST->getMemoryVT().getStoreSize())) {
13036 CombineTo(N: MST1, Res: MST1->getChain());
13037 if (N->getOpcode() != ISD::DELETED_NODE)
13038 AddToWorklist(N);
13039 return SDValue(N, 0);
13040 }
13041 }
13042
13043  // If this is a masked store with an all-ones mask, we can use an unmasked store.
13044 // FIXME: Can we do this for indexed, compressing, or truncating stores?
13045 if (ISD::isConstantSplatVectorAllOnes(N: Mask.getNode()) && MST->isUnindexed() &&
13046 !MST->isCompressingStore() && !MST->isTruncatingStore())
13047 return DAG.getStore(Chain: MST->getChain(), dl: SDLoc(N), Val: MST->getValue(),
13048 Ptr: MST->getBasePtr(), PtrInfo: MST->getPointerInfo(),
13049 Alignment: MST->getBaseAlign(), MMOFlags: MST->getMemOperand()->getFlags(),
13050 AAInfo: MST->getAAInfo());
13051
13052 // Try transforming N to an indexed store.
13053 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
13054 return SDValue(N, 0);
13055
13056 if (MST->isTruncatingStore() && MST->isUnindexed() &&
13057 Value.getValueType().isInteger() &&
13058 (!isa<ConstantSDNode>(Val: Value) ||
13059 !cast<ConstantSDNode>(Val&: Value)->isOpaque())) {
13060 APInt TruncDemandedBits =
13061 APInt::getLowBitsSet(numBits: Value.getScalarValueSizeInBits(),
13062 loBitsSet: MST->getMemoryVT().getScalarSizeInBits());
13063
13064 // See if we can simplify the operation with
13065 // SimplifyDemandedBits, which only works if the value has a single use.
13066 if (SimplifyDemandedBits(Op: Value, DemandedBits: TruncDemandedBits)) {
13067      // Re-visit the store if anything changed and the store hasn't been merged
13068      // with another node (N is deleted). SimplifyDemandedBits will add Value's
13069 // node back to the worklist if necessary, but we also need to re-visit
13070 // the Store node itself.
13071 if (N->getOpcode() != ISD::DELETED_NODE)
13072 AddToWorklist(N);
13073 return SDValue(N, 0);
13074 }
13075 }
13076
13077 // If this is a TRUNC followed by a masked store, fold this into a masked
13078 // truncating store. We can do this even if this is already a masked
13079 // truncstore.
13080  // TODO: Try to combine to a masked compress store if possible.
13081 if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
13082 MST->isUnindexed() && !MST->isCompressingStore() &&
13083 TLI.canCombineTruncStore(ValVT: Value.getOperand(i: 0).getValueType(),
13084 MemVT: MST->getMemoryVT(), LegalOnly: LegalOperations)) {
13085 auto Mask = TLI.promoteTargetBoolean(DAG, Bool: MST->getMask(),
13086 ValVT: Value.getOperand(i: 0).getValueType());
13087 return DAG.getMaskedStore(Chain, dl: SDLoc(N), Val: Value.getOperand(i: 0), Base: Ptr,
13088 Offset: MST->getOffset(), Mask, MemVT: MST->getMemoryVT(),
13089 MMO: MST->getMemOperand(), AM: MST->getAddressingMode(),
13090 /*IsTruncating=*/true);
13091 }
13092
13093 return SDValue();
13094}
13095
13096SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
13097 auto *SST = cast<VPStridedStoreSDNode>(Val: N);
13098 EVT EltVT = SST->getValue().getValueType().getVectorElementType();
13099 // Combine strided stores with unit-stride to a regular VP store.
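  // (A unit stride is one whose byte value equals the element's store size, so
  // the elements are laid out contiguously, exactly like a regular VP store.)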
13100 if (auto *CStride = dyn_cast<ConstantSDNode>(Val: SST->getStride());
13101 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
13102 return DAG.getStoreVP(Chain: SST->getChain(), dl: SDLoc(N), Val: SST->getValue(),
13103 Ptr: SST->getBasePtr(), Offset: SST->getOffset(), Mask: SST->getMask(),
13104 EVL: SST->getVectorLength(), MemVT: SST->getMemoryVT(),
13105 MMO: SST->getMemOperand(), AM: SST->getAddressingMode(),
13106 IsTruncating: SST->isTruncatingStore(), IsCompressing: SST->isCompressingStore());
13107 }
13108 return SDValue();
13109}
13110
13111SDValue DAGCombiner::visitVECTOR_COMPRESS(SDNode *N) {
13112 SDLoc DL(N);
13113 SDValue Vec = N->getOperand(Num: 0);
13114 SDValue Mask = N->getOperand(Num: 1);
13115 SDValue Passthru = N->getOperand(Num: 2);
13116 EVT VecVT = Vec.getValueType();
13117
13118 bool HasPassthru = !Passthru.isUndef();
13119
13120 APInt SplatVal;
13121 if (ISD::isConstantSplatVector(N: Mask.getNode(), SplatValue&: SplatVal))
13122 return TLI.isConstTrueVal(N: Mask) ? Vec : Passthru;
13123
13124 if (Vec.isUndef() || Mask.isUndef())
13125 return Passthru;
13126
13127 // No need for potentially expensive compress if the mask is constant.
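  // For example, compress(<a,b,c,d>, <1,0,1,0>, <p0,p1,p2,p3>) becomes the
  // build_vector <a, c, p2, p3>, with undef instead of p2/p3 when there is no
  // passthru.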
13128 if (ISD::isBuildVectorOfConstantSDNodes(N: Mask.getNode())) {
13129 SmallVector<SDValue, 16> Ops;
13130 EVT ScalarVT = VecVT.getVectorElementType();
13131 unsigned NumSelected = 0;
13132 unsigned NumElmts = VecVT.getVectorNumElements();
13133 for (unsigned I = 0; I < NumElmts; ++I) {
13134 SDValue MaskI = Mask.getOperand(i: I);
13135 // We treat undef mask entries as "false".
13136 if (MaskI.isUndef())
13137 continue;
13138
13139 if (TLI.isConstTrueVal(N: MaskI)) {
13140 SDValue VecI = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: ScalarVT, N1: Vec,
13141 N2: DAG.getVectorIdxConstant(Val: I, DL));
13142 Ops.push_back(Elt: VecI);
13143 NumSelected++;
13144 }
13145 }
13146 for (unsigned Rest = NumSelected; Rest < NumElmts; ++Rest) {
13147 SDValue Val =
13148 HasPassthru
13149 ? DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: ScalarVT, N1: Passthru,
13150 N2: DAG.getVectorIdxConstant(Val: Rest, DL))
13151 : DAG.getUNDEF(VT: ScalarVT);
13152 Ops.push_back(Elt: Val);
13153 }
13154 return DAG.getBuildVector(VT: VecVT, DL, Ops);
13155 }
13156
13157 return SDValue();
13158}
13159
13160SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
13161 VPGatherSDNode *MGT = cast<VPGatherSDNode>(Val: N);
13162 SDValue Mask = MGT->getMask();
13163 SDValue Chain = MGT->getChain();
13164 SDValue Index = MGT->getIndex();
13165 SDValue Scale = MGT->getScale();
13166 SDValue BasePtr = MGT->getBasePtr();
13167 SDValue VL = MGT->getVectorLength();
13168 ISD::MemIndexType IndexType = MGT->getIndexType();
13169 SDLoc DL(N);
13170
13171 if (refineUniformBase(BasePtr, Index, IndexIsScaled: MGT->isIndexScaled(), DAG, DL)) {
13172 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
13173 return DAG.getGatherVP(
13174 VTs: DAG.getVTList(VT1: N->getValueType(ResNo: 0), VT2: MVT::Other), VT: MGT->getMemoryVT(), dl: DL,
13175 Ops, MMO: MGT->getMemOperand(), IndexType);
13176 }
13177
13178 if (refineIndexType(Index, IndexType, DataVT: N->getValueType(ResNo: 0), DAG)) {
13179 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
13180 return DAG.getGatherVP(
13181 VTs: DAG.getVTList(VT1: N->getValueType(ResNo: 0), VT2: MVT::Other), VT: MGT->getMemoryVT(), dl: DL,
13182 Ops, MMO: MGT->getMemOperand(), IndexType);
13183 }
13184
13185 return SDValue();
13186}
13187
13188SDValue DAGCombiner::visitMGATHER(SDNode *N) {
13189 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(Val: N);
13190 SDValue Mask = MGT->getMask();
13191 SDValue Chain = MGT->getChain();
13192 SDValue Index = MGT->getIndex();
13193 SDValue Scale = MGT->getScale();
13194 SDValue PassThru = MGT->getPassThru();
13195 SDValue BasePtr = MGT->getBasePtr();
13196 ISD::MemIndexType IndexType = MGT->getIndexType();
13197 SDLoc DL(N);
13198
13199 // Zap gathers with a zero mask.
13200 if (ISD::isConstantSplatVectorAllZeros(N: Mask.getNode()))
13201 return CombineTo(N, Res0: PassThru, Res1: MGT->getChain());
13202
13203 if (refineUniformBase(BasePtr, Index, IndexIsScaled: MGT->isIndexScaled(), DAG, DL)) {
13204 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
13205 return DAG.getMaskedGather(
13206 VTs: DAG.getVTList(VT1: N->getValueType(ResNo: 0), VT2: MVT::Other), MemVT: MGT->getMemoryVT(), dl: DL,
13207 Ops, MMO: MGT->getMemOperand(), IndexType, ExtTy: MGT->getExtensionType());
13208 }
13209
13210 if (refineIndexType(Index, IndexType, DataVT: N->getValueType(ResNo: 0), DAG)) {
13211 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
13212 return DAG.getMaskedGather(
13213 VTs: DAG.getVTList(VT1: N->getValueType(ResNo: 0), VT2: MVT::Other), MemVT: MGT->getMemoryVT(), dl: DL,
13214 Ops, MMO: MGT->getMemOperand(), IndexType, ExtTy: MGT->getExtensionType());
13215 }
13216
13217 return SDValue();
13218}
13219
13220SDValue DAGCombiner::visitMLOAD(SDNode *N) {
13221 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(Val: N);
13222 SDValue Mask = MLD->getMask();
13223
13224 // Zap masked loads with a zero mask.
13225 if (ISD::isConstantSplatVectorAllZeros(N: Mask.getNode()))
13226 return CombineTo(N, Res0: MLD->getPassThru(), Res1: MLD->getChain());
13227
13228  // If this is a masked load with an all-ones mask, we can use an unmasked load.
13229 // FIXME: Can we do this for indexed, expanding, or extending loads?
13230 if (ISD::isConstantSplatVectorAllOnes(N: Mask.getNode()) && MLD->isUnindexed() &&
13231 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
13232 SDValue NewLd = DAG.getLoad(
13233 VT: N->getValueType(ResNo: 0), dl: SDLoc(N), Chain: MLD->getChain(), Ptr: MLD->getBasePtr(),
13234 PtrInfo: MLD->getPointerInfo(), Alignment: MLD->getBaseAlign(),
13235 MMOFlags: MLD->getMemOperand()->getFlags(), AAInfo: MLD->getAAInfo(), Ranges: MLD->getRanges());
13236 return CombineTo(N, Res0: NewLd, Res1: NewLd.getValue(R: 1));
13237 }
13238
13239 // Try transforming N to an indexed load.
13240 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
13241 return SDValue(N, 0);
13242
13243 return SDValue();
13244}
13245
13246SDValue DAGCombiner::visitMHISTOGRAM(SDNode *N) {
13247 MaskedHistogramSDNode *HG = cast<MaskedHistogramSDNode>(Val: N);
13248 SDValue Chain = HG->getChain();
13249 SDValue Inc = HG->getInc();
13250 SDValue Mask = HG->getMask();
13251 SDValue BasePtr = HG->getBasePtr();
13252 SDValue Index = HG->getIndex();
13253 SDLoc DL(HG);
13254
13255 EVT MemVT = HG->getMemoryVT();
13256 EVT DataVT = Index.getValueType();
13257 MachineMemOperand *MMO = HG->getMemOperand();
13258 ISD::MemIndexType IndexType = HG->getIndexType();
13259
13260 if (ISD::isConstantSplatVectorAllZeros(N: Mask.getNode()))
13261 return Chain;
13262
13263 if (refineUniformBase(BasePtr, Index, IndexIsScaled: HG->isIndexScaled(), DAG, DL) ||
13264 refineIndexType(Index, IndexType, DataVT, DAG)) {
13265 SDValue Ops[] = {Chain, Inc, Mask, BasePtr, Index,
13266 HG->getScale(), HG->getIntID()};
13267 return DAG.getMaskedHistogram(VTs: DAG.getVTList(VT: MVT::Other), MemVT, dl: DL, Ops,
13268 MMO, IndexType);
13269 }
13270
13271 return SDValue();
13272}
13273
13274SDValue DAGCombiner::visitPARTIAL_REDUCE_MLA(SDNode *N) {
13275 if (SDValue Res = foldPartialReduceMLAMulOp(N))
13276 return Res;
13277 if (SDValue Res = foldPartialReduceAdd(N))
13278 return Res;
13279 return SDValue();
13280}
13281
13282// partial_reduce_*mla(acc, mul(*ext(a), *ext(b)), splat(1))
13283// -> partial_reduce_*mla(acc, a, b)
13284//
13285// partial_reduce_*mla(acc, mul(*ext(x), splat(C)), splat(1))
13286// -> partial_reduce_*mla(acc, x, splat(C))
13287//
13288// partial_reduce_*mla(acc, sel(p, mul(*ext(a), *ext(b)), splat(0)), splat(1))
13289// -> partial_reduce_*mla(acc, sel(p, a, splat(0)), b)
13290//
13291// partial_reduce_*mla(acc, sel(p, mul(*ext(a), splat(C)), splat(0)), splat(1))
13292// -> partial_reduce_*mla(acc, sel(p, a, splat(0)), splat(C))
13293SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) {
13294 SDLoc DL(N);
13295 auto *Context = DAG.getContext();
13296 SDValue Acc = N->getOperand(Num: 0);
13297 SDValue Op1 = N->getOperand(Num: 1);
13298 SDValue Op2 = N->getOperand(Num: 2);
13299 unsigned Opc = Op1->getOpcode();
13300
13301 // Handle predication by moving the SELECT into the operand of the MUL.
13302 SDValue Pred;
13303 if (Opc == ISD::VSELECT && (isZeroOrZeroSplat(N: Op1->getOperand(Num: 2)) ||
13304 isZeroOrZeroSplatFP(N: Op1->getOperand(Num: 2)))) {
13305 Pred = Op1->getOperand(Num: 0);
13306 Op1 = Op1->getOperand(Num: 1);
13307 Opc = Op1->getOpcode();
13308 }
13309
13310 if (Opc != ISD::MUL && Opc != ISD::FMUL && Opc != ISD::SHL)
13311 return SDValue();
13312
13313 SDValue LHS = Op1->getOperand(Num: 0);
13314 SDValue RHS = Op1->getOperand(Num: 1);
13315
13316 // Try to treat (shl %a, %c) as (mul %a, (1 << %c)) for constant %c.
13317 if (Opc == ISD::SHL) {
13318 APInt C;
13319 if (!ISD::isConstantSplatVector(N: RHS.getNode(), SplatValue&: C))
13320 return SDValue();
13321
13322 RHS =
13323 DAG.getSplatVector(VT: RHS.getValueType(), DL,
13324 Op: DAG.getConstant(Val: APInt(C.getBitWidth(), 1).shl(ShiftAmt: C), DL,
13325 VT: RHS.getValueType().getScalarType()));
13326 Opc = ISD::MUL;
13327 }
13328
13329 if (!(Opc == ISD::MUL && llvm::isOneOrOneSplat(V: Op2)) &&
13330 !(Opc == ISD::FMUL && llvm::isOneOrOneSplatFP(V: Op2)))
13331 return SDValue();
13332
13333 auto IsIntOrFPExtOpcode = [](unsigned int Opcode) {
13334 return (ISD::isExtOpcode(Opcode) || Opcode == ISD::FP_EXTEND);
13335 };
13336
13337 unsigned LHSOpcode = LHS->getOpcode();
13338 if (!IsIntOrFPExtOpcode(LHSOpcode))
13339 return SDValue();
13340
13341 SDValue LHSExtOp = LHS->getOperand(Num: 0);
13342 EVT LHSExtOpVT = LHSExtOp.getValueType();
13343
13344 // When Pred is non-zero, set Op = select(Pred, Op, splat(0)) and freeze
13345 // OtherOp to keep the same semantics when moving the selects into the MUL
13346 // operands.
13347 auto ApplyPredicate = [&](SDValue &Op, SDValue &OtherOp) {
13348 if (Pred) {
13349 EVT OpVT = Op.getValueType();
13350 SDValue Zero = OpVT.isFloatingPoint() ? DAG.getConstantFP(Val: 0.0, DL, VT: OpVT)
13351 : DAG.getConstant(Val: 0, DL, VT: OpVT);
13352 Op = DAG.getSelect(DL, VT: OpVT, Cond: Pred, LHS: Op, RHS: Zero);
13353 OtherOp = DAG.getFreeze(V: OtherOp);
13354 }
13355 };
13356
13357 // partial_reduce_*mla(acc, mul(ext(x), splat(C)), splat(1))
13358 // -> partial_reduce_*mla(acc, x, C)
13359 APInt C;
13360 if (ISD::isConstantSplatVector(N: RHS.getNode(), SplatValue&: C)) {
13361 // TODO: Make use of partial_reduce_sumla here
13362 APInt CTrunc = C.trunc(width: LHSExtOpVT.getScalarSizeInBits());
13363 unsigned LHSBits = LHS.getValueType().getScalarSizeInBits();
13364 if ((LHSOpcode != ISD::ZERO_EXTEND || CTrunc.zext(width: LHSBits) != C) &&
13365 (LHSOpcode != ISD::SIGN_EXTEND || CTrunc.sext(width: LHSBits) != C))
13366 return SDValue();
13367
13368 unsigned NewOpcode = LHSOpcode == ISD::SIGN_EXTEND
13369 ? ISD::PARTIAL_REDUCE_SMLA
13370 : ISD::PARTIAL_REDUCE_UMLA;
13371
13372 // Only perform these combines if the target supports folding
13373 // the extends into the operation.
13374 if (!TLI.isPartialReduceMLALegalOrCustom(
13375 Opc: NewOpcode, AccVT: TLI.getTypeToTransformTo(Context&: *Context, VT: N->getValueType(ResNo: 0)),
13376 InputVT: TLI.getTypeToTransformTo(Context&: *Context, VT: LHSExtOpVT)))
13377 return SDValue();
13378
13379 SDValue C = DAG.getConstant(Val: CTrunc, DL, VT: LHSExtOpVT);
13380 ApplyPredicate(C, LHSExtOp);
13381 return DAG.getNode(Opcode: NewOpcode, DL, VT: N->getValueType(ResNo: 0), N1: Acc, N2: LHSExtOp, N3: C);
13382 }
13383
13384 unsigned RHSOpcode = RHS->getOpcode();
13385 if (!IsIntOrFPExtOpcode(RHSOpcode))
13386 return SDValue();
13387
13388 SDValue RHSExtOp = RHS->getOperand(Num: 0);
13389 if (LHSExtOpVT != RHSExtOp.getValueType())
13390 return SDValue();
13391
13392 unsigned NewOpc;
13393 if (LHSOpcode == ISD::SIGN_EXTEND && RHSOpcode == ISD::SIGN_EXTEND)
13394 NewOpc = ISD::PARTIAL_REDUCE_SMLA;
13395 else if (LHSOpcode == ISD::ZERO_EXTEND && RHSOpcode == ISD::ZERO_EXTEND)
13396 NewOpc = ISD::PARTIAL_REDUCE_UMLA;
13397 else if (LHSOpcode == ISD::SIGN_EXTEND && RHSOpcode == ISD::ZERO_EXTEND)
13398 NewOpc = ISD::PARTIAL_REDUCE_SUMLA;
13399 else if (LHSOpcode == ISD::ZERO_EXTEND && RHSOpcode == ISD::SIGN_EXTEND) {
13400 NewOpc = ISD::PARTIAL_REDUCE_SUMLA;
13401 std::swap(a&: LHSExtOp, b&: RHSExtOp);
13402 } else if (LHSOpcode == ISD::FP_EXTEND && RHSOpcode == ISD::FP_EXTEND) {
13403 NewOpc = ISD::PARTIAL_REDUCE_FMLA;
13404 } else
13405 return SDValue();
13406  // For a 2-stage extend, the signedness of both of the extends must match.
13407  // If the mul has the same element type as the accumulator, there is no outer
13408  // extend, and thus we can simply use the inner extends to pick the result node.
13409 // TODO: extend to handle nonneg zext as sext
13410 EVT AccElemVT = Acc.getValueType().getVectorElementType();
13411 if (Op1.getValueType().getVectorElementType() != AccElemVT &&
13412 NewOpc != N->getOpcode())
13413 return SDValue();
13414
13415 // Only perform these combines if the target supports folding
13416 // the extends into the operation.
13417 if (!TLI.isPartialReduceMLALegalOrCustom(
13418 Opc: NewOpc, AccVT: TLI.getTypeToTransformTo(Context&: *Context, VT: N->getValueType(ResNo: 0)),
13419 InputVT: TLI.getTypeToTransformTo(Context&: *Context, VT: LHSExtOpVT)))
13420 return SDValue();
13421
13422 ApplyPredicate(RHSExtOp, LHSExtOp);
13423 return DAG.getNode(Opcode: NewOpc, DL, VT: N->getValueType(ResNo: 0), N1: Acc, N2: LHSExtOp, N3: RHSExtOp);
13424}
13425
13426// partial.reduce.*mla(acc, *ext(op), splat(1))
13427// -> partial.reduce.*mla(acc, op, splat(trunc(1)))
13428// partial.reduce.sumla(acc, sext(op), splat(1))
13429// -> partial.reduce.smla(acc, op, splat(trunc(1)))
13430//
13431// partial.reduce.*mla(acc, sel(p, *ext(op), splat(0)), splat(1))
13432// -> partial.reduce.*mla(acc, sel(p, op, splat(0)), splat(trunc(1)))
13433SDValue DAGCombiner::foldPartialReduceAdd(SDNode *N) {
13434 SDLoc DL(N);
13435 SDValue Acc = N->getOperand(Num: 0);
13436 SDValue Op1 = N->getOperand(Num: 1);
13437 SDValue Op2 = N->getOperand(Num: 2);
13438
13439 if (!llvm::isOneOrOneSplat(V: Op2) && !llvm::isOneOrOneSplatFP(V: Op2))
13440 return SDValue();
13441
13442 SDValue Pred;
13443 unsigned Op1Opcode = Op1.getOpcode();
13444 if (Op1Opcode == ISD::VSELECT && (isZeroOrZeroSplat(N: Op1->getOperand(Num: 2)) ||
13445 isZeroOrZeroSplatFP(N: Op1->getOperand(Num: 2)))) {
13446 Pred = Op1->getOperand(Num: 0);
13447 Op1 = Op1->getOperand(Num: 1);
13448 Op1Opcode = Op1->getOpcode();
13449 }
13450
13451 if (!ISD::isExtOpcode(Opcode: Op1Opcode) && Op1Opcode != ISD::FP_EXTEND)
13452 return SDValue();
13453
13454 bool Op1IsSigned =
13455 Op1Opcode == ISD::SIGN_EXTEND || Op1Opcode == ISD::FP_EXTEND;
13456 bool NodeIsSigned = N->getOpcode() != ISD::PARTIAL_REDUCE_UMLA;
13457 EVT AccElemVT = Acc.getValueType().getVectorElementType();
13458 if (Op1IsSigned != NodeIsSigned &&
13459 Op1.getValueType().getVectorElementType() != AccElemVT)
13460 return SDValue();
13461
13462 unsigned NewOpcode = N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA
13463 ? ISD::PARTIAL_REDUCE_FMLA
13464 : Op1IsSigned ? ISD::PARTIAL_REDUCE_SMLA
13465 : ISD::PARTIAL_REDUCE_UMLA;
13466
13467 SDValue UnextOp1 = Op1.getOperand(i: 0);
13468 EVT UnextOp1VT = UnextOp1.getValueType();
13469 auto *Context = DAG.getContext();
13470 if (!TLI.isPartialReduceMLALegalOrCustom(
13471 Opc: NewOpcode, AccVT: TLI.getTypeToTransformTo(Context&: *Context, VT: N->getValueType(ResNo: 0)),
13472 InputVT: TLI.getTypeToTransformTo(Context&: *Context, VT: UnextOp1VT)))
13473 return SDValue();
13474
13475 SDValue Constant = N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA
13476 ? DAG.getConstantFP(Val: 1, DL, VT: UnextOp1VT)
13477 : DAG.getConstant(Val: 1, DL, VT: UnextOp1VT);
13478
13479 if (Pred) {
13480 SDValue Zero = N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA
13481 ? DAG.getConstantFP(Val: 0, DL, VT: UnextOp1VT)
13482 : DAG.getConstant(Val: 0, DL, VT: UnextOp1VT);
13483 Constant = DAG.getSelect(DL, VT: UnextOp1VT, Cond: Pred, LHS: Constant, RHS: Zero);
13484 }
13485 return DAG.getNode(Opcode: NewOpcode, DL, VT: N->getValueType(ResNo: 0), N1: Acc, N2: UnextOp1,
13486 N3: Constant);
13487}
13488
13489SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
13490 auto *SLD = cast<VPStridedLoadSDNode>(Val: N);
13491 EVT EltVT = SLD->getValueType(ResNo: 0).getVectorElementType();
13492 // Combine strided loads with unit-stride to a regular VP load.
13493 if (auto *CStride = dyn_cast<ConstantSDNode>(Val: SLD->getStride());
13494 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
13495 SDValue NewLd = DAG.getLoadVP(
13496 AM: SLD->getAddressingMode(), ExtType: SLD->getExtensionType(), VT: SLD->getValueType(ResNo: 0),
13497 dl: SDLoc(N), Chain: SLD->getChain(), Ptr: SLD->getBasePtr(), Offset: SLD->getOffset(),
13498 Mask: SLD->getMask(), EVL: SLD->getVectorLength(), MemVT: SLD->getMemoryVT(),
13499 MMO: SLD->getMemOperand(), IsExpanding: SLD->isExpandingLoad());
13500 return CombineTo(N, Res0: NewLd, Res1: NewLd.getValue(R: 1));
13501 }
13502 return SDValue();
13503}
13504
13505/// A vector select of 2 constant vectors can be simplified to math/logic to
13506/// avoid a variable select instruction and possibly avoid constant loads.
13507SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
13508 SDValue Cond = N->getOperand(Num: 0);
13509 SDValue N1 = N->getOperand(Num: 1);
13510 SDValue N2 = N->getOperand(Num: 2);
13511 EVT VT = N->getValueType(ResNo: 0);
13512 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
13513 !shouldConvertSelectOfConstantsToMath(Cond, VT, TLI) ||
13514 !ISD::isBuildVectorOfConstantSDNodes(N: N1.getNode()) ||
13515 !ISD::isBuildVectorOfConstantSDNodes(N: N2.getNode()))
13516 return SDValue();
13517
13518 // Check if we can use the condition value to increment/decrement a single
13519 // constant value. This simplifies a select to an add and removes a constant
13520 // load/materialization from the general case.
13521 bool AllAddOne = true;
13522 bool AllSubOne = true;
13523 unsigned Elts = VT.getVectorNumElements();
13524 for (unsigned i = 0; i != Elts; ++i) {
13525 SDValue N1Elt = N1.getOperand(i);
13526 SDValue N2Elt = N2.getOperand(i);
13527 if (N1Elt.isUndef())
13528 continue;
13529 // N2 should not contain undef values since it will be reused in the fold.
13530 if (N2Elt.isUndef() || N1Elt.getValueType() != N2Elt.getValueType()) {
13531 AllAddOne = false;
13532 AllSubOne = false;
13533 break;
13534 }
13535
13536 const APInt &C1 = N1Elt->getAsAPIntVal();
13537 const APInt &C2 = N2Elt->getAsAPIntVal();
13538 if (C1 != C2 + 1)
13539 AllAddOne = false;
13540 if (C1 != C2 - 1)
13541 AllSubOne = false;
13542 }
13543
13544 // Further simplifications for the extra-special cases where the constants are
13545 // all 0 or all -1 should be implemented as folds of these patterns.
13546 SDLoc DL(N);
13547 if (AllAddOne || AllSubOne) {
13548 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
13549 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
13550 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
13551 SDValue ExtendedCond = DAG.getNode(Opcode: ExtendOpcode, DL, VT, Operand: Cond);
13552 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: ExtendedCond, N2);
13553 }
13554
13555 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
13556 APInt Pow2C;
13557 if (ISD::isConstantSplatVector(N: N1.getNode(), SplatValue&: Pow2C) && Pow2C.isPowerOf2() &&
13558 isNullOrNullSplat(V: N2)) {
13559 SDValue ZextCond = DAG.getZExtOrTrunc(Op: Cond, DL, VT);
13560 SDValue ShAmtC = DAG.getConstant(Val: Pow2C.exactLogBase2(), DL, VT);
13561 return DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ZextCond, N2: ShAmtC);
13562 }
13563
13564 if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
13565 return V;
13566
13567 // The general case for select-of-constants:
13568 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
13569 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
13570 // leave that to a machine-specific pass.
13571 return SDValue();
13572}
13573
13574SDValue DAGCombiner::visitVP_SELECT(SDNode *N) {
13575 SDValue N0 = N->getOperand(Num: 0);
13576 SDValue N1 = N->getOperand(Num: 1);
13577 SDValue N2 = N->getOperand(Num: 2);
13578 SDLoc DL(N);
13579
13580 if (SDValue V = DAG.simplifySelect(Cond: N0, TVal: N1, FVal: N2))
13581 return V;
13582
13583 if (SDValue V = foldBoolSelectToLogic<VPMatchContext>(N, DL, DAG))
13584 return V;
13585
13586 return SDValue();
13587}
13588
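// Fold a vselect whose true/false operand is an all-ones or all-zeros splat
// and whose condition is known to be a sign-splat mask into plain bitwise ops:
//   vselect Cond, -1, 0 --> bitcast Cond
//   vselect Cond, -1, x --> or Cond, x
//   vselect Cond, x, 0  --> and Cond, x
//   vselect Cond, 0, x  --> and (not Cond), x   (only if the invert is free)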
13589static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,
13590 SDValue FVal,
13591 const TargetLowering &TLI,
13592 SelectionDAG &DAG,
13593 const SDLoc &DL) {
13594 EVT VT = TVal.getValueType();
13595 if (!TLI.isTypeLegal(VT))
13596 return SDValue();
13597
13598 EVT CondVT = Cond.getValueType();
13599 assert(CondVT.isVector() && "Vector select expects a vector selector!");
13600
13601 bool IsTAllZero = ISD::isConstantSplatVectorAllZeros(N: TVal.getNode());
13602 bool IsTAllOne = ISD::isConstantSplatVectorAllOnes(N: TVal.getNode());
13603 bool IsFAllZero = ISD::isConstantSplatVectorAllZeros(N: FVal.getNode());
13604 bool IsFAllOne = ISD::isConstantSplatVectorAllOnes(N: FVal.getNode());
13605
13606  // If neither operand is an all-zeros or all-ones splat, there is nothing to fold.
13607 if (!IsTAllZero && !IsTAllOne && !IsFAllZero && !IsFAllOne)
13608 return SDValue();
13609
13610 // select Cond, 0, 0 → 0
13611 if (IsTAllZero && IsFAllZero) {
13612 return VT.isFloatingPoint() ? DAG.getConstantFP(Val: 0.0, DL, VT)
13613 : DAG.getConstant(Val: 0, DL, VT);
13614 }
13615
13616 // check select(setgt lhs, -1), 1, -1 --> or (sra lhs, bitwidth - 1), 1
13617 APInt TValAPInt;
13618 if (Cond.getOpcode() == ISD::SETCC &&
13619 Cond.getOperand(i: 2) == DAG.getCondCode(Cond: ISD::SETGT) &&
13620 Cond.getOperand(i: 0).getValueType() == VT && VT.isSimple() &&
13621 ISD::isConstantSplatVector(N: TVal.getNode(), SplatValue&: TValAPInt) &&
13622 TValAPInt.isOne() &&
13623 ISD::isConstantSplatVectorAllOnes(N: Cond.getOperand(i: 1).getNode()) &&
13624 ISD::isConstantSplatVectorAllOnes(N: FVal.getNode())) {
13625 return SDValue();
13626 }
13627
13628 // To use the condition operand as a bitwise mask, it must have elements that
13629 // are the same size as the select elements. i.e, the condition operand must
13630 // have already been promoted from the IR select condition type <N x i1>.
13631 // Don't check if the types themselves are equal because that excludes
13632 // vector floating-point selects.
13633 if (CondVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
13634 return SDValue();
13635
13636 // Cond value must be 'sign splat' to be converted to a logical op.
13637 if (DAG.ComputeNumSignBits(Op: Cond) != CondVT.getScalarSizeInBits())
13638 return SDValue();
13639
13640 // Try inverting Cond and swapping T/F if it gives all-ones/all-zeros form
13641 if (!IsTAllOne && !IsFAllZero && Cond.hasOneUse() &&
13642 Cond.getOpcode() == ISD::SETCC &&
13643 TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT) ==
13644 CondVT) {
13645 if (IsTAllZero || IsFAllOne) {
13646 SDValue CC = Cond.getOperand(i: 2);
13647 ISD::CondCode InverseCC = ISD::getSetCCInverse(
13648 Operation: cast<CondCodeSDNode>(Val&: CC)->get(), Type: Cond.getOperand(i: 0).getValueType());
13649 Cond = DAG.getSetCC(DL, VT: CondVT, LHS: Cond.getOperand(i: 0), RHS: Cond.getOperand(i: 1),
13650 Cond: InverseCC);
13651 std::swap(a&: TVal, b&: FVal);
13652 std::swap(a&: IsTAllOne, b&: IsFAllOne);
13653 std::swap(a&: IsTAllZero, b&: IsFAllZero);
13654 }
13655 }
13656
13657 assert(DAG.ComputeNumSignBits(Cond) == CondVT.getScalarSizeInBits() &&
13658 "Select condition no longer all-sign bits");
13659
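// For example, with a v4i32 sign-splat Cond and VT = v4f32, the folds below
// perform the bitwise op in v4i32 and bitcast the result back to v4f32.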
13660 // select Cond, -1, 0 → bitcast Cond
13661 if (IsTAllOne && IsFAllZero)
13662 return DAG.getBitcast(VT, V: Cond);
13663
13664 // select Cond, -1, x → or Cond, x
13665 if (IsTAllOne) {
13666 SDValue X = DAG.getBitcast(VT: CondVT, V: DAG.getFreeze(V: FVal));
13667 SDValue Or = DAG.getNode(Opcode: ISD::OR, DL, VT: CondVT, N1: Cond, N2: X);
13668 return DAG.getBitcast(VT, V: Or);
13669 }
13670
13671 // select Cond, x, 0 → and Cond, x
13672 if (IsFAllZero) {
13673 SDValue X = DAG.getBitcast(VT: CondVT, V: DAG.getFreeze(V: TVal));
13674 SDValue And = DAG.getNode(Opcode: ISD::AND, DL, VT: CondVT, N1: Cond, N2: X);
13675 return DAG.getBitcast(VT, V: And);
13676 }
13677
13678 // select Cond, 0, x -> and not(Cond), x
13679 if (IsTAllZero &&
13680 (isBitwiseNot(V: peekThroughBitcasts(V: Cond)) || TLI.hasAndNot(X: Cond))) {
13681 SDValue X = DAG.getBitcast(VT: CondVT, V: DAG.getFreeze(V: FVal));
13682 SDValue And =
13683 DAG.getNode(Opcode: ISD::AND, DL, VT: CondVT, N1: DAG.getNOT(DL, Val: Cond, VT: CondVT), N2: X);
13684 return DAG.getBitcast(VT, V: And);
13685 }
13686
13687 return SDValue();
13688}
13689
13690SDValue DAGCombiner::visitVSELECT(SDNode *N) {
13691 SDValue N0 = N->getOperand(Num: 0);
13692 SDValue N1 = N->getOperand(Num: 1);
13693 SDValue N2 = N->getOperand(Num: 2);
13694 EVT VT = N->getValueType(ResNo: 0);
13695 SDLoc DL(N);
13696
13697 if (SDValue V = DAG.simplifySelect(Cond: N0, TVal: N1, FVal: N2))
13698 return V;
13699
13700 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG))
13701 return V;
13702
13703 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
13704 if (!TLI.isTargetCanonicalSelect(N))
13705 if (SDValue F = extractBooleanFlip(V: N0, DAG, TLI, Force: false))
13706 return DAG.getSelect(DL, VT, Cond: F, LHS: N2, RHS: N1);
13707
13708 // select (sext m), (add X, C), X --> (add X, (and C, (sext m)))
13709 if (N1.getOpcode() == ISD::ADD && N1.getOperand(i: 0) == N2 && N1->hasOneUse() &&
13710 DAG.isConstantIntBuildVectorOrConstantInt(N: N1.getOperand(i: 1)) &&
13711 N0.getScalarValueSizeInBits() == N1.getScalarValueSizeInBits() &&
13712 TLI.getBooleanContents(Type: N0.getValueType()) ==
13713 TargetLowering::ZeroOrNegativeOneBooleanContent) {
13714 return DAG.getNode(
13715 Opcode: ISD::ADD, DL, VT: N1.getValueType(), N1: N2,
13716 N2: DAG.getNode(Opcode: ISD::AND, DL, VT: N0.getValueType(), N1: N1.getOperand(i: 1), N2: N0));
13717 }
13718
13719 // Canonicalize integer abs.
13720 // vselect (setg[te] X, 0), X, -X ->
13721 // vselect (setgt X, -1), X, -X ->
13722 // vselect (setl[te] X, 0), -X, X ->
13723 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
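// e.g. for i32 X = -5: Y = sra(-5, 31) = -1, add = -5 + -1 = -6, and
// xor = -6 ^ -1 = 5 = |X|.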
13724 if (N0.getOpcode() == ISD::SETCC) {
13725 SDValue LHS = N0.getOperand(i: 0), RHS = N0.getOperand(i: 1);
13726 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N0.getOperand(i: 2))->get();
13727 bool isAbs = false;
13728 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(N: RHS.getNode());
13729
13730 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
13731 (ISD::isBuildVectorAllOnes(N: RHS.getNode()) && CC == ISD::SETGT)) &&
13732 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(i: 1))
13733 isAbs = ISD::isBuildVectorAllZeros(N: N2.getOperand(i: 0).getNode());
13734 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
13735 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(i: 1))
13736 isAbs = ISD::isBuildVectorAllZeros(N: N1.getOperand(i: 0).getNode());
13737
13738 if (isAbs) {
13739 if (TLI.isOperationLegalOrCustom(Op: ISD::ABS, VT))
13740 return DAG.getNode(Opcode: ISD::ABS, DL, VT, Operand: LHS);
13741
13742 SDValue Shift = DAG.getNode(
13743 Opcode: ISD::SRA, DL, VT, N1: LHS,
13744 N2: DAG.getShiftAmountConstant(Val: VT.getScalarSizeInBits() - 1, VT, DL));
13745 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: LHS, N2: Shift);
13746 AddToWorklist(N: Shift.getNode());
13747 AddToWorklist(N: Add.getNode());
13748 return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Add, N2: Shift);
13749 }
13750
13751 // vselect x, y (fcmp lt x, y) -> fminnum x, y
13752 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
13753 //
13754 // This is OK if we don't care about what happens if either operand is a
13755 // NaN.
13756 //
13757 if (N0.hasOneUse() &&
13758 isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, Flags: N->getFlags(), TLI)) {
13759 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, True: N1, False: N2, CC))
13760 return FMinMax;
13761 }
13762
13763 if (SDValue S = PerformMinMaxFpToSatCombine(N0: LHS, N1: RHS, N2: N1, N3: N2, CC, DAG))
13764 return S;
13765 if (SDValue S = PerformUMinFpToSatCombine(N0: LHS, N1: RHS, N2: N1, N3: N2, CC, DAG))
13766 return S;
13767
13768 // If this select has a condition (setcc) with narrower operands than the
13769 // select, try to widen the compare to match the select width.
13770 // TODO: This should be extended to handle any constant.
13771 // TODO: This could be extended to handle non-loading patterns, but that
13772 // requires thorough testing to avoid regressions.
13773 if (isNullOrNullSplat(V: RHS)) {
13774 EVT NarrowVT = LHS.getValueType();
13775 EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
13776 EVT SetCCVT = getSetCCResultType(VT: LHS.getValueType());
13777 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
13778 unsigned WideWidth = WideVT.getScalarSizeInBits();
13779 bool IsSigned = isSignedIntSetCC(Code: CC);
13780 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13781 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
13782 SetCCWidth != 1 && SetCCWidth < WideWidth &&
13783 TLI.isLoadExtLegalOrCustom(ExtType: LoadExtOpcode, ValVT: WideVT, MemVT: NarrowVT) &&
13784 TLI.isOperationLegalOrCustom(Op: ISD::SETCC, VT: WideVT)) {
13785 // Both compare operands can be widened for free. The LHS can use an
13786 // extended load, and the RHS is a constant:
13787 // vselect (ext (setcc load(X), C)), N1, N2 -->
13788 // vselect (setcc extload(X), C'), N1, N2
13789 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13790 SDValue WideLHS = DAG.getNode(Opcode: ExtOpcode, DL, VT: WideVT, Operand: LHS);
13791 SDValue WideRHS = DAG.getNode(Opcode: ExtOpcode, DL, VT: WideVT, Operand: RHS);
13792 EVT WideSetCCVT = getSetCCResultType(VT: WideVT);
13793 SDValue WideSetCC = DAG.getSetCC(DL, VT: WideSetCCVT, LHS: WideLHS, RHS: WideRHS, Cond: CC);
13794 return DAG.getSelect(DL, VT: N1.getValueType(), Cond: WideSetCC, LHS: N1, RHS: N2);
13795 }
13796 }
13797
13798 if (SDValue ABD = foldSelectToABD(LHS, RHS, True: N1, False: N2, CC, DL))
13799 return ABD;
13800
13801 // Match VSELECTs into add with unsigned saturation.
13802 if (hasOperation(Opcode: ISD::UADDSAT, VT)) {
13803 // Check if one of the arms of the VSELECT is a vector with all bits set.
13804 // If it's on the left side, invert the predicate to simplify the logic below.
13805 SDValue Other;
13806 ISD::CondCode SatCC = CC;
13807 if (ISD::isConstantSplatVectorAllOnes(N: N1.getNode())) {
13808 Other = N2;
13809 SatCC = ISD::getSetCCInverse(Operation: SatCC, Type: VT.getScalarType());
13810 } else if (ISD::isConstantSplatVectorAllOnes(N: N2.getNode())) {
13811 Other = N1;
13812 }
13813
13814 if (Other && Other.getOpcode() == ISD::ADD) {
13815 SDValue CondLHS = LHS, CondRHS = RHS;
13816 SDValue OpLHS = Other.getOperand(i: 0), OpRHS = Other.getOperand(i: 1);
13817
13818 // Canonicalize condition operands.
13819 if (SatCC == ISD::SETUGE) {
13820 std::swap(a&: CondLHS, b&: CondRHS);
13821 SatCC = ISD::SETULE;
13822 }
13823
13824 // We can test against either of the addition operands.
13825 // x <= x+y ? x+y : ~0 --> uaddsat x, y
13826 // x+y >= x ? x+y : ~0 --> uaddsat x, y
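// e.g. for i8 x = 200, y = 100: x+y wraps to 44, so 'x <= x+y' is false and
// ~0 (255) is selected, which equals uaddsat(200, 100).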
13827 if (SatCC == ISD::SETULE && Other == CondRHS &&
13828 (OpLHS == CondLHS || OpRHS == CondLHS))
13829 return DAG.getNode(Opcode: ISD::UADDSAT, DL, VT, N1: OpLHS, N2: OpRHS);
13830
13831 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
13832 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
13833 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
13834 CondLHS == OpLHS) {
13835 // If the RHS is a constant we have to reverse the const
13836 // canonicalization.
13837 // x >= ~C ? x+C : ~0 --> uaddsat x, C
13838 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
13839 return Cond->getAPIntValue() == ~Op->getAPIntValue();
13840 };
13841 if (SatCC == ISD::SETULE &&
13842 ISD::matchBinaryPredicate(LHS: OpRHS, RHS: CondRHS, Match: MatchUADDSAT))
13843 return DAG.getNode(Opcode: ISD::UADDSAT, DL, VT, N1: OpLHS, N2: OpRHS);
13844 }
13845 }
13846 }
13847
13848 // Match VSELECTs into sub with unsigned saturation.
13849 if (hasOperation(Opcode: ISD::USUBSAT, VT)) {
13850 // Check if one of the arms of the VSELECT is a zero vector. If it's on
13851 // the left side, invert the predicate to simplify the logic below.
13852 SDValue Other;
13853 ISD::CondCode SatCC = CC;
13854 if (ISD::isConstantSplatVectorAllZeros(N: N1.getNode())) {
13855 Other = N2;
13856 SatCC = ISD::getSetCCInverse(Operation: SatCC, Type: VT.getScalarType());
13857 } else if (ISD::isConstantSplatVectorAllZeros(N: N2.getNode())) {
13858 Other = N1;
13859 }
13860
13861 // zext(x) >= y ? trunc(zext(x) - y) : 0
13862 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
13863 // zext(x) > y ? trunc(zext(x) - y) : 0
13864 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
13865 if (Other && Other.getOpcode() == ISD::TRUNCATE &&
13866 Other.getOperand(i: 0).getOpcode() == ISD::SUB &&
13867 (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
13868 SDValue OpLHS = Other.getOperand(i: 0).getOperand(i: 0);
13869 SDValue OpRHS = Other.getOperand(i: 0).getOperand(i: 1);
13870 if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
13871 if (SDValue R = getTruncatedUSUBSAT(DstVT: VT, SrcVT: LHS.getValueType(), LHS, RHS,
13872 DAG, DL))
13873 return R;
13874 }
13875
13876 if (Other && Other.getNumOperands() == 2) {
13877 SDValue CondRHS = RHS;
13878 SDValue OpLHS = Other.getOperand(i: 0), OpRHS = Other.getOperand(i: 1);
13879
13880 if (OpLHS == LHS) {
13881 // Look for a general sub with unsigned saturation first.
13882 // x >= y ? x-y : 0 --> usubsat x, y
13883 // x > y ? x-y : 0 --> usubsat x, y
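// e.g. for i8 x = 10, y = 20: the compare is false, so 0 is selected,
// which equals usubsat(10, 20).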
13884 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
13885 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
13886 return DAG.getNode(Opcode: ISD::USUBSAT, DL, VT, N1: OpLHS, N2: OpRHS);
13887
13888 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
13889 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
13890 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
13891 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
13892 // If the RHS is a constant we have to reverse the const
13893 // canonicalization.
13894 // x > C-1 ? x+-C : 0 --> usubsat x, C
13895 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
13896 return (!Op && !Cond) ||
13897 (Op && Cond &&
13898 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
13899 };
13900 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
13901 ISD::matchBinaryPredicate(LHS: OpRHS, RHS: CondRHS, Match: MatchUSUBSAT,
13902 /*AllowUndefs*/ true)) {
13903 OpRHS = DAG.getNegative(Val: OpRHS, DL, VT);
13904 return DAG.getNode(Opcode: ISD::USUBSAT, DL, VT, N1: OpLHS, N2: OpRHS);
13905 }
13906
13907 // Another special case: If C was a sign bit, the sub has been
13908 // canonicalized into a xor.
13909 // FIXME: Would it be better to use computeKnownBits to
13910 // determine whether it's safe to decanonicalize the xor?
13911 // x s< 0 ? x^C : 0 --> usubsat x, C
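// e.g. for i8 with C = 0x80: if x is negative (unsigned x >= 128), then
// x ^ 0x80 == x - 128 == usubsat(x, 128); otherwise usubsat(x, 128) == 0,
// matching the selected zero.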
13912 APInt SplatValue;
13913 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
13914 ISD::isConstantSplatVector(N: OpRHS.getNode(), SplatValue) &&
13915 ISD::isConstantSplatVectorAllZeros(N: CondRHS.getNode()) &&
13916 SplatValue.isSignMask()) {
13917 // Note that we have to rebuild the RHS constant here to
13918 // ensure we don't rely on particular values of undef lanes.
13919 OpRHS = DAG.getConstant(Val: SplatValue, DL, VT);
13920 return DAG.getNode(Opcode: ISD::USUBSAT, DL, VT, N1: OpLHS, N2: OpRHS);
13921 }
13922 }
13923 }
13924 }
13925 }
13926 }
13927
13928 // (vselect (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
13929 // (vselect (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
13930 if (SDValue UMin = foldSelectToUMin(LHS, RHS, True: N1, False: N2, CC, DL))
13931 return UMin;
13932 }
13933
13934 if (SimplifySelectOps(SELECT: N, LHS: N1, RHS: N2))
13935 return SDValue(N, 0); // Don't revisit N.
13936
13937 // Fold (vselect all_ones, N1, N2) -> N1
13938 if (ISD::isConstantSplatVectorAllOnes(N: N0.getNode()))
13939 return N1;
13940 // Fold (vselect all_zeros, N1, N2) -> N2
13941 if (ISD::isConstantSplatVectorAllZeros(N: N0.getNode()))
13942 return N2;
13943
13944 // ConvertSelectToConcatVector assumes that both of the above checks for
13945 // (vselect (build_vector all{ones,zeros}) ...) have already been made and
13946 // addressed.
13947 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
13948 N2.getOpcode() == ISD::CONCAT_VECTORS &&
13949 ISD::isBuildVectorOfConstantSDNodes(N: N0.getNode())) {
13950 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
13951 return CV;
13952 }
13953
13954 if (SDValue V = foldVSelectOfConstants(N))
13955 return V;
13956
13957 if (hasOperation(Opcode: ISD::SRA, VT))
13958 if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
13959 return V;
13960
13961 if (SimplifyDemandedVectorElts(Op: SDValue(N, 0)))
13962 return SDValue(N, 0);
13963
13964 if (SDValue V = combineVSelectWithAllOnesOrZeros(Cond: N0, TVal: N1, FVal: N2, TLI, DAG, DL))
13965 return V;
13966
13967 return SDValue();
13968}
13969
13970SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
13971 SDValue N0 = N->getOperand(Num: 0);
13972 SDValue N1 = N->getOperand(Num: 1);
13973 SDValue N2 = N->getOperand(Num: 2);
13974 SDValue N3 = N->getOperand(Num: 3);
13975 SDValue N4 = N->getOperand(Num: 4);
13976 ISD::CondCode CC = cast<CondCodeSDNode>(Val&: N4)->get();
13977 SDLoc DL(N);
13978
13979 // fold select_cc lhs, rhs, x, x, cc -> x
13980 if (N2 == N3)
13981 return N2;
13982
13983 // select_cc bool, 0, x, y, seteq -> select bool, y, x
13984 if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
13985 isNullConstant(V: N1))
13986 return DAG.getSelect(DL, VT: N2.getValueType(), Cond: N0, LHS: N3, RHS: N2);
13987
13988 // Determine if the condition we're dealing with is constant
13989 if (SDValue SCC = SimplifySetCC(VT: getSetCCResultType(VT: N0.getValueType()), N0, N1,
13990 Cond: CC, DL, foldBooleans: false)) {
13991 AddToWorklist(N: SCC.getNode());
13992
13993 // cond always true -> true val
13994 // cond always false -> false val
13995 if (auto *SCCC = dyn_cast<ConstantSDNode>(Val: SCC.getNode()))
13996 return SCCC->isZero() ? N3 : N2;
13997
13998 // When the condition is UNDEF, just return the first operand. This is
13999 // consistent with DAG creation, where no setcc node is created in this case.
14000 if (SCC->isUndef())
14001 return N2;
14002
14003 // Fold to a simpler select_cc
14004 if (SCC.getOpcode() == ISD::SETCC) {
14005 return DAG.getNode(Opcode: ISD::SELECT_CC, DL, VT: N2.getValueType(),
14006 N1: SCC.getOperand(i: 0), N2: SCC.getOperand(i: 1), N3: N2, N4: N3,
14007 N5: SCC.getOperand(i: 2), Flags: SCC->getFlags());
14008 }
14009 }
14010
14011 // If we can fold this based on the true/false value, do so.
14012 if (SimplifySelectOps(SELECT: N, LHS: N2, RHS: N3))
14013 return SDValue(N, 0); // Don't revisit N.
14014
14015 // fold select_cc into other things, such as min/max/abs
14016 return SimplifySelectCC(DL, N0, N1, N2, N3, CC);
14017}
14018
14019SDValue DAGCombiner::visitSETCC(SDNode *N) {
14020 // setcc is very commonly used as an argument to brcond. This pattern
14021 // also lends itself to numerous combines and, as a result, it is desirable
14022 // to keep the argument to a brcond as a setcc as much as possible.
14023 bool PreferSetCC =
14024 N->hasOneUse() && N->user_begin()->getOpcode() == ISD::BRCOND;
14025
14026 ISD::CondCode Cond = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
14027 EVT VT = N->getValueType(ResNo: 0);
14028 SDValue N0 = N->getOperand(Num: 0), N1 = N->getOperand(Num: 1);
14029 SDLoc DL(N);
14030
14031 if (SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, DL, foldBooleans: !PreferSetCC)) {
14032 // If we prefer to have a setcc, and we don't, we'll try our best to
14033 // recreate one using rebuildSetCC.
14034 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
14035 SDValue NewSetCC = rebuildSetCC(N: Combined);
14036
14037 // We don't have anything interesting to combine to.
14038 if (NewSetCC.getNode() == N)
14039 return SDValue();
14040
14041 if (NewSetCC)
14042 return NewSetCC;
14043 }
14044 return Combined;
14045 }
14046
14047 // Optimize
14048 // 1) (icmp eq/ne (and X, C0), (shift X, C1))
14049 // or
14050 // 2) (icmp eq/ne X, (rotate X, C1))
14051 // If C0 is a mask or shifted mask and the shift amount (C1) isolates the
14052 // remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`),
14053 // then:
14054 // If C1 is a power of 2, the rotate and shift+and versions are
14055 // equivalent, so we can interchange them depending on target preference.
14056 // Otherwise, if we have the shift+and version, we can interchange srl/shl,
14057 // which in turn affects the constant C0. We can use this to get better
14058 // constants, again determined by target preference.
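// For example, on i32 the tests (X & 0xFFFF) == (X srl 16) and
// (X & 0xFFFF0000) == (X shl 16) are equivalent (both compare the low and
// high halves of X), so we can use whichever mask/shift form the target
// prefers.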
14059 if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
14060 auto IsAndWithShift = [](SDValue A, SDValue B) {
14061 return A.getOpcode() == ISD::AND &&
14062 (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
14063 A.getOperand(i: 0) == B.getOperand(i: 0);
14064 };
14065 auto IsRotateWithOp = [](SDValue A, SDValue B) {
14066 return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
14067 B.getOperand(i: 0) == A;
14068 };
14069 SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
14070 bool IsRotate = false;
14071
14072 // Find either shift+and or rotate pattern.
14073 if (IsAndWithShift(N0, N1)) {
14074 AndOrOp = N0;
14075 ShiftOrRotate = N1;
14076 } else if (IsAndWithShift(N1, N0)) {
14077 AndOrOp = N1;
14078 ShiftOrRotate = N0;
14079 } else if (IsRotateWithOp(N0, N1)) {
14080 IsRotate = true;
14081 AndOrOp = N0;
14082 ShiftOrRotate = N1;
14083 } else if (IsRotateWithOp(N1, N0)) {
14084 IsRotate = true;
14085 AndOrOp = N1;
14086 ShiftOrRotate = N0;
14087 }
14088
14089 if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
14090 (IsRotate || AndOrOp.hasOneUse())) {
14091 EVT OpVT = N0.getValueType();
14092 // Get the constant shift/rotate amount and possibly the mask (if it's the
14093 // shift+and variant).
14094 auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
14095 ConstantSDNode *CNode = isConstOrConstSplat(N: Op, /*AllowUndefs*/ false,
14096 /*AllowTrunc*/ AllowTruncation: false);
14097 if (CNode == nullptr)
14098 return std::nullopt;
14099 return CNode->getAPIntValue();
14100 };
14101 std::optional<APInt> AndCMask =
14102 IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(i: 1));
14103 std::optional<APInt> ShiftCAmt =
14104 GetAPIntValue(ShiftOrRotate.getOperand(i: 1));
14105 unsigned NumBits = OpVT.getScalarSizeInBits();
14106
14107 // We found constants.
14108 if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(RHS: NumBits)) {
14109 unsigned ShiftOpc = ShiftOrRotate.getOpcode();
14110 // Check that the constants meet the constraints.
14111 bool CanTransform = IsRotate;
14112 if (!CanTransform) {
14113 // Check that the mask and shift complement each other.
14114 CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
14115 // Check that we are comparing all bits
14116 CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
14117 // Check that the and mask is correct for the shift
14118 CanTransform &=
14119 ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
14120 }
14121
14122 // See if the target prefers another shift/rotate opcode.
14123 unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
14124 VT: OpVT, ShiftOpc, MayTransformRotate: ShiftCAmt->isPowerOf2(), ShiftOrRotateAmt: *ShiftCAmt, AndMask: AndCMask);
14125 // Transform is valid and we have a new preference.
14126 if (CanTransform && NewShiftOpc != ShiftOpc) {
14127 SDValue NewShiftOrRotate =
14128 DAG.getNode(Opcode: NewShiftOpc, DL, VT: OpVT, N1: ShiftOrRotate.getOperand(i: 0),
14129 N2: ShiftOrRotate.getOperand(i: 1));
14130 SDValue NewAndOrOp = SDValue();
14131
14132 if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
14133 APInt NewMask =
14134 NewShiftOpc == ISD::SHL
14135 ? APInt::getHighBitsSet(numBits: NumBits,
14136 hiBitsSet: NumBits - ShiftCAmt->getZExtValue())
14137 : APInt::getLowBitsSet(numBits: NumBits,
14138 loBitsSet: NumBits - ShiftCAmt->getZExtValue());
14139 NewAndOrOp =
14140 DAG.getNode(Opcode: ISD::AND, DL, VT: OpVT, N1: ShiftOrRotate.getOperand(i: 0),
14141 N2: DAG.getConstant(Val: NewMask, DL, VT: OpVT));
14142 } else {
14143 NewAndOrOp = ShiftOrRotate.getOperand(i: 0);
14144 }
14145
14146 return DAG.getSetCC(DL, VT, LHS: NewAndOrOp, RHS: NewShiftOrRotate, Cond);
14147 }
14148 }
14149 }
14150 }
14151 return SDValue();
14152}
14153
14154SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
14155 SDValue LHS = N->getOperand(Num: 0);
14156 SDValue RHS = N->getOperand(Num: 1);
14157 SDValue Carry = N->getOperand(Num: 2);
14158 SDValue Cond = N->getOperand(Num: 3);
14159
14160 // If Carry is false, fold to a regular SETCC.
14161 if (isNullConstant(V: Carry))
14162 return DAG.getNode(Opcode: ISD::SETCC, DL: SDLoc(N), VTList: N->getVTList(), N1: LHS, N2: RHS, N3: Cond);
14163
14164 return SDValue();
14165}
14166
14167 /// Check that N satisfies all of the following:
14168 /// N is used once.
14169 /// N is a load.
14170 /// The load is compatible with ExtOpcode, meaning that if the load has an
14171 /// explicit zero/sign extension, ExtOpcode must perform the same kind of
14172 /// extension.
14173 /// Otherwise (no explicit extension), any ExtOpcode is compatible.
14174static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
14175 if (!N.hasOneUse())
14176 return false;
14177
14178 if (!isa<LoadSDNode>(Val: N))
14179 return false;
14180
14181 LoadSDNode *Load = cast<LoadSDNode>(Val&: N);
14182 ISD::LoadExtType LoadExt = Load->getExtensionType();
14183 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
14184 return true;
14185
14186 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
14187 // extension.
14188 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
14189 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
14190 return false;
14191
14192 return true;
14193}
14194
14195/// Fold
14196/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
14197/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
14198/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
14199/// This function is called by the DAGCombiner when visiting sext/zext/aext
14200/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
14201static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
14202 SelectionDAG &DAG, const SDLoc &DL,
14203 CombineLevel Level) {
14204 unsigned Opcode = N->getOpcode();
14205 SDValue N0 = N->getOperand(Num: 0);
14206 EVT VT = N->getValueType(ResNo: 0);
14207 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
14208 Opcode == ISD::ANY_EXTEND) &&
14209 "Expected EXTEND dag node in input!");
14210
14211 SDValue Cond, Op1, Op2;
14212 if (!sd_match(N: N0, P: m_OneUse(P: m_SelectLike(Cond: m_Value(N&: Cond), T: m_Value(N&: Op1),
14213 F: m_Value(N&: Op2)))))
14214 return SDValue();
14215
14216 if (!isCompatibleLoad(N: Op1, ExtOpcode: Opcode) || !isCompatibleLoad(N: Op2, ExtOpcode: Opcode))
14217 return SDValue();
14218
14219 auto ExtLoadOpcode = ISD::EXTLOAD;
14220 if (Opcode == ISD::SIGN_EXTEND)
14221 ExtLoadOpcode = ISD::SEXTLOAD;
14222 else if (Opcode == ISD::ZERO_EXTEND)
14223 ExtLoadOpcode = ISD::ZEXTLOAD;
14224
14225 // An illegal VSELECT created after legalization (DAG Combine2) may cause
14226 // instruction selection to fail, so conservatively check the OperationAction.
14227 LoadSDNode *Load1 = cast<LoadSDNode>(Val&: Op1);
14228 LoadSDNode *Load2 = cast<LoadSDNode>(Val&: Op2);
14229 if (!TLI.isLoadExtLegal(ExtType: ExtLoadOpcode, ValVT: VT, MemVT: Load1->getMemoryVT()) ||
14230 !TLI.isLoadExtLegal(ExtType: ExtLoadOpcode, ValVT: VT, MemVT: Load2->getMemoryVT()) ||
14231 (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
14232 TLI.getOperationAction(Op: ISD::VSELECT, VT) != TargetLowering::Legal))
14233 return SDValue();
14234
14235 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Operand: Op1);
14236 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Operand: Op2);
14237 return DAG.getSelect(DL, VT, Cond, LHS: Ext1, RHS: Ext2);
14238}
14239
14240/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
14241/// a build_vector of constants.
14242/// This function is called by the DAGCombiner when visiting sext/zext/aext
14243/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
14244/// Vector extends are not folded if operations are legal; this is to
14245/// avoid introducing illegal build_vector dag nodes.
14246static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL,
14247 const TargetLowering &TLI,
14248 SelectionDAG &DAG, bool LegalTypes) {
14249 unsigned Opcode = N->getOpcode();
14250 SDValue N0 = N->getOperand(Num: 0);
14251 EVT VT = N->getValueType(ResNo: 0);
14252
14253 assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
14254 "Expected EXTEND dag node in input!");
14255
14256 // fold (sext c1) -> c1
14257 // fold (zext c1) -> c1
14258 // fold (aext c1) -> c1
14259 if (isa<ConstantSDNode>(Val: N0))
14260 return DAG.getNode(Opcode, DL, VT, Operand: N0);
14261
14262 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
14263 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
14264 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
14265 if (N0->getOpcode() == ISD::SELECT) {
14266 SDValue Op1 = N0->getOperand(Num: 1);
14267 SDValue Op2 = N0->getOperand(Num: 2);
14268 if (isa<ConstantSDNode>(Val: Op1) && isa<ConstantSDNode>(Val: Op2) &&
14269 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(FromTy: N0.getValueType(), ToTy: VT))) {
14270 // For any_extend, choose sign extension of the constants to allow a
14271 // possible further transform to sign_extend_inreg, i.e.:
14272 //
14273 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
14274 // t2: i64 = any_extend t1
14275 // -->
14276 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
14277 // -->
14278 // t4: i64 = sign_extend_inreg t3
14279 unsigned FoldOpc = Opcode;
14280 if (FoldOpc == ISD::ANY_EXTEND)
14281 FoldOpc = ISD::SIGN_EXTEND;
14282 return DAG.getSelect(DL, VT, Cond: N0->getOperand(Num: 0),
14283 LHS: DAG.getNode(Opcode: FoldOpc, DL, VT, Operand: Op1),
14284 RHS: DAG.getNode(Opcode: FoldOpc, DL, VT, Operand: Op2));
14285 }
14286 }
14287
14288 // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
14289 // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
14290 // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
14291 EVT SVT = VT.getScalarType();
14292 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(VT: SVT)) &&
14293 ISD::isBuildVectorOfConstantSDNodes(N: N0.getNode())))
14294 return SDValue();
14295
14296 // We can fold this node into a build_vector.
14297 unsigned VTBits = SVT.getSizeInBits();
14298 unsigned EVTBits = N0->getValueType(ResNo: 0).getScalarSizeInBits();
14299 SmallVector<SDValue, 8> Elts;
14300 unsigned NumElts = VT.getVectorNumElements();
14301
14302 for (unsigned i = 0; i != NumElts; ++i) {
14303 SDValue Op = N0.getOperand(i);
14304 if (Op.isUndef()) {
14305 if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
14306 Elts.push_back(Elt: DAG.getUNDEF(VT: SVT));
14307 else
14308 Elts.push_back(Elt: DAG.getConstant(Val: 0, DL, VT: SVT));
14309 continue;
14310 }
14311
14312 SDLoc DL(Op);
14313 // Get the constant value and if needed trunc it to the size of the type.
14314 // Nodes like build_vector might have constants wider than the scalar type.
14315 APInt C = Op->getAsAPIntVal().zextOrTrunc(width: EVTBits);
14316 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
14317 Elts.push_back(Elt: DAG.getConstant(Val: C.sext(width: VTBits), DL, VT: SVT));
14318 else
14319 Elts.push_back(Elt: DAG.getConstant(Val: C.zext(width: VTBits), DL, VT: SVT));
14320 }
14321
14322 return DAG.getBuildVector(VT, DL, Ops: Elts);
14323}
14324
14325 // ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable the
14326 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
14327 // transformation. Returns true if the extensions are possible and the
14328 // above-mentioned transformation is profitable.
14329static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
14330 unsigned ExtOpc,
14331 SmallVectorImpl<SDNode *> &ExtendNodes,
14332 const TargetLowering &TLI) {
14333 bool HasCopyToRegUses = false;
14334 bool isTruncFree = TLI.isTruncateFree(FromVT: VT, ToVT: N0.getValueType());
14335 for (SDUse &Use : N0->uses()) {
14336 SDNode *User = Use.getUser();
14337 if (User == N)
14338 continue;
14339 if (Use.getResNo() != N0.getResNo())
14340 continue;
14341 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
14342 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
14343 ISD::CondCode CC = cast<CondCodeSDNode>(Val: User->getOperand(Num: 2))->get();
14344 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(Code: CC))
14345 // Sign bits will be lost after a zext.
14346 return false;
14347 bool Add = false;
14348 for (unsigned i = 0; i != 2; ++i) {
14349 SDValue UseOp = User->getOperand(Num: i);
14350 if (UseOp == N0)
14351 continue;
14352 if (!isa<ConstantSDNode>(Val: UseOp))
14353 return false;
14354 Add = true;
14355 }
14356 if (Add)
14357 ExtendNodes.push_back(Elt: User);
14358 continue;
14359 }
14360 // If truncates aren't free and there are users we can't
14361 // extend, it isn't worthwhile.
14362 if (!isTruncFree)
14363 return false;
14364 // Remember if this value is live-out.
14365 if (User->getOpcode() == ISD::CopyToReg)
14366 HasCopyToRegUses = true;
14367 }
14368
14369 if (HasCopyToRegUses) {
14370 bool BothLiveOut = false;
14371 for (SDUse &Use : N->uses()) {
14372 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
14373 BothLiveOut = true;
14374 break;
14375 }
14376 }
14377 if (BothLiveOut)
14378 // Both unextended and extended values are live out. There had better be
14379 // a good reason for the transformation.
14380 return !ExtendNodes.empty();
14381 }
14382 return true;
14383}
14384
14385void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
14386 SDValue OrigLoad, SDValue ExtLoad,
14387 ISD::NodeType ExtType) {
14388 // Extend SetCC uses if necessary.
14389 SDLoc DL(ExtLoad);
14390 for (SDNode *SetCC : SetCCs) {
14391 SmallVector<SDValue, 4> Ops;
14392
14393 for (unsigned j = 0; j != 2; ++j) {
14394 SDValue SOp = SetCC->getOperand(Num: j);
14395 if (SOp == OrigLoad)
14396 Ops.push_back(Elt: ExtLoad);
14397 else
14398 Ops.push_back(Elt: DAG.getNode(Opcode: ExtType, DL, VT: ExtLoad->getValueType(ResNo: 0), Operand: SOp));
14399 }
14400
14401 Ops.push_back(Elt: SetCC->getOperand(Num: 2));
14402 CombineTo(N: SetCC, Res: DAG.getNode(Opcode: ISD::SETCC, DL, VT: SetCC->getValueType(ResNo: 0), Ops));
14403 }
14404}
14405
14406// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
14407SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
14408 SDValue N0 = N->getOperand(Num: 0);
14409 EVT DstVT = N->getValueType(ResNo: 0);
14410 EVT SrcVT = N0.getValueType();
14411
14412 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
14413 N->getOpcode() == ISD::ZERO_EXTEND) &&
14414 "Unexpected node type (not an extend)!");
14415
14416 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
14417 // For example, on a target with legal v4i32, but illegal v8i32, turn:
14418 // (v8i32 (sext (v8i16 (load x))))
14419 // into:
14420 // (v8i32 (concat_vectors (v4i32 (sextload x)),
14421 // (v4i32 (sextload (x + 16)))))
14422 // Where uses of the original load, i.e.:
14423 // (v8i16 (load x))
14424 // are replaced with:
14425 // (v8i16 (truncate
14426 // (v8i32 (concat_vectors (v4i32 (sextload x)),
14427 // (v4i32 (sextload (x + 16)))))))
14428 //
14429 // This combine is only applicable to illegal, but splittable, vectors.
14430 // All legal types, and illegal non-vector types, are handled elsewhere.
14431 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
14432 //
14433 if (N0->getOpcode() != ISD::LOAD)
14434 return SDValue();
14435
14436 LoadSDNode *LN0 = cast<LoadSDNode>(Val&: N0);
14437
14438 if (!ISD::isNON_EXTLoad(N: LN0) || !ISD::isUNINDEXEDLoad(N: LN0) ||
14439 !N0.hasOneUse() || !LN0->isSimple() ||
14440 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
14441 !TLI.isVectorLoadExtDesirable(ExtVal: SDValue(N, 0)))
14442 return SDValue();
14443
14444 SmallVector<SDNode *, 4> SetCCs;
14445 if (!ExtendUsesToFormExtLoad(VT: DstVT, N, N0, ExtOpc: N->getOpcode(), ExtendNodes&: SetCCs, TLI))
14446 return SDValue();
14447
14448 ISD::LoadExtType ExtType =
14449 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
14450
14451 // Try to split the vector types to get down to legal types.
14452 EVT SplitSrcVT = SrcVT;
14453 EVT SplitDstVT = DstVT;
14454 while (!TLI.isLoadExtLegalOrCustom(ExtType, ValVT: SplitDstVT, MemVT: SplitSrcVT) &&
14455 SplitSrcVT.getVectorNumElements() > 1) {
14456 SplitDstVT = DAG.GetSplitDestVTs(VT: SplitDstVT).first;
14457 SplitSrcVT = DAG.GetSplitDestVTs(VT: SplitSrcVT).first;
14458 }
14459
14460 if (!TLI.isLoadExtLegalOrCustom(ExtType, ValVT: SplitDstVT, MemVT: SplitSrcVT))
14461 return SDValue();
14462
14463 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
14464
14465 SDLoc DL(N);
14466 const unsigned NumSplits =
14467 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
14468 const unsigned Stride = SplitSrcVT.getStoreSize();
14469 SmallVector<SDValue, 4> Loads;
14470 SmallVector<SDValue, 4> Chains;
14471
14472 SDValue BasePtr = LN0->getBasePtr();
14473 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
14474 const unsigned Offset = Idx * Stride;
14475
14476 SDValue SplitLoad =
14477 DAG.getExtLoad(ExtType, dl: SDLoc(LN0), VT: SplitDstVT, Chain: LN0->getChain(),
14478 Ptr: BasePtr, PtrInfo: LN0->getPointerInfo().getWithOffset(O: Offset),
14479 MemVT: SplitSrcVT, Alignment: LN0->getBaseAlign(),
14480 MMOFlags: LN0->getMemOperand()->getFlags(), AAInfo: LN0->getAAInfo());
14481
14482 BasePtr = DAG.getMemBasePlusOffset(Base: BasePtr, Offset: TypeSize::getFixed(ExactSize: Stride), DL);
14483
14484 Loads.push_back(Elt: SplitLoad.getValue(R: 0));
14485 Chains.push_back(Elt: SplitLoad.getValue(R: 1));
14486 }
14487
14488 SDValue NewChain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: Chains);
14489 SDValue NewValue = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: DstVT, Ops: Loads);
14490
14491 // Simplify the TokenFactor.
14492 AddToWorklist(N: NewChain.getNode());
14493
14494 CombineTo(N, Res: NewValue);
14495
14496 // Replace uses of the original load (before extension)
14497 // with a truncate of the concatenated sextloaded vectors.
14498 SDValue Trunc =
14499 DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(N0), VT: N0.getValueType(), Operand: NewValue);
14500 ExtendSetCCUses(SetCCs, OrigLoad: N0, ExtLoad: NewValue, ExtType: (ISD::NodeType)N->getOpcode());
14501 CombineTo(N: N0.getNode(), Res0: Trunc, Res1: NewChain);
14502 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14503}
14504
14505// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
14506// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
14507SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
14508 assert(N->getOpcode() == ISD::ZERO_EXTEND);
14509 EVT VT = N->getValueType(ResNo: 0);
14510 EVT OrigVT = N->getOperand(Num: 0).getValueType();
14511 if (TLI.isZExtFree(FromTy: OrigVT, ToTy: VT))
14512 return SDValue();
14513
14514 // and/or/xor
14515 SDValue N0 = N->getOperand(Num: 0);
14516 if (!ISD::isBitwiseLogicOp(Opcode: N0.getOpcode()) ||
14517 N0.getOperand(i: 1).getOpcode() != ISD::Constant ||
14518 (LegalOperations && !TLI.isOperationLegal(Op: N0.getOpcode(), VT)))
14519 return SDValue();
14520
14521 // shl/shr
14522 SDValue N1 = N0->getOperand(Num: 0);
14523 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
14524 N1.getOperand(i: 1).getOpcode() != ISD::Constant ||
14525 (LegalOperations && !TLI.isOperationLegal(Op: N1.getOpcode(), VT)))
14526 return SDValue();
14527
14528 // load
14529 if (!isa<LoadSDNode>(Val: N1.getOperand(i: 0)))
14530 return SDValue();
14531 LoadSDNode *Load = cast<LoadSDNode>(Val: N1.getOperand(i: 0));
14532 EVT MemVT = Load->getMemoryVT();
14533 if (!TLI.isLoadExtLegal(ExtType: ISD::ZEXTLOAD, ValVT: VT, MemVT) ||
14534 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
14535 return SDValue();
14536
14537
14538 // If the shift op is SHL, the logic op must be AND, otherwise the result
14539 // will be wrong.
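// (The zero-extended and-mask has zero high bits, so it clears any bits that
// the widened shl moves above the original value width; with or/xor those
// bits would survive and change the result.)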
14540 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
14541 return SDValue();
14542
14543 if (!N0.hasOneUse() || !N1.hasOneUse())
14544 return SDValue();
14545
14546 SmallVector<SDNode*, 4> SetCCs;
14547 if (!ExtendUsesToFormExtLoad(VT, N: N1.getNode(), N0: N1.getOperand(i: 0),
14548 ExtOpc: ISD::ZERO_EXTEND, ExtendNodes&: SetCCs, TLI))
14549 return SDValue();
14550
14551 // Actually do the transformation.
14552 SDValue ExtLoad = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl: SDLoc(Load), VT,
14553 Chain: Load->getChain(), Ptr: Load->getBasePtr(),
14554 MemVT: Load->getMemoryVT(), MMO: Load->getMemOperand());
14555
14556 SDLoc DL1(N1);
14557 SDValue Shift = DAG.getNode(Opcode: N1.getOpcode(), DL: DL1, VT, N1: ExtLoad,
14558 N2: N1.getOperand(i: 1));
14559
14560 APInt Mask = N0.getConstantOperandAPInt(i: 1).zext(width: VT.getSizeInBits());
14561 SDLoc DL0(N0);
14562 SDValue And = DAG.getNode(Opcode: N0.getOpcode(), DL: DL0, VT, N1: Shift,
14563 N2: DAG.getConstant(Val: Mask, DL: DL0, VT));
14564
14565 ExtendSetCCUses(SetCCs, OrigLoad: N1.getOperand(i: 0), ExtLoad, ExtType: ISD::ZERO_EXTEND);
14566 CombineTo(N, Res: And);
14567 if (SDValue(Load, 0).hasOneUse()) {
14568 DAG.ReplaceAllUsesOfValueWith(From: SDValue(Load, 1), To: ExtLoad.getValue(R: 1));
14569 } else {
14570 SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(Load),
14571 VT: Load->getValueType(ResNo: 0), Operand: ExtLoad);
14572 CombineTo(N: Load, Res0: Trunc, Res1: ExtLoad.getValue(R: 1));
14573 }
14574
14575 // N0 is dead at this point.
14576 recursivelyDeleteUnusedNodes(N: N0.getNode());
14577
14578 return SDValue(N,0); // Return N so it doesn't get rechecked!
14579}
14580
14581/// If we're narrowing or widening the result of a vector select and the final
14582/// size is the same size as a setcc (compare) feeding the select, then try to
14583/// apply the cast operation to the select's operands because matching vector
14584/// sizes for a select condition and other operands should be more efficient.
14585SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
14586 unsigned CastOpcode = Cast->getOpcode();
14587 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
14588 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
14589 CastOpcode == ISD::FP_ROUND) &&
14590 "Unexpected opcode for vector select narrowing/widening");
14591
14592 // We only do this transform before legal ops because the pattern may be
14593 // obfuscated by target-specific operations after legalization. Do not create
14594 // an illegal select op, however, because that may be difficult to lower.
14595 EVT VT = Cast->getValueType(ResNo: 0);
14596 if (LegalOperations || !TLI.isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
14597 return SDValue();
14598
14599 SDValue VSel = Cast->getOperand(Num: 0);
14600 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
14601 VSel.getOperand(i: 0).getOpcode() != ISD::SETCC)
14602 return SDValue();
14603
14604 // Does the setcc have the same vector size as the casted select?
14605 SDValue SetCC = VSel.getOperand(i: 0);
14606 EVT SetCCVT = getSetCCResultType(VT: SetCC.getOperand(i: 0).getValueType());
14607 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
14608 return SDValue();
14609
14610 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
14611 SDValue A = VSel.getOperand(i: 1);
14612 SDValue B = VSel.getOperand(i: 2);
14613 SDValue CastA, CastB;
14614 SDLoc DL(Cast);
14615 if (CastOpcode == ISD::FP_ROUND) {
14616 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
14617 CastA = DAG.getNode(Opcode: CastOpcode, DL, VT, N1: A, N2: Cast->getOperand(Num: 1));
14618 CastB = DAG.getNode(Opcode: CastOpcode, DL, VT, N1: B, N2: Cast->getOperand(Num: 1));
14619 } else {
14620 CastA = DAG.getNode(Opcode: CastOpcode, DL, VT, Operand: A);
14621 CastB = DAG.getNode(Opcode: CastOpcode, DL, VT, Operand: B);
14622 }
14623 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT, N1: SetCC, N2: CastA, N3: CastB);
14624}
14625
14626// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
14627// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
14628static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
14629 const TargetLowering &TLI, EVT VT,
14630 bool LegalOperations, SDNode *N,
14631 SDValue N0, ISD::LoadExtType ExtLoadType) {
14632 SDNode *N0Node = N0.getNode();
14633 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N: N0Node)
14634 : ISD::isZEXTLoad(N: N0Node);
14635 if ((!isAExtLoad && !ISD::isEXTLoad(N: N0Node)) ||
14636 !ISD::isUNINDEXEDLoad(N: N0Node) || !N0.hasOneUse())
14637 return SDValue();
14638
14639 LoadSDNode *LN0 = cast<LoadSDNode>(Val&: N0);
14640 EVT MemVT = LN0->getMemoryVT();
14641 if ((LegalOperations || !LN0->isSimple() ||
14642 VT.isVector()) &&
14643 !TLI.isLoadExtLegal(ExtType: ExtLoadType, ValVT: VT, MemVT))
14644 return SDValue();
14645
14646 SDValue ExtLoad =
14647 DAG.getExtLoad(ExtType: ExtLoadType, dl: SDLoc(LN0), VT, Chain: LN0->getChain(),
14648 Ptr: LN0->getBasePtr(), MemVT, MMO: LN0->getMemOperand());
14649 Combiner.CombineTo(N, Res: ExtLoad);
14650 DAG.ReplaceAllUsesOfValueWith(From: SDValue(LN0, 1), To: ExtLoad.getValue(R: 1));
14651 if (LN0->use_empty())
14652 Combiner.recursivelyDeleteUnusedNodes(N: LN0);
14653 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14654}
14655
14656// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
14657// Only generate vector extloads when 1) they're legal, and 2) they are
14658// deemed desirable by the target. NonNegZExt can be set to true if a zero
14659// extend has the nonneg flag to allow use of sextload if profitable.
14660static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
14661 const TargetLowering &TLI, EVT VT,
14662 bool LegalOperations, SDNode *N, SDValue N0,
14663 ISD::LoadExtType ExtLoadType,
14664 ISD::NodeType ExtOpc,
14665 bool NonNegZExt = false) {
14666 bool Frozen = N0.getOpcode() == ISD::FREEZE;
14667 SDValue Freeze = Frozen ? N0 : SDValue();
14668 auto *Load = dyn_cast<LoadSDNode>(Val: Frozen ? N0.getOperand(i: 0) : N0);
14669 // TODO: Support multiple uses of the load when frozen.
14670 if (!Load || !ISD::isNON_EXTLoad(N: Load) || !ISD::isUNINDEXEDLoad(N: Load) ||
14671 (Frozen && !Load->hasNUsesOfValue(NUses: 1, Value: 0)))
14672 return {};
14673
14674 // If this is zext nneg, see if it would make sense to treat it as a sext.
14675 if (NonNegZExt) {
14676 assert(ExtLoadType == ISD::ZEXTLOAD && ExtOpc == ISD::ZERO_EXTEND &&
14677 "Unexpected load type or opcode");
14678 for (SDNode *User : Load->users()) {
14679 if (User->getOpcode() == ISD::SETCC) {
14680 ISD::CondCode CC = cast<CondCodeSDNode>(Val: User->getOperand(Num: 2))->get();
14681 if (ISD::isSignedIntSetCC(Code: CC)) {
14682 ExtLoadType = ISD::SEXTLOAD;
14683 ExtOpc = ISD::SIGN_EXTEND;
14684 break;
14685 }
14686 }
14687 }
14688 }
14689
14690 // TODO: The isFixedLengthVector() check should be removed, with any negative
14691 // effects on code generation instead being addressed by the target's
14692 // implementation of isVectorLoadExtDesirable().
14693 if ((LegalOperations || VT.isFixedLengthVector() || !Load->isSimple()) &&
14694 !TLI.isLoadExtLegal(ExtType: ExtLoadType, ValVT: VT, MemVT: Load->getValueType(ResNo: 0)))
14695 return {};
14696
14697 bool DoXform = true;
14698 SmallVector<SDNode *, 4> SetCCs;
14699 if (!N0->hasOneUse())
14700 DoXform = ExtendUsesToFormExtLoad(VT, N, N0: Frozen ? Freeze : SDValue(Load, 0),
14701 ExtOpc, ExtendNodes&: SetCCs, TLI);
14702 if (VT.isVector())
14703 DoXform &= TLI.isVectorLoadExtDesirable(ExtVal: SDValue(N, 0));
14704 if (!DoXform)
14705 return {};
14706
14707 SDLoc DL(Load);
14708 // If the load value is used only by N, replace it via CombineTo N.
14709 bool NoReplaceTrunc = N0.hasOneUse();
14710 SDValue ExtLoad =
14711 DAG.getExtLoad(ExtType: ExtLoadType, dl: DL, VT, Chain: Load->getChain(), Ptr: Load->getBasePtr(),
14712 MemVT: Load->getValueType(ResNo: 0), MMO: Load->getMemOperand());
14713 SDValue Res = ExtLoad;
14714 if (Frozen) {
14715 Res = DAG.getFreeze(V: ExtLoad);
14716 Res = DAG.getNode(Opcode: ExtLoadType == ISD::SEXTLOAD ? ISD::AssertSext
14717 : ISD::AssertZext,
14718 DL, VT: Res.getValueType(), N1: Res,
14719 N2: DAG.getValueType(Load->getValueType(ResNo: 0).getScalarType()));
14720 }
14721 Combiner.ExtendSetCCUses(SetCCs, OrigLoad: N0, ExtLoad: Res, ExtType: ExtOpc);
14722 Combiner.CombineTo(N, Res);
14723 if (NoReplaceTrunc) {
14724 DAG.ReplaceAllUsesOfValueWith(From: SDValue(Load, 1), To: ExtLoad.getValue(R: 1));
14725 Combiner.recursivelyDeleteUnusedNodes(N: N0.getNode());
14726 } else {
14727 SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: Load->getValueType(ResNo: 0), Operand: Res);
14728 if (Frozen) {
14729 Combiner.CombineTo(N: Freeze.getNode(), Res: Trunc);
14730 DAG.ReplaceAllUsesOfValueWith(From: SDValue(Load, 1), To: ExtLoad.getValue(R: 1));
14731 } else {
14732 Combiner.CombineTo(N: Load, Res0: Trunc, Res1: ExtLoad.getValue(R: 1));
14733 }
14734 }
14735 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14736}
14737
14738static SDValue
14739tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT,
14740 bool LegalOperations, SDNode *N, SDValue N0,
14741 ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) {
14742 if (!N0.hasOneUse())
14743 return SDValue();
14744
14745 MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(Val&: N0);
14746 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
14747 return SDValue();
14748
14749 if ((LegalOperations || !cast<MaskedLoadSDNode>(Val&: N0)->isSimple()) &&
14750 !TLI.isLoadExtLegalOrCustom(ExtType: ExtLoadType, ValVT: VT, MemVT: Ld->getValueType(ResNo: 0)))
14751 return SDValue();
14752
14753 if (!TLI.isVectorLoadExtDesirable(ExtVal: SDValue(N, 0)))
14754 return SDValue();
14755
14756 SDLoc dl(Ld);
14757 SDValue PassThru = DAG.getNode(Opcode: ExtOpc, DL: dl, VT, Operand: Ld->getPassThru());
14758 SDValue NewLoad = DAG.getMaskedLoad(
14759 VT, dl, Chain: Ld->getChain(), Base: Ld->getBasePtr(), Offset: Ld->getOffset(), Mask: Ld->getMask(),
14760 Src0: PassThru, MemVT: Ld->getMemoryVT(), MMO: Ld->getMemOperand(), AM: Ld->getAddressingMode(),
14761 ExtLoadType, IsExpanding: Ld->isExpandingLoad());
14762 DAG.ReplaceAllUsesOfValueWith(From: SDValue(Ld, 1), To: SDValue(NewLoad.getNode(), 1));
14763 return NewLoad;
14764}
14765
14766// fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load)))
14767static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG,
14768 const TargetLowering &TLI, EVT VT,
14769 SDValue N0,
14770 ISD::LoadExtType ExtLoadType) {
14771 auto *ALoad = dyn_cast<AtomicSDNode>(Val&: N0);
14772 if (!ALoad || ALoad->getOpcode() != ISD::ATOMIC_LOAD)
14773 return {};
14774 EVT MemoryVT = ALoad->getMemoryVT();
14775 if (!TLI.isAtomicLoadExtLegal(ExtType: ExtLoadType, ValVT: VT, MemVT: MemoryVT))
14776 return {};
14777 // Can't fold into ALoad if it is already extending differently.
14778 ISD::LoadExtType ALoadExtTy = ALoad->getExtensionType();
14779 if ((ALoadExtTy == ISD::ZEXTLOAD && ExtLoadType == ISD::SEXTLOAD) ||
14780 (ALoadExtTy == ISD::SEXTLOAD && ExtLoadType == ISD::ZEXTLOAD))
14781 return {};
14782
14783 EVT OrigVT = ALoad->getValueType(ResNo: 0);
14784 assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider.");
14785 auto *NewALoad = cast<AtomicSDNode>(Val: DAG.getAtomicLoad(
14786 ExtType: ExtLoadType, dl: SDLoc(ALoad), MemVT: MemoryVT, VT, Chain: ALoad->getChain(),
14787 Ptr: ALoad->getBasePtr(), MMO: ALoad->getMemOperand()));
14788 DAG.ReplaceAllUsesOfValueWith(
14789 From: SDValue(ALoad, 0),
14790 To: DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(ALoad), VT: OrigVT, Operand: SDValue(NewALoad, 0)));
14791 // Update the chain uses.
14792 DAG.ReplaceAllUsesOfValueWith(From: SDValue(ALoad, 1), To: SDValue(NewALoad, 1));
14793 return SDValue(NewALoad, 0);
14794}
14795
14796static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
14797 bool LegalOperations) {
14798 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
14799 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
14800
14801 SDValue SetCC = N->getOperand(Num: 0);
14802 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
14803 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
14804 return SDValue();
14805
14806 SDValue X = SetCC.getOperand(i: 0);
14807 SDValue Ones = SetCC.getOperand(i: 1);
14808 ISD::CondCode CC = cast<CondCodeSDNode>(Val: SetCC.getOperand(i: 2))->get();
14809 EVT VT = N->getValueType(ResNo: 0);
14810 EVT XVT = X.getValueType();
14811 // setge X, C is canonicalized to setgt, so we do not need to match that
14812 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
14813 // not require the 'not' op.
14814 if (CC == ISD::SETGT && isAllOnesConstant(V: Ones) && VT == XVT) {
14815 // Invert and smear/shift the sign bit:
14816 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
14817 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
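// e.g. for i32 X = 5: setgt 5, -1 is true, so sext i1 gives -1, and indeed
// sra (not 5), 31 = sra -6, 31 = -1; for X = -5 the setcc is false and
// sra (not -5), 31 = sra 4, 31 = 0.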
14818 SDLoc DL(N);
14819 unsigned ShCt = VT.getSizeInBits() - 1;
14820 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14821 if (!TLI.shouldAvoidTransformToShift(VT, Amount: ShCt)) {
14822 SDValue NotX = DAG.getNOT(DL, Val: X, VT);
14823 SDValue ShiftAmount = DAG.getConstant(Val: ShCt, DL, VT);
14824 auto ShiftOpcode =
14825 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
14826 return DAG.getNode(Opcode: ShiftOpcode, DL, VT, N1: NotX, N2: ShiftAmount);
14827 }
14828 }
14829 return SDValue();
14830}
14831
14832SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
14833 SDValue N0 = N->getOperand(Num: 0);
14834 if (N0.getOpcode() != ISD::SETCC)
14835 return SDValue();
14836
14837 SDValue N00 = N0.getOperand(i: 0);
14838 SDValue N01 = N0.getOperand(i: 1);
14839 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N0.getOperand(i: 2))->get();
14840 EVT VT = N->getValueType(ResNo: 0);
14841 EVT N00VT = N00.getValueType();
14842 SDLoc DL(N);
14843
14844 // Propagate fast-math-flags.
14845 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14846
14847 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
14848 // the same size as the compared operands. Try to optimize sext(setcc())
14849 // if this is the case.
14850 if (VT.isVector() && !LegalOperations &&
14851 TLI.getBooleanContents(Type: N00VT) ==
14852 TargetLowering::ZeroOrNegativeOneBooleanContent) {
14853 EVT SVT = getSetCCResultType(VT: N00VT);
14854
14855 // If we already have the desired type, don't change it.
14856 if (SVT != N0.getValueType()) {
14857 // We know that the # elements of the results is the same as the
14858 // # elements of the compare (and the # elements of the compare result
14859 // for that matter). Check to see that they are the same size. If so,
14860 // we know that the element size of the sext'd result matches the
14861 // element size of the compare operands.
14862 if (VT.getSizeInBits() == SVT.getSizeInBits())
14863 return DAG.getSetCC(DL, VT, LHS: N00, RHS: N01, Cond: CC);
14864
14865 // If the desired elements are smaller or larger than the source
14866 // elements, we can use a matching integer vector type and then
14867 // truncate/sign extend.
14868 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
14869 if (SVT == MatchingVecType) {
14870 SDValue VsetCC = DAG.getSetCC(DL, VT: MatchingVecType, LHS: N00, RHS: N01, Cond: CC);
14871 return DAG.getSExtOrTrunc(Op: VsetCC, DL, VT);
14872 }
14873 }
14874
14875 // Try to eliminate the sext of a setcc by zexting the compare operands.
14876 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(Op: ISD::SETCC, VT) &&
14877 !TLI.isOperationLegalOrCustom(Op: ISD::SETCC, VT: SVT)) {
14878 bool IsSignedCmp = ISD::isSignedIntSetCC(Code: CC);
14879 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
14880 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
14881
14882 // We have an unsupported narrow vector compare op that would be legal
14883 // if extended to the destination type. See if the compare operands
14884 // can be freely extended to the destination type.
14885 auto IsFreeToExtend = [&](SDValue V) {
14886 if (isConstantOrConstantVector(N: V, /*NoOpaques*/ true))
14887 return true;
14888 // Match a simple, non-extended load that can be converted to a
14889 // legal {z/s}ext-load.
14890 // TODO: Allow widening of an existing {z/s}ext-load?
14891 if (!(ISD::isNON_EXTLoad(N: V.getNode()) &&
14892 ISD::isUNINDEXEDLoad(N: V.getNode()) &&
14893 cast<LoadSDNode>(Val&: V)->isSimple() &&
14894 TLI.isLoadExtLegal(ExtType: LoadOpcode, ValVT: VT, MemVT: V.getValueType())))
14895 return false;
14896
14897 // Non-chain users of this value must either be the setcc in this
14898 // sequence or extends that can be folded into the new {z/s}ext-load.
14899 for (SDUse &Use : V->uses()) {
14900 // Skip uses of the chain and the setcc.
14901 SDNode *User = Use.getUser();
14902 if (Use.getResNo() != 0 || User == N0.getNode())
14903 continue;
14904 // Extra users must have exactly the same cast we are about to create.
14905 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
14906 // is enhanced similarly.
14907 if (User->getOpcode() != ExtOpcode || User->getValueType(ResNo: 0) != VT)
14908 return false;
14909 }
14910 return true;
14911 };
14912
14913 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
14914 SDValue Ext0 = DAG.getNode(Opcode: ExtOpcode, DL, VT, Operand: N00);
14915 SDValue Ext1 = DAG.getNode(Opcode: ExtOpcode, DL, VT, Operand: N01);
14916 return DAG.getSetCC(DL, VT, LHS: Ext0, RHS: Ext1, Cond: CC);
14917 }
14918 }
14919 }
14920
14921 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
14922 // Here, T can be 1 or -1, depending on the type of the setcc and
14923 // getBooleanContents().
14924 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
14925
14926 // To determine the "true" side of the select, we need to know the high bit
14927 // of the value returned by the setcc if it evaluates to true.
14928 // If the type of the setcc is i1, then the true case of the select is just
14929 // sext(i1 1), that is, -1.
14930 // If the type of the setcc is larger (say, i8) then the value of the high
14931 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
14932 // of the appropriate width.
14933 SDValue ExtTrueVal = (SetCCWidth == 1)
14934 ? DAG.getAllOnesConstant(DL, VT)
14935 : DAG.getBoolConstant(V: true, DL, VT, OpVT: N00VT);
14936 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
14937 if (SDValue SCC = SimplifySelectCC(DL, N0: N00, N1: N01, N2: ExtTrueVal, N3: Zero, CC, NotExtCompare: true))
14938 return SCC;
14939
14940 if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(Cond: N0, VT, TLI)) {
14941 EVT SetCCVT = getSetCCResultType(VT: N00VT);
14942 // Don't do this transform for i1 because there's a select transform
14943 // that would reverse it.
14944 // TODO: We should not do this transform at all without a target hook
14945 // because a sext is likely cheaper than a select?
14946 if (SetCCVT.getScalarSizeInBits() != 1 &&
14947 (!LegalOperations || TLI.isOperationLegal(Op: ISD::SETCC, VT: N00VT))) {
14948 SDValue SetCC = DAG.getSetCC(DL, VT: SetCCVT, LHS: N00, RHS: N01, Cond: CC);
14949 return DAG.getSelect(DL, VT, Cond: SetCC, LHS: ExtTrueVal, RHS: Zero);
14950 }
14951 }
14952
14953 return SDValue();
14954}
14955
14956SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
14957 SDValue N0 = N->getOperand(Num: 0);
14958 EVT VT = N->getValueType(ResNo: 0);
14959 SDLoc DL(N);
14960
14961 if (VT.isVector())
14962 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
14963 return FoldedVOp;
14964
14965 // sext(undef) = 0 because the top bit will all be the same.
14966 if (N0.isUndef())
14967 return DAG.getConstant(Val: 0, DL, VT);
14968
14969 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14970 return Res;
14971
14972 // fold (sext (sext x)) -> (sext x)
14973 // fold (sext (aext x)) -> (sext x)
14974 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
14975 return DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT, Operand: N0.getOperand(i: 0));
14976
14977 // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14978 // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14979 if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
14980 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
14981 return DAG.getNode(Opcode: ISD::SIGN_EXTEND_VECTOR_INREG, DL: SDLoc(N), VT,
14982 Operand: N0.getOperand(i: 0));
14983
14984 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
14985 SDValue N00 = N0.getOperand(i: 0);
14986 EVT ExtVT = cast<VTSDNode>(Val: N0->getOperand(Num: 1))->getVT();
14987 if (N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(Val: N00, VT2: ExtVT)) {
14988 // fold (sext (sext_inreg x)) -> (sext (trunc x))
14989 if ((!LegalTypes || TLI.isTypeLegal(VT: ExtVT))) {
14990 SDValue T = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: ExtVT, Operand: N00);
14991 return DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT, Operand: T);
14992 }
14993
14994 // If the trunc wasn't legal, try to fold to (sext_inreg (anyext x))
14995 if (!LegalTypes || TLI.isTypeLegal(VT)) {
14996 SDValue ExtSrc = DAG.getAnyExtOrTrunc(Op: N00, DL, VT);
14997 return DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT, N1: ExtSrc,
14998 N2: N0->getOperand(Num: 1));
14999 }
15000 }
15001 }
15002
15003 if (N0.getOpcode() == ISD::TRUNCATE) {
15004 // fold (sext (truncate (load x))) -> (sext (smaller load x))
15005 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
15006 if (SDValue NarrowLoad = reduceLoadWidth(N: N0.getNode())) {
15007 SDNode *oye = N0.getOperand(i: 0).getNode();
15008 if (NarrowLoad.getNode() != N0.getNode()) {
15009 CombineTo(N: N0.getNode(), Res: NarrowLoad);
15010 // CombineTo deleted the truncate, if needed, but not what's under it.
15011 AddToWorklist(N: oye);
15012 }
15013 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15014 }
15015
15016 // See if the value being truncated is already sign extended. If so, just
15017 // eliminate the trunc/sext pair.
15018 SDValue Op = N0.getOperand(i: 0);
15019 unsigned OpBits = Op.getScalarValueSizeInBits();
15020 unsigned MidBits = N0.getScalarValueSizeInBits();
15021 unsigned DestBits = VT.getScalarSizeInBits();
15022
15023 if (N0->getFlags().hasNoSignedWrap() ||
15024 DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
15025 if (OpBits == DestBits) {
15026        // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
15027        // bits, it already matches the sext result and can be used directly.
15028 return Op;
15029 }
15030
15031 if (OpBits < DestBits) {
15032 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
15033 // bits, just sext from i32.
15034 return DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT, Operand: Op);
15035 }
15036
15037 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
15038 // bits, just truncate to i32.
15039 SDNodeFlags Flags;
15040 Flags.setNoSignedWrap(true);
15041 Flags.setNoUnsignedWrap(N0->getFlags().hasNoUnsignedWrap());
15042 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Op, Flags);
15043 }
15044
15045    // fold (sext (truncate x)) -> (sext_inreg x).
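    // For example, (sext (trunc i64 X to i8) to i32) can become
    // (sext_inreg (trunc X to i32), i8), and (sext (trunc i16 X to i8) to i32)
    // can become (sext_inreg (anyext X to i32), i8) (illustrative types only).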
15046 if (!LegalOperations || TLI.isOperationLegal(Op: ISD::SIGN_EXTEND_INREG,
15047 VT: N0.getValueType())) {
15048 if (OpBits < DestBits)
15049 Op = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: SDLoc(N0), VT, Operand: Op);
15050 else if (OpBits > DestBits)
15051 Op = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(N0), VT, Operand: Op);
15052 return DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT, N1: Op,
15053 N2: DAG.getValueType(N0.getValueType()));
15054 }
15055 }
15056
15057 // Try to simplify (sext (load x)).
15058 if (SDValue foldedExt =
15059 tryToFoldExtOfLoad(DAG, Combiner&: *this, TLI, VT, LegalOperations, N, N0,
15060 ExtLoadType: ISD::SEXTLOAD, ExtOpc: ISD::SIGN_EXTEND))
15061 return foldedExt;
15062
15063 if (SDValue foldedExt =
15064 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
15065 ExtLoadType: ISD::SEXTLOAD, ExtOpc: ISD::SIGN_EXTEND))
15066 return foldedExt;
15067
15068 // fold (sext (load x)) to multiple smaller sextloads.
15069 // Only on illegal but splittable vectors.
15070 if (SDValue ExtLoad = CombineExtLoad(N))
15071 return ExtLoad;
15072
15073 // Try to simplify (sext (sextload x)).
15074 if (SDValue foldedExt = tryToFoldExtOfExtload(
15075 DAG, Combiner&: *this, TLI, VT, LegalOperations, N, N0, ExtLoadType: ISD::SEXTLOAD))
15076 return foldedExt;
15077
15078 // Try to simplify (sext (atomic_load x)).
15079 if (SDValue foldedExt =
15080 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ExtLoadType: ISD::SEXTLOAD))
15081 return foldedExt;
15082
15083 // fold (sext (and/or/xor (load x), cst)) ->
15084 // (and/or/xor (sextload x), (sext cst))
15085 if (ISD::isBitwiseLogicOp(Opcode: N0.getOpcode()) &&
15086 isa<LoadSDNode>(Val: N0.getOperand(i: 0)) &&
15087 N0.getOperand(i: 1).getOpcode() == ISD::Constant &&
15088 (!LegalOperations && TLI.isOperationLegal(Op: N0.getOpcode(), VT))) {
15089 LoadSDNode *LN00 = cast<LoadSDNode>(Val: N0.getOperand(i: 0));
15090 EVT MemVT = LN00->getMemoryVT();
15091 if (TLI.isLoadExtLegal(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT) &&
15092 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
15093 SmallVector<SDNode*, 4> SetCCs;
15094 bool DoXform = ExtendUsesToFormExtLoad(VT, N: N0.getNode(), N0: N0.getOperand(i: 0),
15095 ExtOpc: ISD::SIGN_EXTEND, ExtendNodes&: SetCCs, TLI);
15096 if (DoXform) {
15097 SDValue ExtLoad = DAG.getExtLoad(ExtType: ISD::SEXTLOAD, dl: SDLoc(LN00), VT,
15098 Chain: LN00->getChain(), Ptr: LN00->getBasePtr(),
15099 MemVT: LN00->getMemoryVT(),
15100 MMO: LN00->getMemOperand());
15101 APInt Mask = N0.getConstantOperandAPInt(i: 1).sext(width: VT.getSizeInBits());
15102 SDValue And = DAG.getNode(Opcode: N0.getOpcode(), DL, VT,
15103 N1: ExtLoad, N2: DAG.getConstant(Val: Mask, DL, VT));
15104 ExtendSetCCUses(SetCCs, OrigLoad: N0.getOperand(i: 0), ExtLoad, ExtType: ISD::SIGN_EXTEND);
15105 bool NoReplaceTruncAnd = !N0.hasOneUse();
15106 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
15107 CombineTo(N, Res: And);
15108 // If N0 has multiple uses, change other uses as well.
15109 if (NoReplaceTruncAnd) {
15110 SDValue TruncAnd =
15111 DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N0.getValueType(), Operand: And);
15112 CombineTo(N: N0.getNode(), Res: TruncAnd);
15113 }
15114 if (NoReplaceTrunc) {
15115 DAG.ReplaceAllUsesOfValueWith(From: SDValue(LN00, 1), To: ExtLoad.getValue(R: 1));
15116 } else {
15117 SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(LN00),
15118 VT: LN00->getValueType(ResNo: 0), Operand: ExtLoad);
15119 CombineTo(N: LN00, Res0: Trunc, Res1: ExtLoad.getValue(R: 1));
15120 }
15121 return SDValue(N,0); // Return N so it doesn't get rechecked!
15122 }
15123 }
15124 }
15125
15126 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
15127 return V;
15128
15129 if (SDValue V = foldSextSetcc(N))
15130 return V;
15131
15132 // fold (sext x) -> (zext x) if the sign bit is known zero.
15133 if (!TLI.isSExtCheaperThanZExt(FromTy: N0.getValueType(), ToTy: VT) &&
15134 (!LegalOperations || TLI.isOperationLegal(Op: ISD::ZERO_EXTEND, VT)) &&
15135 DAG.SignBitIsZero(Op: N0))
15136 return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT, Operand: N0, Flags: SDNodeFlags::NonNeg);
15137
15138 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(Cast: N))
15139 return NewVSel;
15140
15141 // Eliminate this sign extend by doing a negation in the destination type:
15142 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
15143 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
15144 isNullOrNullSplat(V: N0.getOperand(i: 0)) &&
15145 N0.getOperand(i: 1).getOpcode() == ISD::ZERO_EXTEND &&
15146 TLI.isOperationLegalOrCustom(Op: ISD::SUB, VT)) {
15147 SDValue Zext = DAG.getZExtOrTrunc(Op: N0.getOperand(i: 1).getOperand(i: 0), DL, VT);
15148 return DAG.getNegative(Val: Zext, DL, VT);
15149 }
15150 // Eliminate this sign extend by doing a decrement in the destination type:
15151 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
15152 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
15153 isAllOnesOrAllOnesSplat(V: N0.getOperand(i: 1)) &&
15154 N0.getOperand(i: 0).getOpcode() == ISD::ZERO_EXTEND &&
15155 TLI.isOperationLegalOrCustom(Op: ISD::ADD, VT)) {
15156 SDValue Zext = DAG.getZExtOrTrunc(Op: N0.getOperand(i: 0).getOperand(i: 0), DL, VT);
15157 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Zext, N2: DAG.getAllOnesConstant(DL, VT));
15158 }
15159
15160 // fold sext (not i1 X) -> add (zext i1 X), -1
15161 // TODO: This could be extended to handle bool vectors.
15162 if (N0.getValueType() == MVT::i1 && isBitwiseNot(V: N0) && N0.hasOneUse() &&
15163 (!LegalOperations || (TLI.isOperationLegal(Op: ISD::ZERO_EXTEND, VT) &&
15164 TLI.isOperationLegal(Op: ISD::ADD, VT)))) {
15165 // If we can eliminate the 'not', the sext form should be better
15166 if (SDValue NewXor = visitXOR(N: N0.getNode())) {
15167 // Returning N0 is a form of in-visit replacement that may have
15168 // invalidated N0.
15169 if (NewXor.getNode() == N0.getNode()) {
15170 // Return SDValue here as the xor should have already been replaced in
15171 // this sext.
15172 return SDValue();
15173 }
15174
15175 // Return a new sext with the new xor.
15176 return DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT, Operand: NewXor);
15177 }
15178
15179 SDValue Zext = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT, Operand: N0.getOperand(i: 0));
15180 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Zext, N2: DAG.getAllOnesConstant(DL, VT));
15181 }
15182
15183 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
15184 return Res;
15185
15186 return SDValue();
15187}
15188
15189/// Given an extending node with a pop-count operand, if the target does not
15190/// support a pop-count in the narrow source type but does support it in the
15191/// destination type, widen the pop-count to the destination type.
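/// For example, (i32 zext (i16 ctpop X)) can become (i32 ctpop (i32 zext X))
/// on a target where CTPOP is legal or custom for i32 but not for i16
/// (illustrative types only).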
15192static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL) {
15193 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
15194 Extend->getOpcode() == ISD::ANY_EXTEND) &&
15195 "Expected extend op");
15196
15197 SDValue CtPop = Extend->getOperand(Num: 0);
15198 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
15199 return SDValue();
15200
15201 EVT VT = Extend->getValueType(ResNo: 0);
15202 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15203 if (TLI.isOperationLegalOrCustom(Op: ISD::CTPOP, VT: CtPop.getValueType()) ||
15204 !TLI.isOperationLegalOrCustom(Op: ISD::CTPOP, VT))
15205 return SDValue();
15206
15207 // zext (ctpop X) --> ctpop (zext X)
15208 SDValue NewZext = DAG.getZExtOrTrunc(Op: CtPop.getOperand(i: 0), DL, VT);
15209 return DAG.getNode(Opcode: ISD::CTPOP, DL, VT, Operand: NewZext);
15210}
15211
15212// If we have (zext (abs X)) where X is a type that will be promoted by type
15213// legalization, convert to (abs (sext X)). But don't extend past a legal type.
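// For example, on a target that promotes i16 to i32, (i64 zext (i16 abs X))
// becomes (i64 zext (i32 abs (i32 sext X))) (illustrative types only).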
15214static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
15215 assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
15216
15217 EVT VT = Extend->getValueType(ResNo: 0);
15218 if (VT.isVector())
15219 return SDValue();
15220
15221 SDValue Abs = Extend->getOperand(Num: 0);
15222 if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
15223 return SDValue();
15224
15225 EVT AbsVT = Abs.getValueType();
15226 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15227 if (TLI.getTypeAction(Context&: *DAG.getContext(), VT: AbsVT) !=
15228 TargetLowering::TypePromoteInteger)
15229 return SDValue();
15230
15231 EVT LegalVT = TLI.getTypeToTransformTo(Context&: *DAG.getContext(), VT: AbsVT);
15232
15233 SDValue SExt =
15234 DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: SDLoc(Abs), VT: LegalVT, Operand: Abs.getOperand(i: 0));
15235 SDValue NewAbs = DAG.getNode(Opcode: ISD::ABS, DL: SDLoc(Abs), VT: LegalVT, Operand: SExt);
15236 return DAG.getZExtOrTrunc(Op: NewAbs, DL: SDLoc(Extend), VT);
15237}
15238
15239SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
15240 SDValue N0 = N->getOperand(Num: 0);
15241 EVT VT = N->getValueType(ResNo: 0);
15242 SDLoc DL(N);
15243
15244 if (VT.isVector())
15245 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
15246 return FoldedVOp;
15247
15248 // zext(undef) = 0
15249 if (N0.isUndef())
15250 return DAG.getConstant(Val: 0, DL, VT);
15251
15252 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
15253 return Res;
15254
15255 // fold (zext (zext x)) -> (zext x)
15256 // fold (zext (aext x)) -> (zext x)
15257 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
15258 SDNodeFlags Flags;
15259 if (N0.getOpcode() == ISD::ZERO_EXTEND)
15260 Flags.setNonNeg(N0->getFlags().hasNonNeg());
15261 return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT, Operand: N0.getOperand(i: 0), Flags);
15262 }
15263
15264 // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
15265 // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
15266 if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
15267 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG)
15268 return DAG.getNode(Opcode: ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, Operand: N0.getOperand(i: 0));
15269
15270 // fold (zext (truncate x)) -> (zext x) or
15271 // (zext (truncate x)) -> (truncate x)
15272 // This is valid when the truncated bits of x are already zero.
15273 SDValue Op;
15274 KnownBits Known;
15275 if (isTruncateOf(DAG, N: N0, Op, Known)) {
15276 APInt TruncatedBits =
15277 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
15278 APInt(Op.getScalarValueSizeInBits(), 0) :
15279 APInt::getBitsSet(numBits: Op.getScalarValueSizeInBits(),
15280 loBit: N0.getScalarValueSizeInBits(),
15281 hiBit: std::min(a: Op.getScalarValueSizeInBits(),
15282 b: VT.getScalarSizeInBits()));
15283 if (TruncatedBits.isSubsetOf(RHS: Known.Zero)) {
15284 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
15285 DAG.salvageDebugInfo(N&: *N0.getNode());
15286
15287 return ZExtOrTrunc;
15288 }
15289 }
15290
15291 // fold (zext (truncate x)) -> (and x, mask)
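  // For example, (zext (trunc i32 X to i8) to i32) can become (and X, 255).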
15292 if (N0.getOpcode() == ISD::TRUNCATE) {
15293 // fold (zext (truncate (load x))) -> (zext (smaller load x))
15294 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
15295 if (SDValue NarrowLoad = reduceLoadWidth(N: N0.getNode())) {
15296 SDNode *oye = N0.getOperand(i: 0).getNode();
15297 if (NarrowLoad.getNode() != N0.getNode()) {
15298 CombineTo(N: N0.getNode(), Res: NarrowLoad);
15299 // CombineTo deleted the truncate, if needed, but not what's under it.
15300 AddToWorklist(N: oye);
15301 }
15302 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15303 }
15304
15305 EVT SrcVT = N0.getOperand(i: 0).getValueType();
15306 EVT MinVT = N0.getValueType();
15307
15308 if (N->getFlags().hasNonNeg()) {
15309 SDValue Op = N0.getOperand(i: 0);
15310 unsigned OpBits = SrcVT.getScalarSizeInBits();
15311 unsigned MidBits = MinVT.getScalarSizeInBits();
15312 unsigned DestBits = VT.getScalarSizeInBits();
15313
15314 if (N0->getFlags().hasNoSignedWrap() ||
15315 DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
15316 if (OpBits == DestBits) {
15317          // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
15318          // bits, it already matches the extended result and can be used directly.
15319 return Op;
15320 }
15321
15322 if (OpBits < DestBits) {
15323 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
15324 // bits, just sext from i32.
15325 // FIXME: This can probably be ZERO_EXTEND nneg?
15326 return DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT, Operand: Op);
15327 }
15328
15329 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
15330 // bits, just truncate to i32.
15331 SDNodeFlags Flags;
15332 Flags.setNoSignedWrap(true);
15333 Flags.setNoUnsignedWrap(true);
15334 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Op, Flags);
15335 }
15336 }
15337
15338 // Try to mask before the extension to avoid having to generate a larger mask,
15339 // possibly over several sub-vectors.
15340 if (SrcVT.bitsLT(VT) && VT.isVector()) {
15341 if (!LegalOperations || (TLI.isOperationLegal(Op: ISD::AND, VT: SrcVT) &&
15342 TLI.isOperationLegal(Op: ISD::ZERO_EXTEND, VT))) {
15343 SDValue Op = N0.getOperand(i: 0);
15344 Op = DAG.getZeroExtendInReg(Op, DL, VT: MinVT);
15345 AddToWorklist(N: Op.getNode());
15346 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
15347 // Transfer the debug info; the new node is equivalent to N0.
15348 DAG.transferDbgValues(From: N0, To: ZExtOrTrunc);
15349 return ZExtOrTrunc;
15350 }
15351 }
15352
15353 if (!LegalOperations || TLI.isOperationLegal(Op: ISD::AND, VT)) {
15354 SDValue Op = DAG.getAnyExtOrTrunc(Op: N0.getOperand(i: 0), DL, VT);
15355 AddToWorklist(N: Op.getNode());
15356 SDValue And = DAG.getZeroExtendInReg(Op, DL, VT: MinVT);
15357 // We may safely transfer the debug info describing the truncate node over
15358 // to the equivalent and operation.
15359 DAG.transferDbgValues(From: N0, To: And);
15360 return And;
15361 }
15362 }
15363
15364 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
15365 // if either of the casts is not free.
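  // For example, (zext (and (trunc i64 X to i32), 7) to i64) can become
  // (and X, 7) when the truncate or the zext is not free (illustrative types
  // only).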
15366 if (N0.getOpcode() == ISD::AND &&
15367 N0.getOperand(i: 0).getOpcode() == ISD::TRUNCATE &&
15368 N0.getOperand(i: 1).getOpcode() == ISD::Constant &&
15369 (!TLI.isTruncateFree(Val: N0.getOperand(i: 0).getOperand(i: 0), VT2: N0.getValueType()) ||
15370 !TLI.isZExtFree(FromTy: N0.getValueType(), ToTy: VT))) {
15371 SDValue X = N0.getOperand(i: 0).getOperand(i: 0);
15372 X = DAG.getAnyExtOrTrunc(Op: X, DL: SDLoc(X), VT);
15373 APInt Mask = N0.getConstantOperandAPInt(i: 1).zext(width: VT.getSizeInBits());
15374 return DAG.getNode(Opcode: ISD::AND, DL, VT,
15375 N1: X, N2: DAG.getConstant(Val: Mask, DL, VT));
15376 }
15377
15378 // Try to simplify (zext (load x)).
15379 if (SDValue foldedExt = tryToFoldExtOfLoad(
15380 DAG, Combiner&: *this, TLI, VT, LegalOperations, N, N0, ExtLoadType: ISD::ZEXTLOAD,
15381 ExtOpc: ISD::ZERO_EXTEND, NonNegZExt: N->getFlags().hasNonNeg()))
15382 return foldedExt;
15383
15384 if (SDValue foldedExt =
15385 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
15386 ExtLoadType: ISD::ZEXTLOAD, ExtOpc: ISD::ZERO_EXTEND))
15387 return foldedExt;
15388
15389 // fold (zext (load x)) to multiple smaller zextloads.
15390 // Only on illegal but splittable vectors.
15391 if (SDValue ExtLoad = CombineExtLoad(N))
15392 return ExtLoad;
15393
15394 // Try to simplify (zext (atomic_load x)).
15395 if (SDValue foldedExt =
15396 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ExtLoadType: ISD::ZEXTLOAD))
15397 return foldedExt;
15398
15399 // fold (zext (and/or/xor (load x), cst)) ->
15400 // (and/or/xor (zextload x), (zext cst))
15401 // Unless (and (load x) cst) will match as a zextload already and has
15402 // additional users, or the zext is already free.
15403 if (ISD::isBitwiseLogicOp(Opcode: N0.getOpcode()) && !TLI.isZExtFree(Val: N0, VT2: VT) &&
15404 isa<LoadSDNode>(Val: N0.getOperand(i: 0)) &&
15405 N0.getOperand(i: 1).getOpcode() == ISD::Constant &&
15406 (!LegalOperations && TLI.isOperationLegal(Op: N0.getOpcode(), VT))) {
15407 LoadSDNode *LN00 = cast<LoadSDNode>(Val: N0.getOperand(i: 0));
15408 EVT MemVT = LN00->getMemoryVT();
15409 if (TLI.isLoadExtLegal(ExtType: ISD::ZEXTLOAD, ValVT: VT, MemVT) &&
15410 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
15411 bool DoXform = true;
15412 SmallVector<SDNode*, 4> SetCCs;
15413 if (!N0.hasOneUse()) {
15414 if (N0.getOpcode() == ISD::AND) {
15415 auto *AndC = cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
15416 EVT LoadResultTy = AndC->getValueType(ResNo: 0);
15417 EVT ExtVT;
15418 if (isAndLoadExtLoad(AndC, LoadN: LN00, LoadResultTy, ExtVT))
15419 DoXform = false;
15420 }
15421 }
15422 if (DoXform)
15423 DoXform = ExtendUsesToFormExtLoad(VT, N: N0.getNode(), N0: N0.getOperand(i: 0),
15424 ExtOpc: ISD::ZERO_EXTEND, ExtendNodes&: SetCCs, TLI);
15425 if (DoXform) {
15426 SDValue ExtLoad = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl: SDLoc(LN00), VT,
15427 Chain: LN00->getChain(), Ptr: LN00->getBasePtr(),
15428 MemVT: LN00->getMemoryVT(),
15429 MMO: LN00->getMemOperand());
15430 APInt Mask = N0.getConstantOperandAPInt(i: 1).zext(width: VT.getSizeInBits());
15431 SDValue And = DAG.getNode(Opcode: N0.getOpcode(), DL, VT,
15432 N1: ExtLoad, N2: DAG.getConstant(Val: Mask, DL, VT));
15433 ExtendSetCCUses(SetCCs, OrigLoad: N0.getOperand(i: 0), ExtLoad, ExtType: ISD::ZERO_EXTEND);
15434 bool NoReplaceTruncAnd = !N0.hasOneUse();
15435 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
15436 CombineTo(N, Res: And);
15437 // If N0 has multiple uses, change other uses as well.
15438 if (NoReplaceTruncAnd) {
15439 SDValue TruncAnd =
15440 DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N0.getValueType(), Operand: And);
15441 CombineTo(N: N0.getNode(), Res: TruncAnd);
15442 }
15443 if (NoReplaceTrunc) {
15444 DAG.ReplaceAllUsesOfValueWith(From: SDValue(LN00, 1), To: ExtLoad.getValue(R: 1));
15445 } else {
15446 SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(LN00),
15447 VT: LN00->getValueType(ResNo: 0), Operand: ExtLoad);
15448 CombineTo(N: LN00, Res0: Trunc, Res1: ExtLoad.getValue(R: 1));
15449 }
15450 return SDValue(N,0); // Return N so it doesn't get rechecked!
15451 }
15452 }
15453 }
15454
15455 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
15456 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
15457 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
15458 return ZExtLoad;
15459
15460 // Try to simplify (zext (zextload x)).
15461 if (SDValue foldedExt = tryToFoldExtOfExtload(
15462 DAG, Combiner&: *this, TLI, VT, LegalOperations, N, N0, ExtLoadType: ISD::ZEXTLOAD))
15463 return foldedExt;
15464
15465 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
15466 return V;
15467
15468 if (N0.getOpcode() == ISD::SETCC) {
15469 // Propagate fast-math-flags.
15470 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
15471
15472 // Only do this before legalize for now.
15473 if (!LegalOperations && VT.isVector() &&
15474 N0.getValueType().getVectorElementType() == MVT::i1) {
15475 EVT N00VT = N0.getOperand(i: 0).getValueType();
15476 if (getSetCCResultType(VT: N00VT) == N0.getValueType())
15477 return SDValue();
15478
15479      // We know that the # elements of the result is the same as the #
15480      // elements of the compare (and the # elements of the compare result for
15481      // that matter). Check to see that they are the same size. If so, we know
15482      // that the element size of the zext'd result matches the element size of
15483      // the compare operands.
15484 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
15485 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
15486 SDValue VSetCC = DAG.getNode(Opcode: ISD::SETCC, DL, VT, N1: N0.getOperand(i: 0),
15487 N2: N0.getOperand(i: 1), N3: N0.getOperand(i: 2));
15488 return DAG.getZeroExtendInReg(Op: VSetCC, DL, VT: N0.getValueType());
15489 }
15490
15491 // If the desired elements are smaller or larger than the source
15492 // elements we can use a matching integer vector type and then
15493 // truncate/any extend followed by zext_in_reg.
15494 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
15495 SDValue VsetCC =
15496 DAG.getNode(Opcode: ISD::SETCC, DL, VT: MatchingVectorType, N1: N0.getOperand(i: 0),
15497 N2: N0.getOperand(i: 1), N3: N0.getOperand(i: 2));
15498 return DAG.getZeroExtendInReg(Op: DAG.getAnyExtOrTrunc(Op: VsetCC, DL, VT), DL,
15499 VT: N0.getValueType());
15500 }
15501
15502 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
15503 EVT N0VT = N0.getValueType();
15504 EVT N00VT = N0.getOperand(i: 0).getValueType();
15505 if (SDValue SCC = SimplifySelectCC(
15506 DL, N0: N0.getOperand(i: 0), N1: N0.getOperand(i: 1),
15507 N2: DAG.getBoolConstant(V: true, DL, VT: N0VT, OpVT: N00VT),
15508 N3: DAG.getBoolConstant(V: false, DL, VT: N0VT, OpVT: N00VT),
15509 CC: cast<CondCodeSDNode>(Val: N0.getOperand(i: 2))->get(), NotExtCompare: true))
15510 return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT, Operand: SCC);
15511 }
15512
15513  // (zext (shl/srl (zext x), cst)) -> (shl/srl (zext x), cst)
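  // For example, (zext (shl (zext i8 X to i16), C) to i32) may become a shl of
  // the wider zero-extended value when the inner shl cannot shift out any set
  // bits of X (illustrative types only).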
15514 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
15515 !TLI.isZExtFree(Val: N0, VT2: VT)) {
15516 SDValue ShVal = N0.getOperand(i: 0);
15517 SDValue ShAmt = N0.getOperand(i: 1);
15518 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(Val&: ShAmt)) {
15519 if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
15520 if (N0.getOpcode() == ISD::SHL) {
15521 // If the original shl may be shifting out bits, do not perform this
15522 // transformation.
15523 unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
15524 ShVal.getOperand(i: 0).getValueSizeInBits();
15525 if (ShAmtC->getAPIntValue().ugt(RHS: KnownZeroBits)) {
15526 // If the shift is too large, then see if we can deduce that the
15527 // shift is safe anyway.
15528
15529 // Check if the bits being shifted out are known to be zero.
15530 KnownBits KnownShVal = DAG.computeKnownBits(Op: ShVal);
15531 if (ShAmtC->getAPIntValue().ugt(RHS: KnownShVal.countMinLeadingZeros()))
15532 return SDValue();
15533 }
15534 }
15535
15536 // Ensure that the shift amount is wide enough for the shifted value.
15537 if (Log2_32_Ceil(Value: VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
15538 ShAmt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i32, Operand: ShAmt);
15539
15540 return DAG.getNode(Opcode: N0.getOpcode(), DL, VT,
15541 N1: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT, Operand: ShVal), N2: ShAmt);
15542 }
15543 }
15544 }
15545
15546 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(Cast: N))
15547 return NewVSel;
15548
15549 if (SDValue NewCtPop = widenCtPop(Extend: N, DAG, DL))
15550 return NewCtPop;
15551
15552 if (SDValue V = widenAbs(Extend: N, DAG))
15553 return V;
15554
15555 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
15556 return Res;
15557
15558 // CSE zext nneg with sext if the zext is not free.
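  // A zext with the nneg flag produces the same value as a sext of the same
  // operand, so if an equivalent sext node already exists, reuse it instead of
  // keeping both nodes alive.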
15559 if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(FromTy: N0.getValueType(), ToTy: VT)) {
15560 SDNode *CSENode = DAG.getNodeIfExists(Opcode: ISD::SIGN_EXTEND, VTList: N->getVTList(), Ops: N0);
15561 if (CSENode)
15562 return SDValue(CSENode, 0);
15563 }
15564
15565 return SDValue();
15566}
15567
15568SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
15569 SDValue N0 = N->getOperand(Num: 0);
15570 EVT VT = N->getValueType(ResNo: 0);
15571 SDLoc DL(N);
15572
15573 // aext(undef) = undef
15574 if (N0.isUndef())
15575 return DAG.getUNDEF(VT);
15576
15577 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
15578 return Res;
15579
15580 // fold (aext (aext x)) -> (aext x)
15581 // fold (aext (zext x)) -> (zext x)
15582 // fold (aext (sext x)) -> (sext x)
15583 if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND ||
15584 N0.getOpcode() == ISD::SIGN_EXTEND) {
15585 SDNodeFlags Flags;
15586 if (N0.getOpcode() == ISD::ZERO_EXTEND)
15587 Flags.setNonNeg(N0->getFlags().hasNonNeg());
15588 return DAG.getNode(Opcode: N0.getOpcode(), DL, VT, Operand: N0.getOperand(i: 0), Flags);
15589 }
15590
15591 // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
15592 // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
15593 // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
15594 if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
15595 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
15596 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
15597 return DAG.getNode(Opcode: N0.getOpcode(), DL, VT, Operand: N0.getOperand(i: 0));
15598
15599 // fold (aext (truncate (load x))) -> (aext (smaller load x))
15600 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
15601 if (N0.getOpcode() == ISD::TRUNCATE) {
15602 if (SDValue NarrowLoad = reduceLoadWidth(N: N0.getNode())) {
15603 SDNode *oye = N0.getOperand(i: 0).getNode();
15604 if (NarrowLoad.getNode() != N0.getNode()) {
15605 CombineTo(N: N0.getNode(), Res: NarrowLoad);
15606 // CombineTo deleted the truncate, if needed, but not what's under it.
15607 AddToWorklist(N: oye);
15608 }
15609 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15610 }
15611 }
15612
15613 // fold (aext (truncate x))
15614 if (N0.getOpcode() == ISD::TRUNCATE)
15615 return DAG.getAnyExtOrTrunc(Op: N0.getOperand(i: 0), DL, VT);
15616
15617 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
15618 // if the trunc is not free.
15619 if (N0.getOpcode() == ISD::AND &&
15620 N0.getOperand(i: 0).getOpcode() == ISD::TRUNCATE &&
15621 N0.getOperand(i: 1).getOpcode() == ISD::Constant &&
15622 !TLI.isTruncateFree(Val: N0.getOperand(i: 0).getOperand(i: 0), VT2: N0.getValueType())) {
15623 SDValue X = DAG.getAnyExtOrTrunc(Op: N0.getOperand(i: 0).getOperand(i: 0), DL, VT);
15624 SDValue Y = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT, Operand: N0.getOperand(i: 1));
15625 assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
15626 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: X, N2: Y);
15627 }
15628
15629 // fold (aext (load x)) -> (aext (truncate (extload x)))
15630 // None of the supported targets knows how to perform load and any_ext
15631 // on vectors in one instruction, so attempt to fold to zext instead.
15632 if (VT.isVector()) {
15633 // Try to simplify (zext (load x)).
15634 if (SDValue foldedExt =
15635 tryToFoldExtOfLoad(DAG, Combiner&: *this, TLI, VT, LegalOperations, N, N0,
15636 ExtLoadType: ISD::ZEXTLOAD, ExtOpc: ISD::ZERO_EXTEND))
15637 return foldedExt;
15638 } else if (ISD::isNON_EXTLoad(N: N0.getNode()) &&
15639 ISD::isUNINDEXEDLoad(N: N0.getNode()) &&
15640 TLI.isLoadExtLegalOrCustom(ExtType: ISD::EXTLOAD, ValVT: VT, MemVT: N0.getValueType())) {
15641 bool DoXform = true;
15642 SmallVector<SDNode *, 4> SetCCs;
15643 if (!N0.hasOneUse())
15644 DoXform =
15645 ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc: ISD::ANY_EXTEND, ExtendNodes&: SetCCs, TLI);
15646 if (DoXform) {
15647 LoadSDNode *LN0 = cast<LoadSDNode>(Val&: N0);
15648 SDValue ExtLoad = DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl: DL, VT, Chain: LN0->getChain(),
15649 Ptr: LN0->getBasePtr(), MemVT: N0.getValueType(),
15650 MMO: LN0->getMemOperand());
15651 ExtendSetCCUses(SetCCs, OrigLoad: N0, ExtLoad, ExtType: ISD::ANY_EXTEND);
15652 // If the load value is used only by N, replace it via CombineTo N.
15653 bool NoReplaceTrunc = N0.hasOneUse();
15654 CombineTo(N, Res: ExtLoad);
15655 if (NoReplaceTrunc) {
15656 DAG.ReplaceAllUsesOfValueWith(From: SDValue(LN0, 1), To: ExtLoad.getValue(R: 1));
15657 recursivelyDeleteUnusedNodes(N: LN0);
15658 } else {
15659 SDValue Trunc =
15660 DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(N0), VT: N0.getValueType(), Operand: ExtLoad);
15661 CombineTo(N: LN0, Res0: Trunc, Res1: ExtLoad.getValue(R: 1));
15662 }
15663 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15664 }
15665 }
15666
15667 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
15668 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
15669 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
15670 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N: N0.getNode()) &&
15671 ISD::isUNINDEXEDLoad(N: N0.getNode()) && N0.hasOneUse()) {
15672 LoadSDNode *LN0 = cast<LoadSDNode>(Val&: N0);
15673 ISD::LoadExtType ExtType = LN0->getExtensionType();
15674 EVT MemVT = LN0->getMemoryVT();
15675 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, ValVT: VT, MemVT)) {
15676 SDValue ExtLoad =
15677 DAG.getExtLoad(ExtType, dl: DL, VT, Chain: LN0->getChain(), Ptr: LN0->getBasePtr(),
15678 MemVT, MMO: LN0->getMemOperand());
15679 CombineTo(N, Res: ExtLoad);
15680 DAG.ReplaceAllUsesOfValueWith(From: SDValue(LN0, 1), To: ExtLoad.getValue(R: 1));
15681 recursivelyDeleteUnusedNodes(N: LN0);
15682 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15683 }
15684 }
15685
15686 if (N0.getOpcode() == ISD::SETCC) {
15687 // Propagate fast-math-flags.
15688 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
15689
15690 // For vectors:
15691 // aext(setcc) -> vsetcc
15692 // aext(setcc) -> truncate(vsetcc)
15693 // aext(setcc) -> aext(vsetcc)
15694 // Only do this before legalize for now.
15695 if (VT.isVector() && !LegalOperations) {
15696 EVT N00VT = N0.getOperand(i: 0).getValueType();
15697 if (getSetCCResultType(VT: N00VT) == N0.getValueType())
15698 return SDValue();
15699
15700      // We know that the # elements of the result is the same as the
15701      // # elements of the compare (and the # elements of the compare result
15702      // for that matter). Check to see that they are the same size. If so,
15703      // we know that the element size of the extended result matches the
15704      // element size of the compare operands.
15705 if (VT.getSizeInBits() == N00VT.getSizeInBits())
15706 return DAG.getSetCC(DL, VT, LHS: N0.getOperand(i: 0), RHS: N0.getOperand(i: 1),
15707 Cond: cast<CondCodeSDNode>(Val: N0.getOperand(i: 2))->get());
15708
15709 // If the desired elements are smaller or larger than the source
15710 // elements we can use a matching integer vector type and then
15711 // truncate/any extend
15712 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
15713 SDValue VsetCC = DAG.getSetCC(
15714 DL, VT: MatchingVectorType, LHS: N0.getOperand(i: 0), RHS: N0.getOperand(i: 1),
15715 Cond: cast<CondCodeSDNode>(Val: N0.getOperand(i: 2))->get());
15716 return DAG.getAnyExtOrTrunc(Op: VsetCC, DL, VT);
15717 }
15718
15719 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
15720 if (SDValue SCC = SimplifySelectCC(
15721 DL, N0: N0.getOperand(i: 0), N1: N0.getOperand(i: 1), N2: DAG.getConstant(Val: 1, DL, VT),
15722 N3: DAG.getConstant(Val: 0, DL, VT),
15723 CC: cast<CondCodeSDNode>(Val: N0.getOperand(i: 2))->get(), NotExtCompare: true))
15724 return SCC;
15725 }
15726
15727 if (SDValue NewCtPop = widenCtPop(Extend: N, DAG, DL))
15728 return NewCtPop;
15729
15730 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
15731 return Res;
15732
15733 return SDValue();
15734}
15735
15736SDValue DAGCombiner::visitAssertExt(SDNode *N) {
15737 unsigned Opcode = N->getOpcode();
15738 SDValue N0 = N->getOperand(Num: 0);
15739 SDValue N1 = N->getOperand(Num: 1);
15740 EVT AssertVT = cast<VTSDNode>(Val&: N1)->getVT();
15741
15742 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
15743 if (N0.getOpcode() == Opcode &&
15744 AssertVT == cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT())
15745 return N0;
15746
15747 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
15748 N0.getOperand(i: 0).getOpcode() == Opcode) {
15749    // We have an assert, truncate, assert sandwich. Make one stronger assert
15750    // by applying the smaller of the two asserted types to the larger source
15751    // value. This eliminates the later assert:
15752 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
15753 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
15754 SDLoc DL(N);
15755 SDValue BigA = N0.getOperand(i: 0);
15756 EVT BigA_AssertVT = cast<VTSDNode>(Val: BigA.getOperand(i: 1))->getVT();
15757 EVT MinAssertVT = AssertVT.bitsLT(VT: BigA_AssertVT) ? AssertVT : BigA_AssertVT;
15758 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
15759 SDValue NewAssert = DAG.getNode(Opcode, DL, VT: BigA.getValueType(),
15760 N1: BigA.getOperand(i: 0), N2: MinAssertVTVal);
15761 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: NewAssert);
15762 }
15763
15764  // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
15765  // than X, just move the AssertZext in front of the truncate and drop the
15766  // AssertSext.
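  // For example, with X of type i64:
  // (AssertZext (trunc (AssertSext X, i32) to i16), i8)
  //   -> (trunc (AssertZext X, i8) to i16)   (illustrative types only).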
15767 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
15768 N0.getOperand(i: 0).getOpcode() == ISD::AssertSext &&
15769 Opcode == ISD::AssertZext) {
15770 SDValue BigA = N0.getOperand(i: 0);
15771 EVT BigA_AssertVT = cast<VTSDNode>(Val: BigA.getOperand(i: 1))->getVT();
15772 if (AssertVT.bitsLT(VT: BigA_AssertVT)) {
15773 SDLoc DL(N);
15774 SDValue NewAssert = DAG.getNode(Opcode, DL, VT: BigA.getValueType(),
15775 N1: BigA.getOperand(i: 0), N2: N1);
15776 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: NewAssert);
15777 }
15778 }
15779
15780 if (Opcode == ISD::AssertZext && N0.getOpcode() == ISD::AND &&
15781 isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
15782 const APInt &Mask = N0.getConstantOperandAPInt(i: 1);
15783
15784 // If we have (AssertZext (and (AssertSext X, iX), M), iY) and Y is smaller
15785 // than X, and the And doesn't change the lower iX bits, we can move the
15786 // AssertZext in front of the And and drop the AssertSext.
15787 if (N0.getOperand(i: 0).getOpcode() == ISD::AssertSext && N0.hasOneUse()) {
15788 SDValue BigA = N0.getOperand(i: 0);
15789 EVT BigA_AssertVT = cast<VTSDNode>(Val: BigA.getOperand(i: 1))->getVT();
15790 if (AssertVT.bitsLT(VT: BigA_AssertVT) &&
15791 Mask.countr_one() >= BigA_AssertVT.getScalarSizeInBits()) {
15792 SDLoc DL(N);
15793 SDValue NewAssert =
15794 DAG.getNode(Opcode, DL, VT: N->getValueType(ResNo: 0), N1: BigA.getOperand(i: 0), N2: N1);
15795 return DAG.getNode(Opcode: ISD::AND, DL, VT: N->getValueType(ResNo: 0), N1: NewAssert,
15796 N2: N0.getOperand(i: 1));
15797 }
15798 }
15799
15800 // Remove AssertZext entirely if the mask guarantees the assertion cannot
15801 // fail.
15802 // TODO: Use KB countMinLeadingZeros to handle non-constant masks?
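    // For example, (AssertZext (and X, 255), i16) can be replaced by the AND
    // itself: the mask already clears every bit above bit 7, so the i16
    // zero-extension assertion is trivially satisfied.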
15803 if (Mask.isIntN(N: AssertVT.getScalarSizeInBits()))
15804 return N0;
15805 }
15806
15807 return SDValue();
15808}
15809
15810SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
15811 SDLoc DL(N);
15812
15813 Align AL = cast<AssertAlignSDNode>(Val: N)->getAlign();
15814 SDValue N0 = N->getOperand(Num: 0);
15815
15816 // Fold (assertalign (assertalign x, AL0), AL1) ->
15817 // (assertalign x, max(AL0, AL1))
15818 if (auto *AAN = dyn_cast<AssertAlignSDNode>(Val&: N0))
15819 return DAG.getAssertAlign(DL, V: N0.getOperand(i: 0),
15820 A: std::max(a: AL, b: AAN->getAlign()));
15821
15822  // In rare cases, there are trivial arithmetic ops in the source operands. Sink
15823  // this assert down to the source operands so that those arithmetic ops can be
15824  // exposed to DAG combining.
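  // For example, (assertalign (add X, 32), align 16) can become
  // (add (assertalign X, align 16), 32), since the constant offset 32 already
  // satisfies a 16-byte alignment (illustrative example).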
15825 switch (N0.getOpcode()) {
15826 default:
15827 break;
15828 case ISD::ADD:
15829 case ISD::PTRADD:
15830 case ISD::SUB: {
15831 unsigned AlignShift = Log2(A: AL);
15832 SDValue LHS = N0.getOperand(i: 0);
15833 SDValue RHS = N0.getOperand(i: 1);
15834 unsigned LHSAlignShift = DAG.computeKnownBits(Op: LHS).countMinTrailingZeros();
15835 unsigned RHSAlignShift = DAG.computeKnownBits(Op: RHS).countMinTrailingZeros();
15836 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
15837 if (LHSAlignShift < AlignShift)
15838 LHS = DAG.getAssertAlign(DL, V: LHS, A: AL);
15839 if (RHSAlignShift < AlignShift)
15840 RHS = DAG.getAssertAlign(DL, V: RHS, A: AL);
15841 return DAG.getNode(Opcode: N0.getOpcode(), DL, VT: N0.getValueType(), N1: LHS, N2: RHS);
15842 }
15843 break;
15844 }
15845 }
15846
15847 return SDValue();
15848}
15849
15850/// If the result of a load is shifted/masked/truncated to an effectively
15851/// narrower type, try to transform the load to a narrower type and/or
15852/// use an extending load.
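/// For example, on a little-endian target, (i32 (srl (load i32 ptr), 16)) may
/// become an i16 zero-extending load from ptr+2 when that extending load is
/// legal for the target (illustrative types only).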
15853SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
15854 unsigned Opc = N->getOpcode();
15855
15856 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
15857 SDValue N0 = N->getOperand(Num: 0);
15858 EVT VT = N->getValueType(ResNo: 0);
15859 EVT ExtVT = VT;
15860
15861 // This transformation isn't valid for vector loads.
15862 if (VT.isVector())
15863 return SDValue();
15864
15865  // The ShAmt variable is used to indicate that we've consumed a right
15866  // shift. I.e. we want to narrow the width of the load by skipping the ShAmt
15867  // least significant bits.
15868 unsigned ShAmt = 0;
15869  // A special case is when the least significant bits from the load are masked
15870  // away, but using an AND rather than a right shift. ShiftedOffset is used to
15871  // indicate that the narrowed load should be left-shifted ShiftedOffset bits
15872  // to get the result.
15873 unsigned ShiftedOffset = 0;
15874  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
15875  // sign-extending back to VT.
15876 if (Opc == ISD::SIGN_EXTEND_INREG) {
15877 ExtType = ISD::SEXTLOAD;
15878 ExtVT = cast<VTSDNode>(Val: N->getOperand(Num: 1))->getVT();
15879 } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
15880 // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
15881 // value, or it may be shifting a higher subword, half or byte into the
15882 // lowest bits.
15883
15884 // Only handle shift with constant shift amount, and the shiftee must be a
15885 // load.
15886 auto *LN = dyn_cast<LoadSDNode>(Val&: N0);
15887 auto *N1C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
15888 if (!N1C || !LN)
15889 return SDValue();
15890 // If the shift amount is larger than the memory type then we're not
15891 // accessing any of the loaded bytes.
15892 ShAmt = N1C->getZExtValue();
15893 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
15894 if (MemoryWidth <= ShAmt)
15895 return SDValue();
15896 // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
15897 ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
15898 ExtVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: MemoryWidth - ShAmt);
15899 // If original load is a SEXTLOAD then we can't simply replace it by a
15900 // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
15901 // followed by a ZEXT, but that is not handled at the moment). Similarly if
15902 // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
15903 if ((LN->getExtensionType() == ISD::SEXTLOAD ||
15904 LN->getExtensionType() == ISD::ZEXTLOAD) &&
15905 LN->getExtensionType() != ExtType)
15906 return SDValue();
15907 } else if (Opc == ISD::AND) {
15908 // An AND with a constant mask is the same as a truncate + zero-extend.
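    // For example, a mask of 255 behaves like a truncate to i8 followed by a
    // zero-extend, so the load may become an i8 ZEXTLOAD (illustrative only).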
15909 auto AndC = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
15910 if (!AndC)
15911 return SDValue();
15912
15913 const APInt &Mask = AndC->getAPIntValue();
15914 unsigned ActiveBits = 0;
15915 if (Mask.isMask()) {
15916 ActiveBits = Mask.countr_one();
15917 } else if (Mask.isShiftedMask(MaskIdx&: ShAmt, MaskLen&: ActiveBits)) {
15918 ShiftedOffset = ShAmt;
15919 } else {
15920 return SDValue();
15921 }
15922
15923 ExtType = ISD::ZEXTLOAD;
15924 ExtVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ActiveBits);
15925 }
15926
15927 // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
15928 // a right shift. Here we redo some of those checks, to possibly adjust the
15929 // ExtVT even further based on "a masking AND". We could also end up here for
15930 // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
15931 // need to be done here as well.
15932 if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
15933 SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
15934 // Bail out when the SRL has more than one use. This is done for historical
15935    // (undocumented) reasons. Maybe the intent was to guard the AND-masking
15936    // check below? And maybe it is not profitable to do the transform when the
15937    // SRL has multiple uses and we get here with Opc!=ISD::SRL?
15938    // FIXME: Can't we just skip this check for the Opc==ISD::SRL case?
15939 if (!SRL.hasOneUse())
15940 return SDValue();
15941
15942 // Only handle shift with constant shift amount, and the shiftee must be a
15943 // load.
15944 auto *LN = dyn_cast<LoadSDNode>(Val: SRL.getOperand(i: 0));
15945 auto *SRL1C = dyn_cast<ConstantSDNode>(Val: SRL.getOperand(i: 1));
15946 if (!SRL1C || !LN)
15947 return SDValue();
15948
15949 // If the shift amount is larger than the input type then we're not
15950 // accessing any of the loaded bytes. If the load was a zextload/extload
15951 // then the result of the shift+trunc is zero/undef (handled elsewhere).
15952 ShAmt = SRL1C->getZExtValue();
15953 uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
15954 if (ShAmt >= MemoryWidth)
15955 return SDValue();
15956
15957 // Because a SRL must be assumed to *need* to zero-extend the high bits
15958 // (as opposed to anyext the high bits), we can't combine the zextload
15959 // lowering of SRL and an sextload.
15960 if (LN->getExtensionType() == ISD::SEXTLOAD)
15961 return SDValue();
15962
15963    // Avoid reading outside the memory accessed by the original load (which
15964    // could happen if we only adjust the load base pointer by ShAmt). Instead we
15965 // try to narrow the load even further. The typical scenario here is:
15966 // (i64 (truncate (i96 (srl (load x), 64)))) ->
15967 // (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
15968 if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
15969 // Don't replace sextload by zextload.
15970 if (ExtType == ISD::SEXTLOAD)
15971 return SDValue();
15972 // Narrow the load.
15973 ExtType = ISD::ZEXTLOAD;
15974 ExtVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: MemoryWidth - ShAmt);
15975 }
15976
15977 // If the SRL is only used by a masking AND, we may be able to adjust
15978 // the ExtVT to make the AND redundant.
15979 SDNode *Mask = *(SRL->user_begin());
15980 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
15981 isa<ConstantSDNode>(Val: Mask->getOperand(Num: 1))) {
15982 unsigned Offset, ActiveBits;
15983 const APInt& ShiftMask = Mask->getConstantOperandAPInt(Num: 1);
15984 if (ShiftMask.isMask()) {
15985 EVT MaskedVT =
15986 EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ShiftMask.countr_one());
15987 // If the mask is smaller, recompute the type.
15988 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
15989 TLI.isLoadExtLegal(ExtType, ValVT: SRL.getValueType(), MemVT: MaskedVT))
15990 ExtVT = MaskedVT;
15991 } else if (ExtType == ISD::ZEXTLOAD &&
15992 ShiftMask.isShiftedMask(MaskIdx&: Offset, MaskLen&: ActiveBits) &&
15993 (Offset + ShAmt) < VT.getScalarSizeInBits()) {
15994 EVT MaskedVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ActiveBits);
15995 // If the mask is shifted we can use a narrower load and a shl to insert
15996 // the trailing zeros.
15997 if (((Offset + ActiveBits) <= ExtVT.getScalarSizeInBits()) &&
15998 TLI.isLoadExtLegal(ExtType, ValVT: SRL.getValueType(), MemVT: MaskedVT)) {
15999 ExtVT = MaskedVT;
16000 ShAmt = Offset + ShAmt;
16001 ShiftedOffset = Offset;
16002 }
16003 }
16004 }
16005
16006 N0 = SRL.getOperand(i: 0);
16007 }
16008
16009 // If the load is shifted left (and the result isn't shifted back right), we
16010 // can fold a truncate through the shift. The typical scenario is that N
16011 // points at a TRUNCATE here so the attempted fold is:
16012 // (truncate (shl (load x), c))) -> (shl (narrow load x), c)
16013 // ShLeftAmt will indicate how much a narrowed load should be shifted left.
16014 unsigned ShLeftAmt = 0;
16015 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
16016 ExtVT == VT && TLI.isNarrowingProfitable(N, SrcVT: N0.getValueType(), DestVT: VT)) {
16017 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
16018 ShLeftAmt = N01->getZExtValue();
16019 N0 = N0.getOperand(i: 0);
16020 }
16021 }
16022
16023 // If we haven't found a load, we can't narrow it.
16024 if (!isa<LoadSDNode>(Val: N0))
16025 return SDValue();
16026
16027 LoadSDNode *LN0 = cast<LoadSDNode>(Val&: N0);
16028 // Reducing the width of a volatile load is illegal. For atomics, we may be
16029 // able to reduce the width provided we never widen again. (see D66309)
16030 if (!LN0->isSimple() ||
16031 !isLegalNarrowLdSt(LDST: LN0, ExtType, MemVT&: ExtVT, ShAmt))
16032 return SDValue();
16033
16034 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
16035 unsigned LVTStoreBits =
16036 LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
16037 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
16038 return LVTStoreBits - EVTStoreBits - ShAmt;
16039 };
16040
16041 // We need to adjust the pointer to the load by ShAmt bits in order to load
16042 // the correct bytes.
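  // For example, when an SRL by 16 narrows an i32 load to its high i16 half,
  // the byte offset is 2 on a little-endian target but 0 on a big-endian
  // target, which is what AdjustBigEndianShift accounts for (illustrative
  // example).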
16043 unsigned PtrAdjustmentInBits =
16044 DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
16045
16046 uint64_t PtrOff = PtrAdjustmentInBits / 8;
16047 SDLoc DL(LN0);
16048 // The original load itself didn't wrap, so an offset within it doesn't.
16049 SDValue NewPtr =
16050 DAG.getMemBasePlusOffset(Base: LN0->getBasePtr(), Offset: TypeSize::getFixed(ExactSize: PtrOff),
16051 DL, Flags: SDNodeFlags::NoUnsignedWrap);
16052 AddToWorklist(N: NewPtr.getNode());
16053
16054 SDValue Load;
16055 if (ExtType == ISD::NON_EXTLOAD) {
16056 const MDNode *OldRanges = LN0->getRanges();
16057 const MDNode *NewRanges = nullptr;
16058 // If LSBs are loaded and the truncated ConstantRange for the OldRanges
16059 // metadata is not the full-set for the new width then create a NewRanges
16060 // metadata for the truncated load
16061 if (ShAmt == 0 && OldRanges) {
16062 ConstantRange CR = getConstantRangeFromMetadata(RangeMD: *OldRanges);
16063 unsigned BitSize = VT.getScalarSizeInBits();
16064
16065 // It is possible for an 8-bit extending load with 8-bit range
16066 // metadata to be narrowed to an 8-bit load. This guard is necessary to
16067      // ensure that the truncation is to a strictly smaller width.
16068 if (CR.getBitWidth() > BitSize) {
16069 ConstantRange TruncatedCR = CR.truncate(BitWidth: BitSize);
16070 if (!TruncatedCR.isFullSet()) {
16071 Metadata *Bounds[2] = {
16072 ConstantAsMetadata::get(
16073 C: ConstantInt::get(Context&: *DAG.getContext(), V: TruncatedCR.getLower())),
16074 ConstantAsMetadata::get(
16075 C: ConstantInt::get(Context&: *DAG.getContext(), V: TruncatedCR.getUpper()))};
16076 NewRanges = MDNode::get(Context&: *DAG.getContext(), MDs: Bounds);
16077 }
16078 } else if (CR.getBitWidth() == BitSize)
16079 NewRanges = OldRanges;
16080 }
16081 Load = DAG.getLoad(VT, dl: DL, Chain: LN0->getChain(), Ptr: NewPtr,
16082 PtrInfo: LN0->getPointerInfo().getWithOffset(O: PtrOff),
16083 Alignment: LN0->getBaseAlign(), MMOFlags: LN0->getMemOperand()->getFlags(),
16084 AAInfo: LN0->getAAInfo(), Ranges: NewRanges);
16085 } else
16086 Load = DAG.getExtLoad(ExtType, dl: DL, VT, Chain: LN0->getChain(), Ptr: NewPtr,
16087 PtrInfo: LN0->getPointerInfo().getWithOffset(O: PtrOff), MemVT: ExtVT,
16088 Alignment: LN0->getBaseAlign(), MMOFlags: LN0->getMemOperand()->getFlags(),
16089 AAInfo: LN0->getAAInfo());
16090
16091 // Replace the old load's chain with the new load's chain.
16092 WorklistRemover DeadNodes(*this);
16093 DAG.ReplaceAllUsesOfValueWith(From: N0.getValue(R: 1), To: Load.getValue(R: 1));
16094
16095 // Shift the result left, if we've swallowed a left shift.
16096 SDValue Result = Load;
16097 if (ShLeftAmt != 0) {
16098 // If the shift amount is as large as the result size (but, presumably,
16099 // no larger than the source) then the useful bits of the result are
16100 // zero; we can't simply return the shortened shift, because the result
16101 // of that operation is undefined.
16102 if (ShLeftAmt >= VT.getScalarSizeInBits())
16103 Result = DAG.getConstant(Val: 0, DL, VT);
16104 else
16105 Result = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Result,
16106 N2: DAG.getShiftAmountConstant(Val: ShLeftAmt, VT, DL));
16107 }
16108
16109 if (ShiftedOffset != 0) {
16110    // We're using a shifted mask, so the load now has an offset. This means
16111    // the data has been loaded into lower bytes of the register than it would
16112    // have been otherwise, so we need to shl the loaded data back into the
16113    // correct position.
16114 SDValue ShiftC = DAG.getConstant(Val: ShiftedOffset, DL, VT);
16115 Result = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Result, N2: ShiftC);
16116 DAG.ReplaceAllUsesOfValueWith(From: SDValue(N, 0), To: Result);
16117 }
16118
16119 // Return the new loaded value.
16120 return Result;
16121}
16122
16123SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
16124 SDValue N0 = N->getOperand(Num: 0);
16125 SDValue N1 = N->getOperand(Num: 1);
16126 EVT VT = N->getValueType(ResNo: 0);
16127 EVT ExtVT = cast<VTSDNode>(Val&: N1)->getVT();
16128 unsigned VTBits = VT.getScalarSizeInBits();
16129 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
16130 SDLoc DL(N);
16131
16132 // sext_vector_inreg(undef) = 0 because the top bit will all be the same.
16133 if (N0.isUndef())
16134 return DAG.getConstant(Val: 0, DL, VT);
16135
16136 // fold (sext_in_reg c1) -> c1
16137 if (SDValue C =
16138 DAG.FoldConstantArithmetic(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT, Ops: {N0, N1}))
16139 return C;
16140
16141 // If the input is already sign extended, just drop the extension.
16142 if (ExtVTBits >= DAG.ComputeMaxSignificantBits(Op: N0))
16143 return N0;
16144
16145 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
16146 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
16147 ExtVT.bitsLT(VT: cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT()))
16148 return DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT, N1: N0.getOperand(i: 0), N2: N1);
16149
16150 // fold (sext_in_reg (sext x)) -> (sext x)
16151 // fold (sext_in_reg (aext x)) -> (sext x)
16152 // if x is small enough or if we know that x has more than 1 sign bit and the
16153 // sign_extend_inreg is extending from one of them.
16154 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
16155 SDValue N00 = N0.getOperand(i: 0);
16156 unsigned N00Bits = N00.getScalarValueSizeInBits();
16157 if ((N00Bits <= ExtVTBits ||
16158 DAG.ComputeMaxSignificantBits(Op: N00) <= ExtVTBits) &&
16159 (!LegalOperations || TLI.isOperationLegal(Op: ISD::SIGN_EXTEND, VT)))
16160 return DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT, Operand: N00);
16161 }
16162
16163 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
16164 // if x is small enough or if we know that x has more than 1 sign bit and the
16165 // sign_extend_inreg is extending from one of them.
16166 if (ISD::isExtVecInRegOpcode(Opcode: N0.getOpcode())) {
16167 SDValue N00 = N0.getOperand(i: 0);
16168 unsigned N00Bits = N00.getScalarValueSizeInBits();
16169 bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
16170 if ((N00Bits == ExtVTBits ||
16171 (!IsZext && (N00Bits < ExtVTBits ||
16172 DAG.ComputeMaxSignificantBits(Op: N00) <= ExtVTBits))) &&
16173 (!LegalOperations ||
16174 TLI.isOperationLegal(Op: ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
16175 return DAG.getNode(Opcode: ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, Operand: N00);
16176 }
16177
16178 // fold (sext_in_reg (zext x)) -> (sext x)
16179 // iff we are extending the source sign bit.
16180 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
16181 SDValue N00 = N0.getOperand(i: 0);
16182 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
16183 (!LegalOperations || TLI.isOperationLegal(Op: ISD::SIGN_EXTEND, VT)))
16184 return DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT, Operand: N00);
16185 }
16186
16187 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
16188 if (DAG.MaskedValueIsZero(Op: N0, Mask: APInt::getOneBitSet(numBits: VTBits, BitNo: ExtVTBits - 1)))
16189 return DAG.getZeroExtendInReg(Op: N0, DL, VT: ExtVT);
16190
16191 // fold operands of sext_in_reg based on knowledge that the top bits are not
16192 // demanded.
16193 if (SimplifyDemandedBits(Op: SDValue(N, 0)))
16194 return SDValue(N, 0);
16195
16196 // fold (sext_in_reg (load x)) -> (smaller sextload x)
16197 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
16198 if (SDValue NarrowLoad = reduceLoadWidth(N))
16199 return NarrowLoad;
16200
16201 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
16202 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
16203 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
16204 if (N0.getOpcode() == ISD::SRL) {
16205 if (auto *ShAmt = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1)))
16206 if (ShAmt->getAPIntValue().ule(RHS: VTBits - ExtVTBits)) {
16207 // We can turn this into an SRA iff the input to the SRL is already sign
16208 // extended enough.
16209 unsigned InSignBits = DAG.ComputeNumSignBits(Op: N0.getOperand(i: 0));
16210 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
16211 return DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: N0.getOperand(i: 0),
16212 N2: N0.getOperand(i: 1));
16213 }
16214 }
16215
16216 // fold (sext_inreg (extload x)) -> (sextload x)
16217 // If sextload is not supported by the target, we can only do the combine when
16218 // the load has one use. Doing otherwise can block folding the extload with other
16219 // extends that the target does support.
16220 if (ISD::isEXTLoad(N: N0.getNode()) && ISD::isUNINDEXEDLoad(N: N0.getNode()) &&
16221 ExtVT == cast<LoadSDNode>(Val&: N0)->getMemoryVT() &&
16222 ((!LegalOperations && cast<LoadSDNode>(Val&: N0)->isSimple() &&
16223 N0.hasOneUse()) ||
16224 TLI.isLoadExtLegal(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: ExtVT))) {
16225 auto *LN0 = cast<LoadSDNode>(Val&: N0);
16226 SDValue ExtLoad =
16227 DAG.getExtLoad(ExtType: ISD::SEXTLOAD, dl: DL, VT, Chain: LN0->getChain(),
16228 Ptr: LN0->getBasePtr(), MemVT: ExtVT, MMO: LN0->getMemOperand());
16229 CombineTo(N, Res: ExtLoad);
16230 CombineTo(N: N0.getNode(), Res0: ExtLoad, Res1: ExtLoad.getValue(R: 1));
16231 AddToWorklist(N: ExtLoad.getNode());
16232 return SDValue(N, 0); // Return N so it doesn't get rechecked!
16233 }
16234
16235 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
16236 if (ISD::isZEXTLoad(N: N0.getNode()) && ISD::isUNINDEXEDLoad(N: N0.getNode()) &&
16237 N0.hasOneUse() && ExtVT == cast<LoadSDNode>(Val&: N0)->getMemoryVT() &&
16238 ((!LegalOperations && cast<LoadSDNode>(Val&: N0)->isSimple()) &&
16239 TLI.isLoadExtLegal(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: ExtVT))) {
16240 auto *LN0 = cast<LoadSDNode>(Val&: N0);
16241 SDValue ExtLoad =
16242 DAG.getExtLoad(ExtType: ISD::SEXTLOAD, dl: DL, VT, Chain: LN0->getChain(),
16243 Ptr: LN0->getBasePtr(), MemVT: ExtVT, MMO: LN0->getMemOperand());
16244 CombineTo(N, Res: ExtLoad);
16245 CombineTo(N: N0.getNode(), Res0: ExtLoad, Res1: ExtLoad.getValue(R: 1));
16246 return SDValue(N, 0); // Return N so it doesn't get rechecked!
16247 }
16248
16249 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
16250 // ignore it if the masked load is already sign extended
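// If the load is only reachable through a one-use FREEZE, replace the load
// itself and keep the freeze (now wrapping the extending load) as the result.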
16251 bool Frozen = N0.getOpcode() == ISD::FREEZE && N0.hasOneUse();
16252 if (auto *Ld = dyn_cast<MaskedLoadSDNode>(Val: Frozen ? N0.getOperand(i: 0) : N0)) {
16253 if (ExtVT == Ld->getMemoryVT() && Ld->hasNUsesOfValue(NUses: 1, Value: 0) &&
16254 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
16255 TLI.isLoadExtLegal(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: ExtVT)) {
16256 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
16257 VT, dl: DL, Chain: Ld->getChain(), Base: Ld->getBasePtr(), Offset: Ld->getOffset(),
16258 Mask: Ld->getMask(), Src0: Ld->getPassThru(), MemVT: ExtVT, MMO: Ld->getMemOperand(),
16259 AM: Ld->getAddressingMode(), ISD::SEXTLOAD, IsExpanding: Ld->isExpandingLoad());
16260 CombineTo(N, Res: Frozen ? N0 : ExtMaskedLoad);
16261 CombineTo(N: Ld, Res0: ExtMaskedLoad, Res1: ExtMaskedLoad.getValue(R: 1));
16262 return SDValue(N, 0); // Return N so it doesn't get rechecked!
16263 }
16264 }
16265
16266 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
16267 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(Val&: N0)) {
16268 if (SDValue(GN0, 0).hasOneUse() && ExtVT == GN0->getMemoryVT() &&
16269 TLI.isVectorLoadExtDesirable(ExtVal: SDValue(N, 0))) {
16270 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
16271 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
16272
16273 SDValue ExtLoad = DAG.getMaskedGather(
16274 VTs: DAG.getVTList(VT1: VT, VT2: MVT::Other), MemVT: ExtVT, dl: DL, Ops, MMO: GN0->getMemOperand(),
16275 IndexType: GN0->getIndexType(), ExtTy: ISD::SEXTLOAD);
16276
16277 CombineTo(N, Res: ExtLoad);
16278 CombineTo(N: N0.getNode(), Res0: ExtLoad, Res1: ExtLoad.getValue(R: 1));
16279 AddToWorklist(N: ExtLoad.getNode());
16280 return SDValue(N, 0); // Return N so it doesn't get rechecked!
16281 }
16282 }
16283
16284 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
16285 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
16286 if (SDValue BSwap = MatchBSwapHWordLow(N: N0.getNode(), N0: N0.getOperand(i: 0),
16287 N1: N0.getOperand(i: 1), DemandHighBits: false))
16288 return DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT, N1: BSwap, N2: N1);
16289 }
16290
16291 // Fold (iM_signext_inreg
16292 // (extract_subvector (zext|anyext|sext iN_v to _) _)
16293 // from iN)
16294 // -> (extract_subvector (signext iN_v to iM))
16295 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
16296 ISD::isExtOpcode(Opcode: N0.getOperand(i: 0).getOpcode())) {
16297 SDValue InnerExt = N0.getOperand(i: 0);
16298 EVT InnerExtVT = InnerExt->getValueType(ResNo: 0);
16299 SDValue Extendee = InnerExt->getOperand(Num: 0);
16300
16301 if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
16302 (!LegalOperations ||
16303 TLI.isOperationLegal(Op: ISD::SIGN_EXTEND, VT: InnerExtVT))) {
16304 SDValue SignExtExtendee =
16305 DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: InnerExtVT, Operand: Extendee);
16306 return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, N1: SignExtExtendee,
16307 N2: N0.getOperand(i: 1));
16308 }
16309 }
16310
16311 return SDValue();
16312}
16313
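/// Fold *_extend_vector_inreg (concat_vectors x, ...) into *_extend x when the
/// in-reg extension only reads the first concatenated operand.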
16314static SDValue foldExtendVectorInregToExtendOfSubvector(
16315 SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG,
16316 bool LegalOperations) {
16317 unsigned InregOpcode = N->getOpcode();
16318 unsigned Opcode = DAG.getOpcode_EXTEND(Opcode: InregOpcode);
16319
16320 SDValue Src = N->getOperand(Num: 0);
16321 EVT VT = N->getValueType(ResNo: 0);
16322 EVT SrcVT = VT.changeVectorElementType(
16323 Context&: *DAG.getContext(), EltVT: Src.getValueType().getVectorElementType());
16324
16325 assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
16326 "Expected EXTEND_VECTOR_INREG dag node in input!");
16327
16328 // Profitability check: our operand must be a one-use CONCAT_VECTORS.
16329 // FIXME: one-use check may be overly restrictive
16330 if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
16331 return SDValue();
16332
16333 // Profitability check: we must be extending exactly one of its operands.
16334 // FIXME: this is probably overly restrictive.
16335 Src = Src.getOperand(i: 0);
16336 if (Src.getValueType() != SrcVT)
16337 return SDValue();
16338
16339 if (LegalOperations && !TLI.isOperationLegal(Op: Opcode, VT))
16340 return SDValue();
16341
16342 return DAG.getNode(Opcode, DL, VT, Operand: Src);
16343}
16344
16345SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
16346 SDValue N0 = N->getOperand(Num: 0);
16347 EVT VT = N->getValueType(ResNo: 0);
16348 SDLoc DL(N);
16349
16350 if (N0.isUndef()) {
16351 // aext_vector_inreg(undef) = undef because the top bits are undefined.
16352 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
16353 return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
16354 ? DAG.getUNDEF(VT)
16355 : DAG.getConstant(Val: 0, DL, VT);
16356 }
16357
16358 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
16359 return Res;
16360
16361 if (SimplifyDemandedVectorElts(Op: SDValue(N, 0)))
16362 return SDValue(N, 0);
16363
16364 if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, DL, TLI, DAG,
16365 LegalOperations))
16366 return R;
16367
16368 return SDValue();
16369}
16370
16371SDValue DAGCombiner::visitTRUNCATE_USAT_U(SDNode *N) {
16372 EVT VT = N->getValueType(ResNo: 0);
16373 SDValue N0 = N->getOperand(Num: 0);
16374
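// fold (truncate_usat_u (fp_to_uint x)) -> (fp_to_uint_sat x)
// when the target prefers a saturating FP-to-int conversion.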
16375 SDValue FPVal;
16376 if (sd_match(N: N0, P: m_FPToUI(Op: m_Value(N&: FPVal))) &&
16377 DAG.getTargetLoweringInfo().shouldConvertFpToSat(
16378 Op: ISD::FP_TO_UINT_SAT, FPVT: FPVal.getValueType(), VT))
16379 return DAG.getNode(Opcode: ISD::FP_TO_UINT_SAT, DL: SDLoc(N0), VT, N1: FPVal,
16380 N2: DAG.getValueType(VT.getScalarType()));
16381
16382 return SDValue();
16383}
16384
16385/// Detect patterns of truncation with unsigned saturation:
16386///
16387/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
16388/// Return the source value x to be truncated or SDValue() if the pattern was
16389/// not matched.
16390///
16391static SDValue detectUSatUPattern(SDValue In, EVT VT) {
16392 unsigned NumDstBits = VT.getScalarSizeInBits();
16393 unsigned NumSrcBits = In.getScalarValueSizeInBits();
16394 // Saturation with truncation. We truncate from InVT to VT.
16395 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
16396
16397 SDValue Min;
16398 APInt UnsignedMax = APInt::getMaxValue(numBits: NumDstBits).zext(width: NumSrcBits);
16399 if (sd_match(N: In, P: m_UMin(L: m_Value(N&: Min), R: m_SpecificInt(V: UnsignedMax))))
16400 return Min;
16401
16402 return SDValue();
16403}
16404
16405/// Detect patterns of truncation with signed saturation:
16406/// (truncate (smin (smax (x, signed_min_of_dest_type),
16407/// signed_max_of_dest_type)) to dest_type)
16408/// or:
16409/// (truncate (smax (smin (x, signed_max_of_dest_type),
16410/// signed_min_of_dest_type)) to dest_type).
16411///
16412/// Return the source value to be truncated or SDValue() if the pattern was not
16413/// matched.
16414static SDValue detectSSatSPattern(SDValue In, EVT VT) {
16415 unsigned NumDstBits = VT.getScalarSizeInBits();
16416 unsigned NumSrcBits = In.getScalarValueSizeInBits();
16417 // Saturation with truncation. We truncate from InVT to VT.
16418 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
16419
16420 SDValue Val;
16421 APInt SignedMax = APInt::getSignedMaxValue(numBits: NumDstBits).sext(width: NumSrcBits);
16422 APInt SignedMin = APInt::getSignedMinValue(numBits: NumDstBits).sext(width: NumSrcBits);
16423
16424 if (sd_match(N: In, P: m_SMin(L: m_SMax(L: m_Value(N&: Val), R: m_SpecificInt(V: SignedMin)),
16425 R: m_SpecificInt(V: SignedMax))))
16426 return Val;
16427
16428 if (sd_match(N: In, P: m_SMax(L: m_SMin(L: m_Value(N&: Val), R: m_SpecificInt(V: SignedMax)),
16429 R: m_SpecificInt(V: SignedMin))))
16430 return Val;
16431
16432 return SDValue();
16433}
16434
16435 /// Detect patterns of truncation with unsigned saturation of a signed input: the value is clamped between 0 and the unsigned max of the destination type.
16436static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG,
16437 const SDLoc &DL) {
16438 unsigned NumDstBits = VT.getScalarSizeInBits();
16439 unsigned NumSrcBits = In.getScalarValueSizeInBits();
16440 // Saturation with truncation. We truncate from InVT to VT.
16441 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
16442
16443 SDValue Val;
16444 APInt UnsignedMax = APInt::getMaxValue(numBits: NumDstBits).zext(width: NumSrcBits);
16445 // Min == 0, Max is unsigned max of destination type.
16446 if (sd_match(N: In, P: m_SMax(L: m_SMin(L: m_Value(N&: Val), R: m_SpecificInt(V: UnsignedMax)),
16447 R: m_Zero())))
16448 return Val;
16449
16450 if (sd_match(N: In, P: m_SMin(L: m_SMax(L: m_Value(N&: Val), R: m_Zero()),
16451 R: m_SpecificInt(V: UnsignedMax))))
16452 return Val;
16453
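// Once the value has been clamped to be non-negative by smax, an unsigned min
// against the same bound is equivalent to a signed min, so accept that form too.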
16454 if (sd_match(N: In, P: m_UMin(L: m_SMax(L: m_Value(N&: Val), R: m_Zero()),
16455 R: m_SpecificInt(V: UnsignedMax))))
16456 return Val;
16457
16458 return SDValue();
16459}
16460
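/// Try to fold a truncate of a min/max clamp pattern into one of the
/// saturating truncate nodes (TRUNCATE_SSAT_S, TRUNCATE_SSAT_U or
/// TRUNCATE_USAT_U), if the target considers that opcode legal or custom and
/// desirable for the types involved.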
16461static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT,
16462 SDLoc &DL, const TargetLowering &TLI,
16463 SelectionDAG &DAG) {
16464 auto AllowedTruncateSat = [&](unsigned Opc, EVT SrcVT, EVT VT) -> bool {
16465 return (TLI.isOperationLegalOrCustom(Op: Opc, VT: SrcVT) &&
16466 TLI.isTypeDesirableForOp(Opc, VT));
16467 };
16468
16469 if (Src.getOpcode() == ISD::SMIN || Src.getOpcode() == ISD::SMAX) {
16470 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_S, SrcVT, VT))
16471 if (SDValue SSatVal = detectSSatSPattern(In: Src, VT))
16472 return DAG.getNode(Opcode: ISD::TRUNCATE_SSAT_S, DL, VT, Operand: SSatVal);
16473 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
16474 if (SDValue SSatVal = detectSSatUPattern(In: Src, VT, DAG, DL))
16475 return DAG.getNode(Opcode: ISD::TRUNCATE_SSAT_U, DL, VT, Operand: SSatVal);
16476 } else if (Src.getOpcode() == ISD::UMIN) {
16477 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
16478 if (SDValue SSatVal = detectSSatUPattern(In: Src, VT, DAG, DL))
16479 return DAG.getNode(Opcode: ISD::TRUNCATE_SSAT_U, DL, VT, Operand: SSatVal);
16480 if (AllowedTruncateSat(ISD::TRUNCATE_USAT_U, SrcVT, VT))
16481 if (SDValue USatVal = detectUSatUPattern(In: Src, VT))
16482 return DAG.getNode(Opcode: ISD::TRUNCATE_USAT_U, DL, VT, Operand: USatVal);
16483 }
16484
16485 return SDValue();
16486}
16487
16488SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
16489 SDValue N0 = N->getOperand(Num: 0);
16490 EVT VT = N->getValueType(ResNo: 0);
16491 EVT SrcVT = N0.getValueType();
16492 bool isLE = DAG.getDataLayout().isLittleEndian();
16493 SDLoc DL(N);
16494
16495 // trunc(undef) = undef
16496 if (N0.isUndef())
16497 return DAG.getUNDEF(VT);
16498
16499 // fold (truncate (truncate x)) -> (truncate x)
16500 if (N0.getOpcode() == ISD::TRUNCATE)
16501 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: N0.getOperand(i: 0));
16502
16503 // fold saturated truncate
16504 if (SDValue SaturatedTR = foldToSaturated(N, VT, Src&: N0, SrcVT, DL, TLI, DAG))
16505 return SaturatedTR;
16506
16507 // fold (truncate c1) -> c1
16508 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::TRUNCATE, DL, VT, Ops: {N0}))
16509 return C;
16510
16511 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
16512 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
16513 N0.getOpcode() == ISD::SIGN_EXTEND ||
16514 N0.getOpcode() == ISD::ANY_EXTEND) {
16515 // if the source is smaller than the dest, we still need an extend.
16516 if (N0.getOperand(i: 0).getValueType().bitsLT(VT)) {
16517 SDNodeFlags Flags;
16518 if (N0.getOpcode() == ISD::ZERO_EXTEND)
16519 Flags.setNonNeg(N0->getFlags().hasNonNeg());
16520 return DAG.getNode(Opcode: N0.getOpcode(), DL, VT, Operand: N0.getOperand(i: 0), Flags);
16521 }
16522 // if the source is larger than the dest, then we just need the truncate.
16523 if (N0.getOperand(i: 0).getValueType().bitsGT(VT))
16524 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: N0.getOperand(i: 0));
16525 // if the source and dest are the same type, we can drop both the extend
16526 // and the truncate.
16527 return N0.getOperand(i: 0);
16528 }
16529
16530 // Try to narrow a truncate-of-sext_in_reg to the destination type:
16531 // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
16532 if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
16533 N0.hasOneUse()) {
16534 SDValue X = N0.getOperand(i: 0);
16535 SDValue ExtVal = N0.getOperand(i: 1);
16536 EVT ExtVT = cast<VTSDNode>(Val&: ExtVal)->getVT();
16537 if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(TruncVT: VT, VT: SrcVT, ExtVT)) {
16538 SDValue TrX = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: X);
16539 return DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT, N1: TrX, N2: ExtVal);
16540 }
16541 }
16542
16543 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
16544 if (N->hasOneUse() && (N->user_begin()->getOpcode() == ISD::ANY_EXTEND))
16545 return SDValue();
16546
16547 // Fold extract-and-trunc into a narrow extract. For example:
16548 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
16549 // i32 y = TRUNCATE(i64 x)
16550 // -- becomes --
16551 // v16i8 b = BITCAST (v2i64 val)
16552 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
16553 //
16554 // Note: We only run this optimization after type legalization (which often
16555 // creates this pattern) and before operation legalization after which
16556 // we need to be more careful about the vector instructions that we generate.
16557 if (LegalTypes && !LegalOperations && VT.isScalarInteger() && VT != MVT::i1 &&
16558 N0->hasOneUse()) {
16559 EVT TrTy = N->getValueType(ResNo: 0);
16560 SDValue Src = N0;
16561
16562 // Check for cases where we shift down an upper element before truncation.
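// For example, on little-endian targets,
//   trunc i32 (srl (extract_vector_elt v2i64:x, 0), 32)
// reads element 1 of the v4i32 bitcast of x.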
16563 int EltOffset = 0;
16564 if (Src.getOpcode() == ISD::SRL && Src.getOperand(i: 0)->hasOneUse()) {
16565 if (auto ShAmt = DAG.getValidShiftAmount(V: Src)) {
16566 if ((*ShAmt % TrTy.getSizeInBits()) == 0) {
16567 Src = Src.getOperand(i: 0);
16568 EltOffset = *ShAmt / TrTy.getSizeInBits();
16569 }
16570 }
16571 }
16572
16573 if (Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16574 EVT VecTy = Src.getOperand(i: 0).getValueType();
16575 EVT ExTy = Src.getValueType();
16576
16577 auto EltCnt = VecTy.getVectorElementCount();
16578 unsigned SizeRatio = ExTy.getSizeInBits() / TrTy.getSizeInBits();
16579 auto NewEltCnt = EltCnt * SizeRatio;
16580
16581 EVT NVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: TrTy, EC: NewEltCnt);
16582 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
16583
16584 SDValue EltNo = Src->getOperand(Num: 1);
16585 if (isa<ConstantSDNode>(Val: EltNo) && isTypeLegal(VT: NVT)) {
16586 int Elt = EltNo->getAsZExtVal();
16587 int Index = isLE ? (Elt * SizeRatio + EltOffset)
16588 : (Elt * SizeRatio + (SizeRatio - 1) - EltOffset);
16589 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: TrTy,
16590 N1: DAG.getBitcast(VT: NVT, V: Src.getOperand(i: 0)),
16591 N2: DAG.getVectorIdxConstant(Val: Index, DL));
16592 }
16593 }
16594 }
16595
16596 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
16597 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse() &&
16598 TLI.isTruncateFree(FromVT: SrcVT, ToVT: VT)) {
16599 if (!LegalOperations ||
16600 (TLI.isOperationLegal(Op: ISD::SELECT, VT: SrcVT) &&
16601 TLI.isNarrowingProfitable(N: N0.getNode(), SrcVT, DestVT: VT))) {
16602 SDLoc SL(N0);
16603 SDValue Cond = N0.getOperand(i: 0);
16604 SDValue TruncOp0 = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SL, VT, Operand: N0.getOperand(i: 1));
16605 SDValue TruncOp1 = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SL, VT, Operand: N0.getOperand(i: 2));
16606 return DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: Cond, N2: TruncOp0, N3: TruncOp1);
16607 }
16608 }
16609
16610 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
16611 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
16612 (!LegalOperations || TLI.isOperationLegal(Op: ISD::SHL, VT)) &&
16613 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
16614 SDValue Amt = N0.getOperand(i: 1);
16615 KnownBits Known = DAG.computeKnownBits(Op: Amt);
16616 unsigned Size = VT.getScalarSizeInBits();
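// Only fold if the shift amount is provably smaller than the narrow bit width,
// so the shift amount is still in range after the truncation.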
16617 if (Known.countMaxActiveBits() <= Log2_32(Value: Size)) {
16618 EVT AmtVT = TLI.getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
16619 SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: N0.getOperand(i: 0));
16620 if (AmtVT != Amt.getValueType()) {
16621 Amt = DAG.getZExtOrTrunc(Op: Amt, DL, VT: AmtVT);
16622 AddToWorklist(N: Amt.getNode());
16623 }
16624 return DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Trunc, N2: Amt);
16625 }
16626 }
16627
16628 if (SDValue V = foldSubToUSubSat(DstVT: VT, N: N0.getNode(), DL))
16629 return V;
16630
16631 if (SDValue ABD = foldABSToABD(N, DL))
16632 return ABD;
16633
16634 // Attempt to pre-truncate BUILD_VECTOR sources.
16635 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
16636 N0.hasOneUse() &&
16637 TLI.isTruncateFree(FromVT: SrcVT.getScalarType(), ToVT: VT.getScalarType()) &&
16638 // Avoid creating illegal types if running after type legalizer.
16639 (!LegalTypes || TLI.isTypeLegal(VT: VT.getScalarType()))) {
16640 EVT SVT = VT.getScalarType();
16641 SmallVector<SDValue, 8> TruncOps;
16642 for (const SDValue &Op : N0->op_values()) {
16643 SDValue TruncOp = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: SVT, Operand: Op);
16644 TruncOps.push_back(Elt: TruncOp);
16645 }
16646 return DAG.getBuildVector(VT, DL, Ops: TruncOps);
16647 }
16648
16649 // trunc (splat_vector x) -> splat_vector (trunc x)
16650 if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
16651 (!LegalTypes || TLI.isTypeLegal(VT: VT.getScalarType())) &&
16652 (!LegalOperations || TLI.isOperationLegal(Op: ISD::SPLAT_VECTOR, VT))) {
16653 EVT SVT = VT.getScalarType();
16654 return DAG.getSplatVector(
16655 VT, DL, Op: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: SVT, Operand: N0->getOperand(Num: 0)));
16656 }
16657
16658 // Fold a series of buildvector, bitcast, and truncate if possible.
16659 // For example fold
16660 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
16661 // (2xi32 (buildvector x, y)).
16662 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
16663 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
16664 N0.getOperand(i: 0).getOpcode() == ISD::BUILD_VECTOR &&
16665 N0.getOperand(i: 0).hasOneUse()) {
16666 SDValue BuildVect = N0.getOperand(i: 0);
16667 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
16668 EVT TruncVecEltTy = VT.getVectorElementType();
16669
16670 // Check that the element types match.
16671 if (BuildVectEltTy == TruncVecEltTy) {
16672 // Now we only need to compute the offset of the truncated elements.
16673 unsigned BuildVecNumElts = BuildVect.getNumOperands();
16674 unsigned TruncVecNumElts = VT.getVectorNumElements();
16675 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
16676 unsigned FirstElt = isLE ? 0 : (TruncEltOffset - 1);
16677
16678 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
16679 "Invalid number of elements");
16680
16681 SmallVector<SDValue, 8> Opnds;
16682 for (unsigned i = FirstElt, e = BuildVecNumElts; i < e;
16683 i += TruncEltOffset)
16684 Opnds.push_back(Elt: BuildVect.getOperand(i));
16685
16686 return DAG.getBuildVector(VT, DL, Ops: Opnds);
16687 }
16688 }
16689
16690 // fold (truncate (load x)) -> (smaller load x)
16691 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
16692 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
16693 if (SDValue Reduced = reduceLoadWidth(N))
16694 return Reduced;
16695
16696 // Handle the case where the truncated result is at least as wide as the
16697 // loaded type.
16698 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N: N0.getNode())) {
16699 auto *LN0 = cast<LoadSDNode>(Val&: N0);
16700 if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
16701 SDValue NewLoad = DAG.getExtLoad(
16702 ExtType: LN0->getExtensionType(), dl: SDLoc(LN0), VT, Chain: LN0->getChain(),
16703 Ptr: LN0->getBasePtr(), MemVT: LN0->getMemoryVT(), MMO: LN0->getMemOperand());
16704 DAG.ReplaceAllUsesOfValueWith(From: N0.getValue(R: 1), To: NewLoad.getValue(R: 1));
16705 return NewLoad;
16706 }
16707 }
16708 }
16709
16710 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
16711 // where ... are all 'undef'.
16712 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
16713 SmallVector<EVT, 8> VTs;
16714 SDValue V;
16715 unsigned Idx = 0;
16716 unsigned NumDefs = 0;
16717
16718 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
16719 SDValue X = N0.getOperand(i);
16720 if (!X.isUndef()) {
16721 V = X;
16722 Idx = i;
16723 NumDefs++;
16724 }
16725 // Stop if more than one member is non-undef.
16726 if (NumDefs > 1)
16727 break;
16728
16729 VTs.push_back(Elt: EVT::getVectorVT(Context&: *DAG.getContext(),
16730 VT: VT.getVectorElementType(),
16731 EC: X.getValueType().getVectorElementCount()));
16732 }
16733
16734 if (NumDefs == 0)
16735 return DAG.getUNDEF(VT);
16736
16737 if (NumDefs == 1) {
16738 assert(V.getNode() && "The single defined operand is empty!");
16739 SmallVector<SDValue, 8> Opnds;
16740 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
16741 if (i != Idx) {
16742 Opnds.push_back(Elt: DAG.getUNDEF(VT: VTs[i]));
16743 continue;
16744 }
16745 SDValue NV = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(V), VT: VTs[i], Operand: V);
16746 AddToWorklist(N: NV.getNode());
16747 Opnds.push_back(Elt: NV);
16748 }
16749 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, Ops: Opnds);
16750 }
16751 }
16752
16753 // Fold truncate of a bitcast of a vector to an extract of the low vector
16754 // element.
16755 //
16756 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
16757 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
16758 SDValue VecSrc = N0.getOperand(i: 0);
16759 EVT VecSrcVT = VecSrc.getValueType();
16760 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
16761 (!LegalOperations ||
16762 TLI.isOperationLegal(Op: ISD::EXTRACT_VECTOR_ELT, VT: VecSrcVT))) {
16763 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
16764 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT, N1: VecSrc,
16765 N2: DAG.getVectorIdxConstant(Val: Idx, DL));
16766 }
16767 }
16768
16769 // Simplify the operands using demanded-bits information.
16770 if (SimplifyDemandedBits(Op: SDValue(N, 0)))
16771 return SDValue(N, 0);
16772
16773 // fold (truncate (extract_subvector(ext x))) ->
16774 // (extract_subvector x)
16775 // TODO: This can be generalized to cover cases where the truncate and extract
16776 // do not fully cancel each other out.
16777 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
16778 SDValue N00 = N0.getOperand(i: 0);
16779 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
16780 N00.getOpcode() == ISD::ZERO_EXTEND ||
16781 N00.getOpcode() == ISD::ANY_EXTEND) {
16782 if (N00.getOperand(i: 0)->getValueType(ResNo: 0).getVectorElementType() ==
16783 VT.getVectorElementType())
16784 return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL: SDLoc(N0->getOperand(Num: 0)), VT,
16785 N1: N00.getOperand(i: 0), N2: N0.getOperand(i: 1));
16786 }
16787 }
16788
16789 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(Cast: N))
16790 return NewVSel;
16791
16792 // Narrow a suitable binary operation with a non-opaque constant operand by
16793 // moving it ahead of the truncate. This is limited to pre-legalization
16794 // because targets may prefer a wider type during later combines and invert
16795 // this transform.
16796 switch (N0.getOpcode()) {
16797 case ISD::ADD:
16798 case ISD::SUB:
16799 case ISD::MUL:
16800 case ISD::AND:
16801 case ISD::OR:
16802 case ISD::XOR:
16803 if (!LegalOperations && N0.hasOneUse() &&
16804 (N0.getOperand(i: 0) == N0.getOperand(i: 1) ||
16805 isConstantOrConstantVector(N: N0.getOperand(i: 0), NoOpaques: true) ||
16806 isConstantOrConstantVector(N: N0.getOperand(i: 1), NoOpaques: true))) {
16807 // TODO: We already restricted this to pre-legalization, but for vectors
16808 // we are extra cautious to not create an unsupported operation.
16809 // Target-specific changes are likely needed to avoid regressions here.
16810 if (VT.isScalarInteger() || TLI.isOperationLegal(Op: N0.getOpcode(), VT)) {
16811 SDValue NarrowL = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: N0.getOperand(i: 0));
16812 SDValue NarrowR = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: N0.getOperand(i: 1));
16813 SDNodeFlags Flags;
16814 // Propagate nuw for sub.
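// The nuw flag stays valid only when the wide LHS already fits in the narrow
// type: then the (no-wrap) RHS fits as well and the narrow sub cannot wrap.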
16815 if (N0->getOpcode() == ISD::SUB && N0->getFlags().hasNoUnsignedWrap() &&
16816 DAG.MaskedValueIsZero(
16817 Op: N0->getOperand(Num: 0),
16818 Mask: APInt::getBitsSetFrom(numBits: SrcVT.getScalarSizeInBits(),
16819 loBit: VT.getScalarSizeInBits())))
16820 Flags.setNoUnsignedWrap(true);
16821 return DAG.getNode(Opcode: N0.getOpcode(), DL, VT, N1: NarrowL, N2: NarrowR, Flags);
16822 }
16823 }
16824 break;
16825 case ISD::ADDE:
16826 case ISD::UADDO_CARRY:
16827 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
16828 // (trunc uaddo_carry(X, Y, Carry)) ->
16829 // (uaddo_carry trunc(X), trunc(Y), Carry)
16830 // When the adde's carry is not used.
16831 // We only do this for uaddo_carry before operation legalization.
16832 if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
16833 TLI.isOperationLegal(Op: N0.getOpcode(), VT)) &&
16834 N0.hasOneUse() && !N0->hasAnyUseOfValue(Value: 1)) {
16835 SDValue X = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: N0.getOperand(i: 0));
16836 SDValue Y = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: N0.getOperand(i: 1));
16837 SDVTList VTs = DAG.getVTList(VT1: VT, VT2: N0->getValueType(ResNo: 1));
16838 return DAG.getNode(Opcode: N0.getOpcode(), DL, VTList: VTs, N1: X, N2: Y, N3: N0.getOperand(i: 2));
16839 }
16840 break;
16841 case ISD::USUBSAT:
16842 // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
16843 // enough to know that the upper bits are zero, we must also ensure that we
16844 // don't introduce an extra truncate.
16845 if (!LegalOperations && N0.hasOneUse() &&
16846 N0.getOperand(i: 0).getOpcode() == ISD::ZERO_EXTEND &&
16847 N0.getOperand(i: 0).getOperand(i: 0).getScalarValueSizeInBits() <=
16848 VT.getScalarSizeInBits() &&
16849 hasOperation(Opcode: N0.getOpcode(), VT)) {
16850 return getTruncatedUSUBSAT(DstVT: VT, SrcVT, LHS: N0.getOperand(i: 0), RHS: N0.getOperand(i: 1),
16851 DAG, DL);
16852 }
16853 break;
16854 case ISD::AVGCEILS:
16855 case ISD::AVGCEILU:
16856 // trunc (avgceilu (sext (x), sext (y))) -> avgceils(x, y)
16857 // trunc (avgceils (zext (x), zext (y))) -> avgceilu(x, y)
16858 if (N0.hasOneUse()) {
16859 SDValue Op0 = N0.getOperand(i: 0);
16860 SDValue Op1 = N0.getOperand(i: 1);
16861 if (N0.getOpcode() == ISD::AVGCEILU) {
16862 if (TLI.isOperationLegalOrCustom(Op: ISD::AVGCEILS, VT) &&
16863 Op0.getOpcode() == ISD::SIGN_EXTEND &&
16864 Op1.getOpcode() == ISD::SIGN_EXTEND &&
16865 Op0.getOperand(i: 0).getValueType() == VT &&
16866 Op1.getOperand(i: 0).getValueType() == VT)
16867 return DAG.getNode(Opcode: ISD::AVGCEILS, DL, VT, N1: Op0.getOperand(i: 0),
16868 N2: Op1.getOperand(i: 0));
16869 } else {
16870 if (TLI.isOperationLegalOrCustom(Op: ISD::AVGCEILU, VT) &&
16871 Op0.getOpcode() == ISD::ZERO_EXTEND &&
16872 Op1.getOpcode() == ISD::ZERO_EXTEND &&
16873 Op0.getOperand(i: 0).getValueType() == VT &&
16874 Op1.getOperand(i: 0).getValueType() == VT)
16875 return DAG.getNode(Opcode: ISD::AVGCEILU, DL, VT, N1: Op0.getOperand(i: 0),
16876 N2: Op1.getOperand(i: 0));
16877 }
16878 }
16879 [[fallthrough]];
16880 case ISD::AVGFLOORS:
16881 case ISD::AVGFLOORU:
16882 case ISD::ABDS:
16883 case ISD::ABDU:
16884 // (trunc (avg a, b)) -> (avg (trunc a), (trunc b))
16885 // (trunc (abdu/abds a, b)) -> (abdu/abds (trunc a), (trunc b))
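// These folds are only safe when both inputs already fit in the narrow type:
// the upper bits must be zero for the unsigned forms, or copies of the sign
// bit for the signed forms.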
16886 if (!LegalOperations && N0.hasOneUse() &&
16887 TLI.isOperationLegal(Op: N0.getOpcode(), VT)) {
16888 EVT TruncVT = VT;
16889 unsigned SrcBits = SrcVT.getScalarSizeInBits();
16890 unsigned TruncBits = TruncVT.getScalarSizeInBits();
16891
16892 SDValue A = N0.getOperand(i: 0);
16893 SDValue B = N0.getOperand(i: 1);
16894 bool CanFold = false;
16895
16896 if (N0.getOpcode() == ISD::AVGFLOORU || N0.getOpcode() == ISD::AVGCEILU ||
16897 N0.getOpcode() == ISD::ABDU) {
16898 APInt UpperBits = APInt::getBitsSetFrom(numBits: SrcBits, loBit: TruncBits);
16899 CanFold = DAG.MaskedValueIsZero(Op: B, Mask: UpperBits) &&
16900 DAG.MaskedValueIsZero(Op: A, Mask: UpperBits);
16901 } else {
16902 unsigned NeededBits = SrcBits - TruncBits;
16903 CanFold = DAG.ComputeNumSignBits(Op: B) > NeededBits &&
16904 DAG.ComputeNumSignBits(Op: A) > NeededBits;
16905 }
16906
16907 if (CanFold) {
16908 SDValue NewA = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: TruncVT, Operand: A);
16909 SDValue NewB = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: TruncVT, Operand: B);
16910 return DAG.getNode(Opcode: N0.getOpcode(), DL, VT: TruncVT, N1: NewA, N2: NewB);
16911 }
16912 }
16913 break;
16914 }
16915
16916 return SDValue();
16917}
16918
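/// Return operand \p i of \p N, looking through a MERGE_VALUES node to the
/// value it actually forwards.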
16919static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
16920 SDValue Elt = N->getOperand(Num: i);
16921 if (Elt.getOpcode() != ISD::MERGE_VALUES)
16922 return Elt.getNode();
16923 return Elt.getOperand(i: Elt.getResNo()).getNode();
16924}
16925
16926/// build_pair (load, load) -> load
16927/// if load locations are consecutive.
16928SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
16929 assert(N->getOpcode() == ISD::BUILD_PAIR);
16930
16931 auto *LD1 = dyn_cast<LoadSDNode>(Val: getBuildPairElt(N, i: 0));
16932 auto *LD2 = dyn_cast<LoadSDNode>(Val: getBuildPairElt(N, i: 1));
16933
16934 // A BUILD_PAIR always has the least significant part in elt 0 and the
16935 // most significant part in elt 1. So when combining into one large load, we
16936 // need to consider the endianness.
16937 if (DAG.getDataLayout().isBigEndian())
16938 std::swap(a&: LD1, b&: LD2);
16939
16940 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(N: LD1) || !ISD::isNON_EXTLoad(N: LD2) ||
16941 !LD1->hasOneUse() || !LD2->hasOneUse() ||
16942 LD1->getAddressSpace() != LD2->getAddressSpace())
16943 return SDValue();
16944
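// Only merge when the combined load type is legal (or we are still before
// operation legalization), the two loads are directly consecutive in memory,
// and the target reports the wider access as both allowed and fast.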
16945 unsigned LD1Fast = 0;
16946 EVT LD1VT = LD1->getValueType(ResNo: 0);
16947 unsigned LD1Bytes = LD1VT.getStoreSize();
16948 if ((!LegalOperations || TLI.isOperationLegal(Op: ISD::LOAD, VT)) &&
16949 DAG.areNonVolatileConsecutiveLoads(LD: LD2, Base: LD1, Bytes: LD1Bytes, Dist: 1) &&
16950 TLI.allowsMemoryAccess(Context&: *DAG.getContext(), DL: DAG.getDataLayout(), VT,
16951 MMO: *LD1->getMemOperand(), Fast: &LD1Fast) && LD1Fast)
16952 return DAG.getLoad(VT, dl: SDLoc(N), Chain: LD1->getChain(), Ptr: LD1->getBasePtr(),
16953 PtrInfo: LD1->getPointerInfo(), Alignment: LD1->getAlign());
16954
16955 return SDValue();
16956}
16957
16958static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
16959 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
16960 // and Lo parts; on big-endian machines it doesn't.
16961 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
16962}
16963
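/// Replace a bitcast of integer logic on a bitcasted FP value with the
/// equivalent FP operation: AND with the non-sign mask becomes fabs, XOR with
/// the sign mask becomes fneg, and OR with the sign mask becomes fneg (fabs).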
16964SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
16965 const TargetLowering &TLI) {
16966 // If this is not a bitcast to an FP type or if the target doesn't have
16967 // IEEE754-compliant FP logic, we're done.
16968 EVT VT = N->getValueType(ResNo: 0);
16969 SDValue N0 = N->getOperand(Num: 0);
16970 EVT SourceVT = N0.getValueType();
16971
16972 if (!VT.isFloatingPoint())
16973 return SDValue();
16974
16975 // TODO: Handle cases where the integer constant is a different scalar
16976 // bitwidth to the FP.
16977 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
16978 return SDValue();
16979
16980 unsigned FPOpcode;
16981 APInt SignMask;
16982 switch (N0.getOpcode()) {
16983 case ISD::AND:
16984 FPOpcode = ISD::FABS;
16985 SignMask = ~APInt::getSignMask(BitWidth: SourceVT.getScalarSizeInBits());
16986 break;
16987 case ISD::XOR:
16988 FPOpcode = ISD::FNEG;
16989 SignMask = APInt::getSignMask(BitWidth: SourceVT.getScalarSizeInBits());
16990 break;
16991 case ISD::OR:
16992 FPOpcode = ISD::FABS;
16993 SignMask = APInt::getSignMask(BitWidth: SourceVT.getScalarSizeInBits());
16994 break;
16995 default:
16996 return SDValue();
16997 }
16998
16999 if (LegalOperations && !TLI.isOperationLegal(Op: FPOpcode, VT))
17000 return SDValue();
17001
17002 // This needs to be the inverse of logic in foldSignChangeInBitcast.
17003 // FIXME: I don't think looking for bitcast intrinsically makes sense, but
17004 // removing this would require more changes.
17005 auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
17006 if (sd_match(N: Op, P: m_BitCast(Op: m_SpecificVT(RefVT: VT))))
17007 return true;
17008
17009 return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
17010 };
17011
17012 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
17013 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
17014 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
17015 // fneg (fabs X)
17016 SDValue LogicOp0 = N0.getOperand(i: 0);
17017 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N: N0.getOperand(i: 1), AllowUndefs: true);
17018 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
17019 IsBitCastOrFree(LogicOp0, VT)) {
17020 SDValue CastOp0 = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(N), VT, Operand: LogicOp0);
17021 SDValue FPOp = DAG.getNode(Opcode: FPOpcode, DL: SDLoc(N), VT, Operand: CastOp0);
17022 NumFPLogicOpsConv++;
17023 if (N0.getOpcode() == ISD::OR)
17024 return DAG.getNode(Opcode: ISD::FNEG, DL: SDLoc(N), VT, Operand: FPOp);
17025 return FPOp;
17026 }
17027
17028 return SDValue();
17029}
17030
17031SDValue DAGCombiner::visitBITCAST(SDNode *N) {
17032 SDValue N0 = N->getOperand(Num: 0);
17033 EVT VT = N->getValueType(ResNo: 0);
17034
17035 if (N0.isUndef())
17036 return DAG.getUNDEF(VT);
17037
17038 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
17039 // Only do this before legalize types, unless both types are integer and the
17040 // scalar type is legal. Only do this before legalize ops, since the target
17041 // may be depending on the bitcast.
17042 // First check to see if this is all constant.
17043 // TODO: Support FP bitcasts after legalize types.
17044 if (VT.isVector() &&
17045 (!LegalTypes ||
17046 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
17047 TLI.isTypeLegal(VT: VT.getVectorElementType()))) &&
17048 N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
17049 cast<BuildVectorSDNode>(Val&: N0)->isConstant())
17050 return DAG.FoldConstantBuildVector(BV: cast<BuildVectorSDNode>(Val&: N0), DL: SDLoc(N),
17051 DstEltVT: VT.getVectorElementType());
17052
17053 // If the input is a constant, let getNode fold it.
17054 if (isIntOrFPConstant(V: N0)) {
17055 // If we can't allow illegal operations, we need to check that this is just
17056 // an fp -> int or int -> fp conversion and that the resulting operation will
17057 // be legal.
17058 if (!LegalOperations ||
17059 (isa<ConstantSDNode>(Val: N0) && VT.isFloatingPoint() && !VT.isVector() &&
17060 TLI.isOperationLegal(Op: ISD::ConstantFP, VT)) ||
17061 (isa<ConstantFPSDNode>(Val: N0) && VT.isInteger() && !VT.isVector() &&
17062 TLI.isOperationLegal(Op: ISD::Constant, VT))) {
17063 SDValue C = DAG.getBitcast(VT, V: N0);
17064 if (C.getNode() != N)
17065 return C;
17066 }
17067 }
17068
17069 // (conv (conv x, t1), t2) -> (conv x, t2)
17070 if (N0.getOpcode() == ISD::BITCAST)
17071 return DAG.getBitcast(VT, V: N0.getOperand(i: 0));
17072
17073 // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
17074 // iff the current bitwise logicop type isn't legal
17075 if (ISD::isBitwiseLogicOp(Opcode: N0.getOpcode()) && VT.isInteger() &&
17076 !TLI.isTypeLegal(VT: N0.getOperand(i: 0).getValueType())) {
17077 auto IsFreeBitcast = [VT](SDValue V) {
17078 return (V.getOpcode() == ISD::BITCAST &&
17079 V.getOperand(i: 0).getValueType() == VT) ||
17080 (ISD::isBuildVectorOfConstantSDNodes(N: V.getNode()) &&
17081 V->hasOneUse());
17082 };
17083 if (IsFreeBitcast(N0.getOperand(i: 0)) && IsFreeBitcast(N0.getOperand(i: 1)))
17084 return DAG.getNode(Opcode: N0.getOpcode(), DL: SDLoc(N), VT,
17085 N1: DAG.getBitcast(VT, V: N0.getOperand(i: 0)),
17086 N2: DAG.getBitcast(VT, V: N0.getOperand(i: 1)));
17087 }
17088
17089 // fold (conv (load x)) -> (load (conv*)x)
17090 // fold (conv (freeze (load x))) -> (freeze (load (conv*)x))
17091 // If the resultant load doesn't need a higher alignment than the original!
17092 auto CastLoad = [this, &VT](SDValue N0, const SDLoc &DL) {
17093 if (!ISD::isNormalLoad(N: N0.getNode()) || !N0.hasOneUse())
17094 return SDValue();
17095
17096 // Do not remove the cast if the types differ in endian layout.
17097 if (TLI.hasBigEndianPartOrdering(VT: N0.getValueType(), DL: DAG.getDataLayout()) !=
17098 TLI.hasBigEndianPartOrdering(VT, DL: DAG.getDataLayout()))
17099 return SDValue();
17100
17101 // If the load is volatile, we only want to change the load type if the
17102 // resulting load is legal. Otherwise we might increase the number of
17103 // memory accesses. We don't care if the original type was legal or not
17104 // as we assume software couldn't rely on the number of accesses of an
17105 // illegal type.
17106 auto *LN0 = cast<LoadSDNode>(Val&: N0);
17107 if ((LegalOperations || !LN0->isSimple()) &&
17108 !TLI.isOperationLegal(Op: ISD::LOAD, VT))
17109 return SDValue();
17110
17111 if (!TLI.isLoadBitCastBeneficial(LoadVT: N0.getValueType(), BitcastVT: VT, DAG,
17112 MMO: *LN0->getMemOperand()))
17113 return SDValue();
17114
17115 // If the range metadata type does not match the new memory
17116 // operation type, remove the range metadata.
17117 if (const MDNode *MD = LN0->getRanges()) {
17118 ConstantInt *Lower = mdconst::extract<ConstantInt>(MD: MD->getOperand(I: 0));
17119 if (Lower->getBitWidth() != VT.getScalarSizeInBits() || !VT.isInteger()) {
17120 LN0->getMemOperand()->clearRanges();
17121 }
17122 }
17123 SDValue Load = DAG.getLoad(VT, dl: DL, Chain: LN0->getChain(), Ptr: LN0->getBasePtr(),
17124 MMO: LN0->getMemOperand());
17125 DAG.ReplaceAllUsesOfValueWith(From: N0.getValue(R: 1), To: Load.getValue(R: 1));
17126 return Load;
17127 };
17128
17129 if (SDValue NewLd = CastLoad(N0, SDLoc(N)))
17130 return NewLd;
17131
17132 if (N0.getOpcode() == ISD::FREEZE && N0.hasOneUse())
17133 if (SDValue NewLd = CastLoad(N0.getOperand(i: 0), SDLoc(N)))
17134 return DAG.getFreeze(V: NewLd);
17135
17136 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
17137 return V;
17138
17139 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
17140 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
17141 //
17142 // For ppc_fp128:
17143 // fold (bitcast (fneg x)) ->
17144 // flipbit = signbit
17145 // (xor (bitcast x) (build_pair flipbit, flipbit))
17146 //
17147 // fold (bitcast (fabs x)) ->
17148 // flipbit = (and (extract_element (bitcast x), 0), signbit)
17149 // (xor (bitcast x) (build_pair flipbit, flipbit))
17150 // This often reduces constant pool loads.
17151 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT: N0.getValueType())) ||
17152 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT: N0.getValueType()))) &&
17153 N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
17154 !N0.getValueType().isVector()) {
17155 SDValue NewConv = DAG.getBitcast(VT, V: N0.getOperand(i: 0));
17156 AddToWorklist(N: NewConv.getNode());
17157
17158 SDLoc DL(N);
17159 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
17160 assert(VT.getSizeInBits() == 128);
17161 SDValue SignBit = DAG.getConstant(
17162 Val: APInt::getSignMask(BitWidth: VT.getSizeInBits() / 2), DL: SDLoc(N0), VT: MVT::i64);
17163 SDValue FlipBit;
17164 if (N0.getOpcode() == ISD::FNEG) {
17165 FlipBit = SignBit;
17166 AddToWorklist(N: FlipBit.getNode());
17167 } else {
17168 assert(N0.getOpcode() == ISD::FABS);
17169 SDValue Hi =
17170 DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL: SDLoc(NewConv), VT: MVT::i64, N1: NewConv,
17171 N2: DAG.getIntPtrConstant(Val: getPPCf128HiElementSelector(DAG),
17172 DL: SDLoc(NewConv)));
17173 AddToWorklist(N: Hi.getNode());
17174 FlipBit = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(N0), VT: MVT::i64, N1: Hi, N2: SignBit);
17175 AddToWorklist(N: FlipBit.getNode());
17176 }
17177 SDValue FlipBits =
17178 DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: SDLoc(N0), VT, N1: FlipBit, N2: FlipBit);
17179 AddToWorklist(N: FlipBits.getNode());
17180 return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: NewConv, N2: FlipBits);
17181 }
17182 APInt SignBit = APInt::getSignMask(BitWidth: VT.getSizeInBits());
17183 if (N0.getOpcode() == ISD::FNEG)
17184 return DAG.getNode(Opcode: ISD::XOR, DL, VT,
17185 N1: NewConv, N2: DAG.getConstant(Val: SignBit, DL, VT));
17186 assert(N0.getOpcode() == ISD::FABS);
17187 return DAG.getNode(Opcode: ISD::AND, DL, VT,
17188 N1: NewConv, N2: DAG.getConstant(Val: ~SignBit, DL, VT));
17189 }
17190
17191 // fold (bitconvert (fcopysign cst, x)) ->
17192 // (or (and (bitconvert x), sign), (and cst, (not sign)))
17193 // Note that we don't handle (copysign x, cst) because this can always be
17194 // folded to an fneg or fabs.
17195 //
17196 // For ppc_fp128:
17197 // fold (bitcast (fcopysign cst, x)) ->
17198 // flipbit = (and (extract_element
17199 // (xor (bitcast cst), (bitcast x)), 0),
17200 // signbit)
17201 // (xor (bitcast cst) (build_pair flipbit, flipbit))
17202 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
17203 isa<ConstantFPSDNode>(Val: N0.getOperand(i: 0)) && VT.isInteger() &&
17204 !VT.isVector()) {
17205 unsigned OrigXWidth = N0.getOperand(i: 1).getValueSizeInBits();
17206 EVT IntXVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: OrigXWidth);
17207 if (isTypeLegal(VT: IntXVT)) {
17208 SDValue X = DAG.getBitcast(VT: IntXVT, V: N0.getOperand(i: 1));
17209 AddToWorklist(N: X.getNode());
17210
17211 // If X has a different width than the result/lhs, sext it or truncate it.
17212 unsigned VTWidth = VT.getSizeInBits();
17213 if (OrigXWidth < VTWidth) {
17214 X = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: SDLoc(N), VT, Operand: X);
17215 AddToWorklist(N: X.getNode());
17216 } else if (OrigXWidth > VTWidth) {
17217 // To get the sign bit in the right place, we have to shift it right
17218 // before truncating.
17219 SDLoc DL(X);
17220 X = DAG.getNode(Opcode: ISD::SRL, DL,
17221 VT: X.getValueType(), N1: X,
17222 N2: DAG.getConstant(Val: OrigXWidth-VTWidth, DL,
17223 VT: X.getValueType()));
17224 AddToWorklist(N: X.getNode());
17225 X = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(X), VT, Operand: X);
17226 AddToWorklist(N: X.getNode());
17227 }
17228
17229 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
17230 APInt SignBit = APInt::getSignMask(BitWidth: VT.getSizeInBits() / 2);
17231 SDValue Cst = DAG.getBitcast(VT, V: N0.getOperand(i: 0));
17232 AddToWorklist(N: Cst.getNode());
17233 SDValue X = DAG.getBitcast(VT, V: N0.getOperand(i: 1));
17234 AddToWorklist(N: X.getNode());
17235 SDValue XorResult = DAG.getNode(Opcode: ISD::XOR, DL: SDLoc(N0), VT, N1: Cst, N2: X);
17236 AddToWorklist(N: XorResult.getNode());
17237 SDValue XorResult64 = DAG.getNode(
17238 Opcode: ISD::EXTRACT_ELEMENT, DL: SDLoc(XorResult), VT: MVT::i64, N1: XorResult,
17239 N2: DAG.getIntPtrConstant(Val: getPPCf128HiElementSelector(DAG),
17240 DL: SDLoc(XorResult)));
17241 AddToWorklist(N: XorResult64.getNode());
17242 SDValue FlipBit =
17243 DAG.getNode(Opcode: ISD::AND, DL: SDLoc(XorResult64), VT: MVT::i64, N1: XorResult64,
17244 N2: DAG.getConstant(Val: SignBit, DL: SDLoc(XorResult64), VT: MVT::i64));
17245 AddToWorklist(N: FlipBit.getNode());
17246 SDValue FlipBits =
17247 DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: SDLoc(N0), VT, N1: FlipBit, N2: FlipBit);
17248 AddToWorklist(N: FlipBits.getNode());
17249 return DAG.getNode(Opcode: ISD::XOR, DL: SDLoc(N), VT, N1: Cst, N2: FlipBits);
17250 }
17251 APInt SignBit = APInt::getSignMask(BitWidth: VT.getSizeInBits());
17252 X = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(X), VT,
17253 N1: X, N2: DAG.getConstant(Val: SignBit, DL: SDLoc(X), VT));
17254 AddToWorklist(N: X.getNode());
17255
17256 SDValue Cst = DAG.getBitcast(VT, V: N0.getOperand(i: 0));
17257 Cst = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(Cst), VT,
17258 N1: Cst, N2: DAG.getConstant(Val: ~SignBit, DL: SDLoc(Cst), VT));
17259 AddToWorklist(N: Cst.getNode());
17260
17261 return DAG.getNode(Opcode: ISD::OR, DL: SDLoc(N), VT, N1: X, N2: Cst);
17262 }
17263 }
17264
17265 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
17266 if (N0.getOpcode() == ISD::BUILD_PAIR)
17267 if (SDValue CombineLD = CombineConsecutiveLoads(N: N0.getNode(), VT))
17268 return CombineLD;
17269
17270 // int_vt (bitcast (vec_vt (scalar_to_vector elt_vt:x)))
17271 // => int_vt (any_extend elt_vt:x)
17272 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isScalarInteger()) {
17273 SDValue SrcScalar = N0.getOperand(i: 0);
17274 if (SrcScalar.getValueType().isScalarInteger())
17275 return DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: SDLoc(N), VT, Operand: SrcScalar);
17276 }
17277
17278 // Remove double bitcasts from shuffles - this is often a legacy of
17279 // XformToShuffleWithZero being used to combine bitmaskings (of
17280 // float vectors bitcast to integer vectors) into shuffles.
17281 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
17282 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
17283 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
17284 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
17285 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
17286 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Val&: N0);
17287
17288 // If operands are a bitcast, peek through if it casts the original VT.
17289 // If operands are a constant, just bitcast back to original VT.
17290 auto PeekThroughBitcast = [&](SDValue Op) {
17291 if (Op.getOpcode() == ISD::BITCAST &&
17292 Op.getOperand(i: 0).getValueType() == VT)
17293 return SDValue(Op.getOperand(i: 0));
17294 if (Op.isUndef() || isAnyConstantBuildVector(V: Op))
17295 return DAG.getBitcast(VT, V: Op);
17296 return SDValue();
17297 };
17298
17299 // FIXME: If either input vector is bitcast, try to convert the shuffle to
17300 // the result type of this bitcast. This would eliminate at least one
17301 // bitcast. See the transform in InstCombine.
17302 SDValue SV0 = PeekThroughBitcast(N0->getOperand(Num: 0));
17303 SDValue SV1 = PeekThroughBitcast(N0->getOperand(Num: 1));
17304 if (!(SV0 && SV1))
17305 return SDValue();
17306
17307 int MaskScale =
17308 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
17309 SmallVector<int, 8> NewMask;
17310 for (int M : SVN->getMask())
17311 for (int i = 0; i != MaskScale; ++i)
17312 NewMask.push_back(Elt: M < 0 ? -1 : M * MaskScale + i);
17313
17314 SDValue LegalShuffle =
17315 TLI.buildLegalVectorShuffle(VT, DL: SDLoc(N), N0: SV0, N1: SV1, Mask: NewMask, DAG);
17316 if (LegalShuffle)
17317 return LegalShuffle;
17318 }
17319
17320 return SDValue();
17321}
17322
17323SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
17324 EVT VT = N->getValueType(ResNo: 0);
17325 return CombineConsecutiveLoads(N, VT);
17326}
17327
17328SDValue DAGCombiner::visitFREEZE(SDNode *N) {
17329 SDValue N0 = N->getOperand(Num: 0);
17330
17331 if (DAG.isGuaranteedNotToBeUndefOrPoison(Op: N0, /*PoisonOnly*/ false))
17332 return N0;
17333
17334 // If we have frozen and unfrozen users of N0, update so everything uses N.
17335 if (!N0.isUndef() && !N0.hasOneUse()) {
17336 SDValue FrozenN0(N, 0);
17337 // Unfreeze all uses of N to avoid double deleting N from the CSE map.
17338 DAG.ReplaceAllUsesOfValueWith(From: FrozenN0, To: N0);
17339 DAG.ReplaceAllUsesOfValueWith(From: N0, To: FrozenN0);
17340 // ReplaceAllUsesOfValueWith will have also updated the use in N, thus
17341 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
17342 assert(N->getOperand(0) == FrozenN0 && "Expected cycle in DAG");
17343 DAG.UpdateNodeOperands(N, Op: N0);
17344 return FrozenN0;
17345 }
17346
17347 // We currently avoid folding freeze over SRA/SRL, due to the problems seen
17348 // with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
17349 // example https://reviews.llvm.org/D136529#4120959.
17350 if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
17351 return SDValue();
17352
17353 // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
17354 // Try to push freeze through instructions that propagate but don't produce
17355 // poison as far as possible. If an operand of the freeze meets three
17356 // conditions (it has one use, it does not produce poison, and all but one of
17357 // its operands are guaranteed non-poison, or it is a BUILD_VECTOR or similar),
17358 // push the freeze through to the operands that are not guaranteed non-poison.
17359 // NOTE: we will strip poison-generating flags, so ignore them here.
17360 if (DAG.canCreateUndefOrPoison(Op: N0, /*PoisonOnly*/ false,
17361 /*ConsiderFlags*/ false) ||
17362 N0->getNumValues() != 1 || !N0->hasOneUse())
17363 return SDValue();
17364
17365 // TODO: we should always allow multiple operands; however, this increases the
17366 // likelihood of infinite loops: the ReplaceAllUsesOfValueWith call below can
17367 // make later nodes that share frozen operands fold again while no longer
17368 // being able to confirm other operands are not poison, due to the recursion
17369 // depth limits on isGuaranteedNotToBeUndefOrPoison.
17370 bool AllowMultipleMaybePoisonOperands =
17371 N0.getOpcode() == ISD::SELECT_CC || N0.getOpcode() == ISD::SETCC ||
17372 N0.getOpcode() == ISD::BUILD_VECTOR ||
17373 N0.getOpcode() == ISD::INSERT_SUBVECTOR ||
17374 N0.getOpcode() == ISD::BUILD_PAIR ||
17375 N0.getOpcode() == ISD::VECTOR_SHUFFLE ||
17376 N0.getOpcode() == ISD::CONCAT_VECTORS || N0.getOpcode() == ISD::FMUL;
17377
17378 // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
17379 // ones" or "constant" into something that depends on FrozenUndef. We can
17380 // instead pick undef values to keep those properties, while at the same time
17381 // folding away the freeze.
17382 // If we implement a more general solution for folding away freeze(undef) in
17383 // the future, then this special handling can be removed.
17384 if (N0.getOpcode() == ISD::BUILD_VECTOR) {
17385 SDLoc DL(N0);
17386 EVT VT = N0.getValueType();
17387 if (llvm::ISD::isBuildVectorAllOnes(N: N0.getNode()) && VT.isInteger())
17388 return DAG.getAllOnesConstant(DL, VT);
17389 if (llvm::ISD::isBuildVectorOfConstantSDNodes(N: N0.getNode())) {
17390 SmallVector<SDValue, 8> NewVecC;
17391 for (const SDValue &Op : N0->op_values())
17392 NewVecC.push_back(
17393 Elt: Op.isUndef() ? DAG.getConstant(Val: 0, DL, VT: Op.getValueType()) : Op);
17394 return DAG.getBuildVector(VT, DL, Ops: NewVecC);
17395 }
17396 }
17397
17398 SmallSet<SDValue, 8> MaybePoisonOperands;
17399 SmallVector<unsigned, 8> MaybePoisonOperandNumbers;
17400 for (auto [OpNo, Op] : enumerate(First: N0->ops())) {
17401 if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly=*/false))
17402 continue;
17403 bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
17404 bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(V: Op).second;
17405 if (IsNewMaybePoisonOperand)
17406 MaybePoisonOperandNumbers.push_back(Elt: OpNo);
17407 if (!HadMaybePoisonOperands)
17408 continue;
17409 if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
17410 // Multiple maybe-poison ops when not allowed - bail out.
17411 return SDValue();
17412 }
17413 }
17414 // NOTE: the whole op may still not be guaranteed non-undef/non-poison, since
17415 // it could create undef or poison due to its poison-generating flags.
17416 // So not finding any maybe-poison operands is fine.
17417
17418 for (unsigned OpNo : MaybePoisonOperandNumbers) {
17419 // N0 can mutate during iteration, so make sure to refetch the maybe poison
17420 // operands via the operand numbers. The typical scenario is that we have
17421 // something like this
17422 // t262: i32 = freeze t181
17423 // t150: i32 = ctlz_zero_undef t262
17424 // t184: i32 = ctlz_zero_undef t181
17425 // t268: i32 = select_cc t181, Constant:i32<0>, t184, t186, setne:ch
17426 // When freezing the t181 operand we get t262 back, and then the
17427 // ReplaceAllUsesOfValueWith call will not only replace t181 by t262, but
17428 // also recursively replace t184 by t150.
17429 SDValue MaybePoisonOperand = N->getOperand(Num: 0).getOperand(i: OpNo);
17430 // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
17431 if (MaybePoisonOperand.isUndef())
17432 continue;
17433 // First, freeze each offending operand.
17434 SDValue FrozenMaybePoisonOperand = DAG.getFreeze(V: MaybePoisonOperand);
17435 // Then, change all other uses of unfrozen operand to use frozen operand.
17436 DAG.ReplaceAllUsesOfValueWith(From: MaybePoisonOperand, To: FrozenMaybePoisonOperand);
17437 if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
17438 FrozenMaybePoisonOperand.getOperand(i: 0) == FrozenMaybePoisonOperand) {
17439 // But, that also updated the use in the freeze we just created, thus
17440 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
17441 DAG.UpdateNodeOperands(N: FrozenMaybePoisonOperand.getNode(),
17442 Op: MaybePoisonOperand);
17443 }
17444
17445 // This node has been merged with another.
17446 if (N->getOpcode() == ISD::DELETED_NODE)
17447 return SDValue(N, 0);
17448 }
17449
17450 assert(N->getOpcode() != ISD::DELETED_NODE && "Node was deleted!");
17451
17452 // The whole node may have been updated, so the value we were holding
17453 // may no longer be valid. Re-fetch the operand we're `freeze`ing.
17454 N0 = N->getOperand(Num: 0);
17455
17456 // Finally, recreate the node; its operands were updated to use frozen
17457 // operands, so we just need to use its "original" operands.
17458 SmallVector<SDValue> Ops(N0->ops());
  // TODO: ISD::UNDEF and ISD::POISON should get separate handling, but that is
  // best left for a future patch.
17461 for (SDValue &Op : Ops) {
17462 if (Op.isUndef())
17463 Op = DAG.getFreeze(V: Op);
17464 }
17465
17466 SDLoc DL(N0);
17467
17468 // Special case handling for ShuffleVectorSDNode nodes.
17469 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(Val&: N0))
17470 return DAG.getVectorShuffle(VT: N0.getValueType(), dl: DL, N1: Ops[0], N2: Ops[1],
17471 Mask: SVN->getMask());
17472
  // NOTE: this strips poison-generating flags.
  // Folding freeze(op(x, ...)) -> op(freeze(x), ...) does not require nnan,
  // ninf, nsz, or fast, so those flags are dropped.
  // However, contract, reassoc, afn, and arcp should be preserved, as these
  // fast-math flags do not introduce poison values.
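  // Illustrative example: freeze (fadd nnan x, y) must not become
  // (fadd nnan (freeze x), (freeze y)); if x is a NaN, the nnan flag makes
  // the new fadd produce poison, which the original freeze would have fixed.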
17478 SDNodeFlags SrcFlags = N0->getFlags();
17479 SDNodeFlags SafeFlags;
17480 SafeFlags.setAllowContract(SrcFlags.hasAllowContract());
17481 SafeFlags.setAllowReassociation(SrcFlags.hasAllowReassociation());
17482 SafeFlags.setApproximateFuncs(SrcFlags.hasApproximateFuncs());
17483 SafeFlags.setAllowReciprocal(SrcFlags.hasAllowReciprocal());
17484 return DAG.getNode(Opcode: N0.getOpcode(), DL, VTList: N0->getVTList(), Ops, Flags: SafeFlags);
17485}
17486
// Returns true if floating-point contraction is allowed on the FMUL SDValue
// `N`.
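// Illustrative summary of the check below: contraction permits e.g. folding
// (fadd (fmul x, y), z) into (fma x, y, z), skipping the intermediate rounding
// of the multiply. It is enabled either globally (e.g. -ffp-contract=fast,
// i.e. FPOpFusion::Fast) or per node via the 'contract' fast-math flag.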
17489static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
17490 assert(N.getOpcode() == ISD::FMUL);
17491
17492 return Options.AllowFPOpFusion == FPOpFusion::Fast ||
17493 N->getFlags().hasAllowContract();
17494}
17495
17496/// Try to perform FMA combining on a given FADD node.
17497template <class MatchContextClass>
17498SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
17499 SDValue N0 = N->getOperand(Num: 0);
17500 SDValue N1 = N->getOperand(Num: 1);
17501 EVT VT = N->getValueType(ResNo: 0);
17502 SDLoc SL(N);
17503 MatchContextClass matcher(DAG, TLI, N);
17504 const TargetOptions &Options = DAG.getTarget().Options;
17505
17506 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
17507
17508 // Floating-point multiply-add with intermediate rounding.
17509 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
17510 // FIXME: Add VP_FMAD opcode.
17511 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
17512
17513 // Floating-point multiply-add without intermediate rounding.
17514 bool HasFMA =
17515 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&
17516 TLI.isFMAFasterThanFMulAndFAdd(MF: DAG.getMachineFunction(), VT);
17517
17518 // No valid opcode, do not combine.
17519 if (!HasFMAD && !HasFMA)
17520 return SDValue();
17521
17522 bool AllowFusionGlobally =
17523 Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD;
17524 // If the addition is not contractable, do not combine.
17525 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
17526 return SDValue();
17527
17528 // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
17529 // beneficial. It does not reduce latency. It increases register pressure. It
17530 // replaces an fadd with an fma which is a more complex instruction, so is
17531 // likely to have a larger encoding, use more functional units, etc.
17532 if (N0 == N1)
17533 return SDValue();
17534
17535 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
17536 return SDValue();
17537
17538 // Always prefer FMAD to FMA for precision.
17539 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
17540 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
17541
17542 auto isFusedOp = [&](SDValue N) {
17543 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
17544 };
17545
17546 // Is the node an FMUL and contractable either due to global flags or
17547 // SDNodeFlags.
17548 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
17549 if (!matcher.match(N, ISD::FMUL))
17550 return false;
17551 return AllowFusionGlobally || N->getFlags().hasAllowContract();
17552 };
17553 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
17554 // prefer to fold the multiply with fewer uses.
17555 if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
17556 if (N0->use_size() > N1->use_size())
17557 std::swap(a&: N0, b&: N1);
17558 }
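  // (Illustrative rationale: if the multiply with more uses were folded, it
  // would still be kept alive for its other users, so we would end up
  // computing both the fmul and the fma; folding the less-used multiply gives
  // it a better chance of being removed entirely.)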
17559
17560 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
17561 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
17562 return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(i: 0),
17563 N0.getOperand(i: 1), N1);
17564 }
17565
17566 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
17567 // Note: Commutes FADD operands.
17568 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
17569 return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(i: 0),
17570 N1.getOperand(i: 1), N0);
17571 }
17572
17573 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
17574 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
17575 // This also works with nested fma instructions:
  //   fadd (fma A, B, (fma C, D, (fmul E, F))), G -->
  //     fma A, B, (fma C, D, (fma E, F, G))
  //   fadd G, (fma A, B, (fma C, D, (fmul E, F))) -->
  //     fma A, B, (fma C, D, (fma E, F, G)).
17580 // This requires reassociation because it changes the order of operations.
17581 bool CanReassociate = N->getFlags().hasAllowReassociation();
17582 if (CanReassociate) {
17583 SDValue FMA, E;
17584 if (isFusedOp(N0) && N0.hasOneUse()) {
17585 FMA = N0;
17586 E = N1;
17587 } else if (isFusedOp(N1) && N1.hasOneUse()) {
17588 FMA = N1;
17589 E = N0;
17590 }
17591
17592 SDValue TmpFMA = FMA;
17593 while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
17594 SDValue FMul = TmpFMA->getOperand(Num: 2);
17595 if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
17596 SDValue C = FMul.getOperand(i: 0);
17597 SDValue D = FMul.getOperand(i: 1);
17598 SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
17599 DAG.ReplaceAllUsesOfValueWith(From: FMul, To: CDE);
17600 // Replacing the inner FMul could cause the outer FMA to be simplified
17601 // away.
17602 return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
17603 }
17604
17605 TmpFMA = TmpFMA->getOperand(Num: 2);
17606 }
17607 }
17608
17609 // Look through FP_EXTEND nodes to do more combining.
17610
17611 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
17612 if (matcher.match(N0, ISD::FP_EXTEND)) {
17613 SDValue N00 = N0.getOperand(i: 0);
17614 if (isContractableFMUL(N00) &&
17615 TLI.isFPExtFoldable(DAG, Opcode: PreferredFusedOpcode, DestVT: VT,
17616 SrcVT: N00.getValueType())) {
17617 return matcher.getNode(
17618 PreferredFusedOpcode, SL, VT,
17619 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(i: 0)),
17620 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(i: 1)), N1);
17621 }
17622 }
17623
17624 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
17625 // Note: Commutes FADD operands.
17626 if (matcher.match(N1, ISD::FP_EXTEND)) {
17627 SDValue N10 = N1.getOperand(i: 0);
17628 if (isContractableFMUL(N10) &&
17629 TLI.isFPExtFoldable(DAG, Opcode: PreferredFusedOpcode, DestVT: VT,
17630 SrcVT: N10.getValueType())) {
17631 return matcher.getNode(
17632 PreferredFusedOpcode, SL, VT,
17633 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(i: 0)),
17634 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(i: 1)), N0);
17635 }
17636 }
17637
17638 // More folding opportunities when target permits.
17639 if (Aggressive) {
17640 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
17641 // -> (fma x, y, (fma (fpext u), (fpext v), z))
17642 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
17643 SDValue Z) {
17644 return matcher.getNode(
17645 PreferredFusedOpcode, SL, VT, X, Y,
17646 matcher.getNode(PreferredFusedOpcode, SL, VT,
17647 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
17648 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
17649 };
17650 if (isFusedOp(N0)) {
17651 SDValue N02 = N0.getOperand(i: 2);
17652 if (matcher.match(N02, ISD::FP_EXTEND)) {
17653 SDValue N020 = N02.getOperand(i: 0);
17654 if (isContractableFMUL(N020) &&
17655 TLI.isFPExtFoldable(DAG, Opcode: PreferredFusedOpcode, DestVT: VT,
17656 SrcVT: N020.getValueType())) {
17657 return FoldFAddFMAFPExtFMul(N0.getOperand(i: 0), N0.getOperand(i: 1),
17658 N020.getOperand(i: 0), N020.getOperand(i: 1),
17659 N1);
17660 }
17661 }
17662 }
17663
17664 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
17665 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
17666 // FIXME: This turns two single-precision and one double-precision
17667 // operation into two double-precision operations, which might not be
17668 // interesting for all targets, especially GPUs.
17669 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
17670 SDValue Z) {
17671 return matcher.getNode(
17672 PreferredFusedOpcode, SL, VT,
17673 matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
17674 matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
17675 matcher.getNode(PreferredFusedOpcode, SL, VT,
17676 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
17677 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
17678 };
17679 if (N0.getOpcode() == ISD::FP_EXTEND) {
17680 SDValue N00 = N0.getOperand(i: 0);
17681 if (isFusedOp(N00)) {
17682 SDValue N002 = N00.getOperand(i: 2);
17683 if (isContractableFMUL(N002) &&
17684 TLI.isFPExtFoldable(DAG, Opcode: PreferredFusedOpcode, DestVT: VT,
17685 SrcVT: N00.getValueType())) {
17686 return FoldFAddFPExtFMAFMul(N00.getOperand(i: 0), N00.getOperand(i: 1),
17687 N002.getOperand(i: 0), N002.getOperand(i: 1),
17688 N1);
17689 }
17690 }
17691 }
17692
    // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
17694 // -> (fma y, z, (fma (fpext u), (fpext v), x))
17695 if (isFusedOp(N1)) {
17696 SDValue N12 = N1.getOperand(i: 2);
17697 if (N12.getOpcode() == ISD::FP_EXTEND) {
17698 SDValue N120 = N12.getOperand(i: 0);
17699 if (isContractableFMUL(N120) &&
17700 TLI.isFPExtFoldable(DAG, Opcode: PreferredFusedOpcode, DestVT: VT,
17701 SrcVT: N120.getValueType())) {
17702 return FoldFAddFMAFPExtFMul(N1.getOperand(i: 0), N1.getOperand(i: 1),
17703 N120.getOperand(i: 0), N120.getOperand(i: 1),
17704 N0);
17705 }
17706 }
17707 }
17708
    // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
17710 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
17711 // FIXME: This turns two single-precision and one double-precision
17712 // operation into two double-precision operations, which might not be
17713 // interesting for all targets, especially GPUs.
17714 if (N1.getOpcode() == ISD::FP_EXTEND) {
17715 SDValue N10 = N1.getOperand(i: 0);
17716 if (isFusedOp(N10)) {
17717 SDValue N102 = N10.getOperand(i: 2);
17718 if (isContractableFMUL(N102) &&
17719 TLI.isFPExtFoldable(DAG, Opcode: PreferredFusedOpcode, DestVT: VT,
17720 SrcVT: N10.getValueType())) {
17721 return FoldFAddFPExtFMAFMul(N10.getOperand(i: 0), N10.getOperand(i: 1),
17722 N102.getOperand(i: 0), N102.getOperand(i: 1),
17723 N0);
17724 }
17725 }
17726 }
17727 }
17728
17729 return SDValue();
17730}
17731
17732/// Try to perform FMA combining on a given FSUB node.
17733template <class MatchContextClass>
17734SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
17735 SDValue N0 = N->getOperand(Num: 0);
17736 SDValue N1 = N->getOperand(Num: 1);
17737 EVT VT = N->getValueType(ResNo: 0);
17738 SDLoc SL(N);
17739 MatchContextClass matcher(DAG, TLI, N);
17740 const TargetOptions &Options = DAG.getTarget().Options;
17741
17742 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
17743
17744 // Floating-point multiply-add with intermediate rounding.
17745 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
17746 // FIXME: Add VP_FMAD opcode.
17747 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
17748
17749 // Floating-point multiply-add without intermediate rounding.
17750 bool HasFMA =
17751 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&
17752 TLI.isFMAFasterThanFMulAndFAdd(MF: DAG.getMachineFunction(), VT);
17753
17754 // No valid opcode, do not combine.
17755 if (!HasFMAD && !HasFMA)
17756 return SDValue();
17757
17758 const SDNodeFlags Flags = N->getFlags();
17759 bool AllowFusionGlobally =
17760 (Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD);
17761
17762 // If the subtraction is not contractable, do not combine.
17763 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
17764 return SDValue();
17765
17766 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
17767 return SDValue();
17768
17769 // Always prefer FMAD to FMA for precision.
17770 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
17771 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
17772 bool NoSignedZero = Flags.hasNoSignedZeros();
17773
17774 // Is the node an FMUL and contractable either due to global flags or
17775 // SDNodeFlags.
17776 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
17777 if (!matcher.match(N, ISD::FMUL))
17778 return false;
17779 return AllowFusionGlobally || N->getFlags().hasAllowContract();
17780 };
17781
17782 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
17783 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
17784 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
17785 return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(i: 0),
17786 XY.getOperand(i: 1),
17787 matcher.getNode(ISD::FNEG, SL, VT, Z));
17788 }
17789 return SDValue();
17790 };
17791
17792 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
17793 // Note: Commutes FSUB operands.
17794 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
17795 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
17796 return matcher.getNode(
17797 PreferredFusedOpcode, SL, VT,
17798 matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(i: 0)),
17799 YZ.getOperand(i: 1), X);
17800 }
17801 return SDValue();
17802 };
17803
17804 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
17805 // prefer to fold the multiply with fewer uses.
17806 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
17807 (N0->use_size() > N1->use_size())) {
17808 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
17809 if (SDValue V = tryToFoldXSubYZ(N0, N1))
17810 return V;
17811 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
17812 if (SDValue V = tryToFoldXYSubZ(N0, N1))
17813 return V;
17814 } else {
17815 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
17816 if (SDValue V = tryToFoldXYSubZ(N0, N1))
17817 return V;
17818 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
17819 if (SDValue V = tryToFoldXSubYZ(N0, N1))
17820 return V;
17821 }
17822
  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
17824 if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(i: 0)) &&
17825 (Aggressive || (N0->hasOneUse() && N0.getOperand(i: 0).hasOneUse()))) {
17826 SDValue N00 = N0.getOperand(i: 0).getOperand(i: 0);
17827 SDValue N01 = N0.getOperand(i: 0).getOperand(i: 1);
17828 return matcher.getNode(PreferredFusedOpcode, SL, VT,
17829 matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
17830 matcher.getNode(ISD::FNEG, SL, VT, N1));
17831 }
17832
17833 // Look through FP_EXTEND nodes to do more combining.
17834
17835 // fold (fsub (fpext (fmul x, y)), z)
17836 // -> (fma (fpext x), (fpext y), (fneg z))
17837 if (matcher.match(N0, ISD::FP_EXTEND)) {
17838 SDValue N00 = N0.getOperand(i: 0);
17839 if (isContractableFMUL(N00) &&
17840 TLI.isFPExtFoldable(DAG, Opcode: PreferredFusedOpcode, DestVT: VT,
17841 SrcVT: N00.getValueType())) {
17842 return matcher.getNode(
17843 PreferredFusedOpcode, SL, VT,
17844 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(i: 0)),
17845 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(i: 1)),
17846 matcher.getNode(ISD::FNEG, SL, VT, N1));
17847 }
17848 }
17849
17850 // fold (fsub x, (fpext (fmul y, z)))
17851 // -> (fma (fneg (fpext y)), (fpext z), x)
17852 // Note: Commutes FSUB operands.
17853 if (matcher.match(N1, ISD::FP_EXTEND)) {
17854 SDValue N10 = N1.getOperand(i: 0);
17855 if (isContractableFMUL(N10) &&
17856 TLI.isFPExtFoldable(DAG, Opcode: PreferredFusedOpcode, DestVT: VT,
17857 SrcVT: N10.getValueType())) {
17858 return matcher.getNode(
17859 PreferredFusedOpcode, SL, VT,
17860 matcher.getNode(
17861 ISD::FNEG, SL, VT,
17862 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(i: 0))),
17863 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(i: 1)), N0);
17864 }
17865 }
17866
  // fold (fsub (fpext (fneg (fmul x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
  // command-line flag -fp-contract=fast and the fast-math flag 'contract'
  // prevent us from implementing the canonicalization in visitFSUB.
17873 if (matcher.match(N0, ISD::FP_EXTEND)) {
17874 SDValue N00 = N0.getOperand(i: 0);
17875 if (matcher.match(N00, ISD::FNEG)) {
17876 SDValue N000 = N00.getOperand(i: 0);
17877 if (isContractableFMUL(N000) &&
17878 TLI.isFPExtFoldable(DAG, Opcode: PreferredFusedOpcode, DestVT: VT,
17879 SrcVT: N00.getValueType())) {
17880 return matcher.getNode(
17881 ISD::FNEG, SL, VT,
17882 matcher.getNode(
17883 PreferredFusedOpcode, SL, VT,
17884 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(i: 0)),
17885 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(i: 1)),
17886 N1));
17887 }
17888 }
17889 }
17890
  // fold (fsub (fneg (fpext (fmul x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
  // command-line flag -fp-contract=fast and the fast-math flag 'contract'
  // prevent us from implementing the canonicalization in visitFSUB.
17897 if (matcher.match(N0, ISD::FNEG)) {
17898 SDValue N00 = N0.getOperand(i: 0);
17899 if (matcher.match(N00, ISD::FP_EXTEND)) {
17900 SDValue N000 = N00.getOperand(i: 0);
17901 if (isContractableFMUL(N000) &&
17902 TLI.isFPExtFoldable(DAG, Opcode: PreferredFusedOpcode, DestVT: VT,
17903 SrcVT: N000.getValueType())) {
17904 return matcher.getNode(
17905 ISD::FNEG, SL, VT,
17906 matcher.getNode(
17907 PreferredFusedOpcode, SL, VT,
17908 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(i: 0)),
17909 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(i: 1)),
17910 N1));
17911 }
17912 }
17913 }
17914
17915 auto isContractableAndReassociableFMUL = [&isContractableFMUL](SDValue N) {
17916 return isContractableFMUL(N) && N->getFlags().hasAllowReassociation();
17917 };
17918
17919 auto isFusedOp = [&](SDValue N) {
17920 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
17921 };
17922
17923 // More folding opportunities when target permits.
17924 if (Aggressive && N->getFlags().hasAllowReassociation()) {
17925 bool CanFuse = N->getFlags().hasAllowContract();
17926 // fold (fsub (fma x, y, (fmul u, v)), z)
    //   -> (fma x, y, (fma u, v, (fneg z)))
17928 if (CanFuse && isFusedOp(N0) &&
17929 isContractableAndReassociableFMUL(N0.getOperand(i: 2)) &&
17930 N0->hasOneUse() && N0.getOperand(i: 2)->hasOneUse()) {
17931 return matcher.getNode(
17932 PreferredFusedOpcode, SL, VT, N0.getOperand(i: 0), N0.getOperand(i: 1),
17933 matcher.getNode(PreferredFusedOpcode, SL, VT,
17934 N0.getOperand(i: 2).getOperand(i: 0),
17935 N0.getOperand(i: 2).getOperand(i: 1),
17936 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17937 }
17938
17939 // fold (fsub x, (fma y, z, (fmul u, v)))
17940 // -> (fma (fneg y), z, (fma (fneg u), v, x))
17941 if (CanFuse && isFusedOp(N1) &&
17942 isContractableAndReassociableFMUL(N1.getOperand(i: 2)) &&
17943 N1->hasOneUse() && NoSignedZero) {
17944 SDValue N20 = N1.getOperand(i: 2).getOperand(i: 0);
17945 SDValue N21 = N1.getOperand(i: 2).getOperand(i: 1);
17946 return matcher.getNode(
17947 PreferredFusedOpcode, SL, VT,
17948 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(i: 0)),
17949 N1.getOperand(i: 1),
17950 matcher.getNode(PreferredFusedOpcode, SL, VT,
17951 matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
17952 }
17953
17954 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
17956 if (isFusedOp(N0) && N0->hasOneUse()) {
17957 SDValue N02 = N0.getOperand(i: 2);
17958 if (matcher.match(N02, ISD::FP_EXTEND)) {
17959 SDValue N020 = N02.getOperand(i: 0);
17960 if (isContractableAndReassociableFMUL(N020) &&
17961 TLI.isFPExtFoldable(DAG, Opcode: PreferredFusedOpcode, DestVT: VT,
17962 SrcVT: N020.getValueType())) {
17963 return matcher.getNode(
17964 PreferredFusedOpcode, SL, VT, N0.getOperand(i: 0), N0.getOperand(i: 1),
17965 matcher.getNode(
17966 PreferredFusedOpcode, SL, VT,
17967 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(i: 0)),
17968 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(i: 1)),
17969 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17970 }
17971 }
17972 }
17973
17974 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
17975 // -> (fma (fpext x), (fpext y),
17976 // (fma (fpext u), (fpext v), (fneg z)))
17977 // FIXME: This turns two single-precision and one double-precision
17978 // operation into two double-precision operations, which might not be
17979 // interesting for all targets, especially GPUs.
17980 if (matcher.match(N0, ISD::FP_EXTEND)) {
17981 SDValue N00 = N0.getOperand(i: 0);
17982 if (isFusedOp(N00)) {
17983 SDValue N002 = N00.getOperand(i: 2);
17984 if (isContractableAndReassociableFMUL(N002) &&
17985 TLI.isFPExtFoldable(DAG, Opcode: PreferredFusedOpcode, DestVT: VT,
17986 SrcVT: N00.getValueType())) {
17987 return matcher.getNode(
17988 PreferredFusedOpcode, SL, VT,
17989 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(i: 0)),
17990 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(i: 1)),
17991 matcher.getNode(
17992 PreferredFusedOpcode, SL, VT,
17993 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(i: 0)),
17994 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(i: 1)),
17995 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17996 }
17997 }
17998 }
17999
18000 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
18001 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
18002 if (isFusedOp(N1) && matcher.match(N1.getOperand(i: 2), ISD::FP_EXTEND) &&
18003 N1->hasOneUse()) {
18004 SDValue N120 = N1.getOperand(i: 2).getOperand(i: 0);
18005 if (isContractableAndReassociableFMUL(N120) &&
18006 TLI.isFPExtFoldable(DAG, Opcode: PreferredFusedOpcode, DestVT: VT,
18007 SrcVT: N120.getValueType())) {
18008 SDValue N1200 = N120.getOperand(i: 0);
18009 SDValue N1201 = N120.getOperand(i: 1);
18010 return matcher.getNode(
18011 PreferredFusedOpcode, SL, VT,
18012 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(i: 0)),
18013 N1.getOperand(i: 1),
18014 matcher.getNode(
18015 PreferredFusedOpcode, SL, VT,
18016 matcher.getNode(ISD::FNEG, SL, VT,
18017 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
18018 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
18019 }
18020 }
18021
18022 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
18023 // -> (fma (fneg (fpext y)), (fpext z),
18024 // (fma (fneg (fpext u)), (fpext v), x))
18025 // FIXME: This turns two single-precision and one double-precision
18026 // operation into two double-precision operations, which might not be
18027 // interesting for all targets, especially GPUs.
18028 if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(i: 0))) {
18029 SDValue CvtSrc = N1.getOperand(i: 0);
18030 SDValue N100 = CvtSrc.getOperand(i: 0);
18031 SDValue N101 = CvtSrc.getOperand(i: 1);
18032 SDValue N102 = CvtSrc.getOperand(i: 2);
18033 if (isContractableAndReassociableFMUL(N102) &&
18034 TLI.isFPExtFoldable(DAG, Opcode: PreferredFusedOpcode, DestVT: VT,
18035 SrcVT: CvtSrc.getValueType())) {
18036 SDValue N1020 = N102.getOperand(i: 0);
18037 SDValue N1021 = N102.getOperand(i: 1);
18038 return matcher.getNode(
18039 PreferredFusedOpcode, SL, VT,
18040 matcher.getNode(ISD::FNEG, SL, VT,
18041 matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
18042 matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
18043 matcher.getNode(
18044 PreferredFusedOpcode, SL, VT,
18045 matcher.getNode(ISD::FNEG, SL, VT,
18046 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
18047 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
18048 }
18049 }
18050 }
18051
18052 return SDValue();
18053}
18054
18055/// Try to perform FMA combining on a given FMUL node based on the distributive
18056/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
18057/// subtraction instead of addition).
18058SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
18059 SDValue N0 = N->getOperand(Num: 0);
18060 SDValue N1 = N->getOperand(Num: 1);
18061 EVT VT = N->getValueType(ResNo: 0);
18062 SDLoc SL(N);
18063
18064 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
18065
18066 const TargetOptions &Options = DAG.getTarget().Options;
18067
18068 // The transforms below are incorrect when x == 0 and y == inf, because the
18069 // intermediate multiplication produces a nan.
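  // Worked example (illustrative): with x == 0.0 and y == inf,
  // (x + 1.0) * y == 1.0 * inf == inf, but the fused form
  // fma(x, y, y) == (0.0 * inf) + inf == NaN + inf == NaN.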
18070 SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
18071 if (!FAdd->getFlags().hasNoInfs())
18072 return SDValue();
18073
18074 // Floating-point multiply-add without intermediate rounding.
18075 bool HasFMA =
18076 isContractableFMUL(Options, N: SDValue(N, 0)) &&
18077 (!LegalOperations || TLI.isOperationLegalOrCustom(Op: ISD::FMA, VT)) &&
18078 TLI.isFMAFasterThanFMulAndFAdd(MF: DAG.getMachineFunction(), VT);
18079
18080 // Floating-point multiply-add with intermediate rounding. This can result
18081 // in a less precise result due to the changed rounding order.
18082 bool HasFMAD = LegalOperations && TLI.isFMADLegal(DAG, N);
18083
18084 // No valid opcode, do not combine.
18085 if (!HasFMAD && !HasFMA)
18086 return SDValue();
18087
18088 // Always prefer FMAD to FMA for precision.
18089 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
18090 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
18091
18092 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
18093 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
18094 auto FuseFADD = [&](SDValue X, SDValue Y) {
18095 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
18096 if (auto *C = isConstOrConstSplatFP(N: X.getOperand(i: 1), AllowUndefs: true)) {
18097 if (C->isExactlyValue(V: +1.0))
18098 return DAG.getNode(Opcode: PreferredFusedOpcode, DL: SL, VT, N1: X.getOperand(i: 0), N2: Y,
18099 N3: Y);
18100 if (C->isExactlyValue(V: -1.0))
18101 return DAG.getNode(Opcode: PreferredFusedOpcode, DL: SL, VT, N1: X.getOperand(i: 0), N2: Y,
18102 N3: DAG.getNode(Opcode: ISD::FNEG, DL: SL, VT, Operand: Y));
18103 }
18104 }
18105 return SDValue();
18106 };
18107
18108 if (SDValue FMA = FuseFADD(N0, N1))
18109 return FMA;
18110 if (SDValue FMA = FuseFADD(N1, N0))
18111 return FMA;
18112
18113 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
18114 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
18115 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
18116 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
18117 auto FuseFSUB = [&](SDValue X, SDValue Y) {
18118 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
18119 if (auto *C0 = isConstOrConstSplatFP(N: X.getOperand(i: 0), AllowUndefs: true)) {
18120 if (C0->isExactlyValue(V: +1.0))
18121 return DAG.getNode(Opcode: PreferredFusedOpcode, DL: SL, VT,
18122 N1: DAG.getNode(Opcode: ISD::FNEG, DL: SL, VT, Operand: X.getOperand(i: 1)), N2: Y,
18123 N3: Y);
18124 if (C0->isExactlyValue(V: -1.0))
18125 return DAG.getNode(Opcode: PreferredFusedOpcode, DL: SL, VT,
18126 N1: DAG.getNode(Opcode: ISD::FNEG, DL: SL, VT, Operand: X.getOperand(i: 1)), N2: Y,
18127 N3: DAG.getNode(Opcode: ISD::FNEG, DL: SL, VT, Operand: Y));
18128 }
18129 if (auto *C1 = isConstOrConstSplatFP(N: X.getOperand(i: 1), AllowUndefs: true)) {
18130 if (C1->isExactlyValue(V: +1.0))
18131 return DAG.getNode(Opcode: PreferredFusedOpcode, DL: SL, VT, N1: X.getOperand(i: 0), N2: Y,
18132 N3: DAG.getNode(Opcode: ISD::FNEG, DL: SL, VT, Operand: Y));
18133 if (C1->isExactlyValue(V: -1.0))
18134 return DAG.getNode(Opcode: PreferredFusedOpcode, DL: SL, VT, N1: X.getOperand(i: 0), N2: Y,
18135 N3: Y);
18136 }
18137 }
18138 return SDValue();
18139 };
18140
18141 if (SDValue FMA = FuseFSUB(N0, N1))
18142 return FMA;
18143 if (SDValue FMA = FuseFSUB(N1, N0))
18144 return FMA;
18145
18146 return SDValue();
18147}
18148
18149SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
18150 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18151
18152 // FADD -> FMA combines:
18153 if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
18154 if (Fused.getOpcode() != ISD::DELETED_NODE)
18155 AddToWorklist(N: Fused.getNode());
18156 return Fused;
18157 }
18158 return SDValue();
18159}
18160
18161SDValue DAGCombiner::visitFADD(SDNode *N) {
18162 SDValue N0 = N->getOperand(Num: 0);
18163 SDValue N1 = N->getOperand(Num: 1);
18164 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N: N0);
18165 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N: N1);
18166 EVT VT = N->getValueType(ResNo: 0);
18167 SDLoc DL(N);
18168 SDNodeFlags Flags = N->getFlags();
18169 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18170
18171 if (SDValue R = DAG.simplifyFPBinop(Opcode: N->getOpcode(), X: N0, Y: N1, Flags))
18172 return R;
18173
18174 // fold (fadd c1, c2) -> c1 + c2
18175 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::FADD, DL, VT, Ops: {N0, N1}))
18176 return C;
18177
18178 // canonicalize constant to RHS
18179 if (N0CFP && !N1CFP)
18180 return DAG.getNode(Opcode: ISD::FADD, DL, VT, N1, N2: N0);
18181
18182 // fold vector ops
18183 if (VT.isVector())
18184 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
18185 return FoldedVOp;
18186
18187 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
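  // (Illustrative: under the default rounding mode, x + (+0.0) == x for every
  // x except x == -0.0, where -0.0 + (+0.0) == +0.0, so the +0.0 case is only
  // safe when the sign of a zero result can be ignored.)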
18188 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N: N1, AllowUndefs: true);
18189 if (N1C && N1C->isZero())
18190 if (N1C->isNegative() || DAG.canIgnoreSignBitOfZero(Op: SDValue(N, 0)))
18191 return N0;
18192
18193 if (SDValue NewSel = foldBinOpIntoSelect(BO: N))
18194 return NewSel;
18195
18196 // fold (fadd A, (fneg B)) -> (fsub A, B)
18197 if (!LegalOperations || TLI.isOperationLegalOrCustom(Op: ISD::FSUB, VT))
18198 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
18199 Op: N1, DAG, LegalOps: LegalOperations, OptForSize: ForCodeSize))
18200 return DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: N0, N2: NegN1);
18201
18202 // fold (fadd (fneg A), B) -> (fsub B, A)
18203 if (!LegalOperations || TLI.isOperationLegalOrCustom(Op: ISD::FSUB, VT))
18204 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
18205 Op: N0, DAG, LegalOps: LegalOperations, OptForSize: ForCodeSize))
18206 return DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1, N2: NegN0);
18207
18208 auto isFMulNegTwo = [](SDValue FMul) {
18209 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
18210 return false;
18211 auto *C = isConstOrConstSplatFP(N: FMul.getOperand(i: 1), AllowUndefs: true);
18212 return C && C->isExactlyValue(V: -2.0);
18213 };
18214
18215 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
18216 if (isFMulNegTwo(N0)) {
18217 SDValue B = N0.getOperand(i: 0);
18218 SDValue Add = DAG.getNode(Opcode: ISD::FADD, DL, VT, N1: B, N2: B);
18219 return DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1, N2: Add);
18220 }
18221 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
18222 if (isFMulNegTwo(N1)) {
18223 SDValue B = N1.getOperand(i: 0);
18224 SDValue Add = DAG.getNode(Opcode: ISD::FADD, DL, VT, N1: B, N2: B);
18225 return DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: N0, N2: Add);
18226 }
18227
  // No FP constant should be created after legalization, as the instruction
  // selection pass has a hard time dealing with FP constants.
18230 bool AllowNewConst = (Level < AfterLegalizeDAG);
18231
18232 // If nnan is enabled, fold lots of things.
18233 if (Flags.hasNoNaNs() && AllowNewConst) {
18234 // If allowed, fold (fadd (fneg x), x) -> 0.0
18235 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(i: 0) == N1)
18236 return DAG.getConstantFP(Val: 0.0, DL, VT);
18237
18238 // If allowed, fold (fadd x, (fneg x)) -> 0.0
18239 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(i: 0) == N0)
18240 return DAG.getConstantFP(Val: 0.0, DL, VT);
18241 }
18242
18243 // If reassoc and nsz, fold lots of things.
18244 // TODO: break out portions of the transformations below for which Unsafe is
18245 // considered and which do not require both nsz and reassoc
18246 if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros() &&
18247 AllowNewConst) {
18248 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
18249 if (N1CFP && N0.getOpcode() == ISD::FADD &&
18250 DAG.isConstantFPBuildVectorOrConstantFP(N: N0.getOperand(i: 1))) {
18251 SDValue NewC = DAG.getNode(Opcode: ISD::FADD, DL, VT, N1: N0.getOperand(i: 1), N2: N1);
18252 return DAG.getNode(Opcode: ISD::FADD, DL, VT, N1: N0.getOperand(i: 0), N2: NewC);
18253 }
18254
18255 // We can fold chains of FADD's of the same value into multiplications.
18256 // This transform is not safe in general because we are reducing the number
18257 // of rounding steps.
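    // (Illustrative: (fadd (fmul x, c), x) rounds twice -- once for the
    // multiply and once for the add -- while (fmul x, c+1) rounds only once,
    // so the two can differ by an ulp for some inputs.)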
18258 if (TLI.isOperationLegalOrCustom(Op: ISD::FMUL, VT) && !N0CFP && !N1CFP) {
18259 if (N0.getOpcode() == ISD::FMUL) {
18260 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N: N0.getOperand(i: 0));
18261 bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N: N0.getOperand(i: 1));
18262
18263 // (fadd (fmul x, c), x) -> (fmul x, c+1)
18264 if (CFP01 && !CFP00 && N0.getOperand(i: 0) == N1) {
18265 SDValue NewCFP = DAG.getNode(Opcode: ISD::FADD, DL, VT, N1: N0.getOperand(i: 1),
18266 N2: DAG.getConstantFP(Val: 1.0, DL, VT));
18267 return DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1, N2: NewCFP);
18268 }
18269
18270 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
18271 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
18272 N1.getOperand(i: 0) == N1.getOperand(i: 1) &&
18273 N0.getOperand(i: 0) == N1.getOperand(i: 0)) {
18274 SDValue NewCFP = DAG.getNode(Opcode: ISD::FADD, DL, VT, N1: N0.getOperand(i: 1),
18275 N2: DAG.getConstantFP(Val: 2.0, DL, VT));
18276 return DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: N0.getOperand(i: 0), N2: NewCFP);
18277 }
18278 }
18279
18280 if (N1.getOpcode() == ISD::FMUL) {
18281 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N: N1.getOperand(i: 0));
18282 bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N: N1.getOperand(i: 1));
18283
18284 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
18285 if (CFP11 && !CFP10 && N1.getOperand(i: 0) == N0) {
18286 SDValue NewCFP = DAG.getNode(Opcode: ISD::FADD, DL, VT, N1: N1.getOperand(i: 1),
18287 N2: DAG.getConstantFP(Val: 1.0, DL, VT));
18288 return DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: N0, N2: NewCFP);
18289 }
18290
18291 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
18292 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
18293 N0.getOperand(i: 0) == N0.getOperand(i: 1) &&
18294 N1.getOperand(i: 0) == N0.getOperand(i: 0)) {
18295 SDValue NewCFP = DAG.getNode(Opcode: ISD::FADD, DL, VT, N1: N1.getOperand(i: 1),
18296 N2: DAG.getConstantFP(Val: 2.0, DL, VT));
18297 return DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: N1.getOperand(i: 0), N2: NewCFP);
18298 }
18299 }
18300
18301 if (N0.getOpcode() == ISD::FADD) {
18302 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N: N0.getOperand(i: 0));
18303 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
18304 if (!CFP00 && N0.getOperand(i: 0) == N0.getOperand(i: 1) &&
18305 (N0.getOperand(i: 0) == N1)) {
18306 return DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1,
18307 N2: DAG.getConstantFP(Val: 3.0, DL, VT));
18308 }
18309 }
18310
18311 if (N1.getOpcode() == ISD::FADD) {
18312 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N: N1.getOperand(i: 0));
18313 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
18314 if (!CFP10 && N1.getOperand(i: 0) == N1.getOperand(i: 1) &&
18315 N1.getOperand(i: 0) == N0) {
18316 return DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: N0,
18317 N2: DAG.getConstantFP(Val: 3.0, DL, VT));
18318 }
18319 }
18320
18321 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
18322 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
18323 N0.getOperand(i: 0) == N0.getOperand(i: 1) &&
18324 N1.getOperand(i: 0) == N1.getOperand(i: 1) &&
18325 N0.getOperand(i: 0) == N1.getOperand(i: 0)) {
18326 return DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: N0.getOperand(i: 0),
18327 N2: DAG.getConstantFP(Val: 4.0, DL, VT));
18328 }
18329 }
18330 } // reassoc && nsz && AllowNewConst
18331
18332 if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()) {
18333 // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
18334 if (SDValue SD = reassociateReduction(RedOpc: ISD::VECREDUCE_FADD, Opc: ISD::FADD, DL,
18335 VT, N0, N1, Flags))
18336 return SD;
18337 }
18338
18339 // FADD -> FMA combines:
18340 if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
18341 if (Fused.getOpcode() != ISD::DELETED_NODE)
18342 AddToWorklist(N: Fused.getNode());
18343 return Fused;
18344 }
18345 return SDValue();
18346}
18347
18348SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
18349 SDValue Chain = N->getOperand(Num: 0);
18350 SDValue N0 = N->getOperand(Num: 1);
18351 SDValue N1 = N->getOperand(Num: 2);
18352 EVT VT = N->getValueType(ResNo: 0);
18353 EVT ChainVT = N->getValueType(ResNo: 1);
18354 SDLoc DL(N);
18355 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18356
18357 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
18358 if (!LegalOperations || TLI.isOperationLegalOrCustom(Op: ISD::STRICT_FSUB, VT))
18359 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
18360 Op: N1, DAG, LegalOps: LegalOperations, OptForSize: ForCodeSize)) {
18361 return DAG.getNode(Opcode: ISD::STRICT_FSUB, DL, VTList: DAG.getVTList(VT1: VT, VT2: ChainVT),
18362 Ops: {Chain, N0, NegN1});
18363 }
18364
18365 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
18366 if (!LegalOperations || TLI.isOperationLegalOrCustom(Op: ISD::STRICT_FSUB, VT))
18367 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
18368 Op: N0, DAG, LegalOps: LegalOperations, OptForSize: ForCodeSize)) {
18369 return DAG.getNode(Opcode: ISD::STRICT_FSUB, DL, VTList: DAG.getVTList(VT1: VT, VT2: ChainVT),
18370 Ops: {Chain, N1, NegN0});
18371 }
18372 return SDValue();
18373}
18374
18375SDValue DAGCombiner::visitFSUB(SDNode *N) {
18376 SDValue N0 = N->getOperand(Num: 0);
18377 SDValue N1 = N->getOperand(Num: 1);
18378 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N: N0, AllowUndefs: true);
18379 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N: N1, AllowUndefs: true);
18380 EVT VT = N->getValueType(ResNo: 0);
18381 SDLoc DL(N);
18382 const SDNodeFlags Flags = N->getFlags();
18383 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18384
18385 if (SDValue R = DAG.simplifyFPBinop(Opcode: N->getOpcode(), X: N0, Y: N1, Flags))
18386 return R;
18387
18388 // fold (fsub c1, c2) -> c1-c2
18389 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::FSUB, DL, VT, Ops: {N0, N1}))
18390 return C;
18391
18392 // fold vector ops
18393 if (VT.isVector())
18394 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
18395 return FoldedVOp;
18396
18397 if (SDValue NewSel = foldBinOpIntoSelect(BO: N))
18398 return NewSel;
18399
18400 // (fsub A, 0) -> A
18401 if (N1CFP && N1CFP->isZero()) {
18402 if (!N1CFP->isNegative() || DAG.canIgnoreSignBitOfZero(Op: SDValue(N, 0))) {
18403 return N0;
18404 }
18405 }
18406
18407 if (N0 == N1) {
18408 // (fsub x, x) -> 0.0
18409 if (Flags.hasNoNaNs())
18410 return DAG.getConstantFP(Val: 0.0f, DL, VT);
18411 }
18412
18413 // (fsub -0.0, N1) -> -N1
18414 if (N0CFP && N0CFP->isZero()) {
18415 if (N0CFP->isNegative() || DAG.canIgnoreSignBitOfZero(Op: SDValue(N, 0))) {
18416 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
18417 // flushed to zero, unless all users treat denorms as zero (DAZ).
18418 // FIXME: This transform will change the sign of a NaN and the behavior
18419 // of a signaling NaN. It is only valid when a NoNaN flag is present.
18420 DenormalMode DenormMode = DAG.getDenormalMode(VT);
18421 if (DenormMode == DenormalMode::getIEEE()) {
18422 if (SDValue NegN1 =
18423 TLI.getNegatedExpression(Op: N1, DAG, LegalOps: LegalOperations, OptForSize: ForCodeSize))
18424 return NegN1;
18425 if (!LegalOperations || TLI.isOperationLegal(Op: ISD::FNEG, VT))
18426 return DAG.getNode(Opcode: ISD::FNEG, DL, VT, Operand: N1);
18427 }
18428 }
18429 }
18430
18431 if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros() &&
18432 N1.getOpcode() == ISD::FADD) {
18433 // X - (X + Y) -> -Y
18434 if (N0 == N1->getOperand(Num: 0))
18435 return DAG.getNode(Opcode: ISD::FNEG, DL, VT, Operand: N1->getOperand(Num: 1));
18436 // X - (Y + X) -> -Y
18437 if (N0 == N1->getOperand(Num: 1))
18438 return DAG.getNode(Opcode: ISD::FNEG, DL, VT, Operand: N1->getOperand(Num: 0));
18439 }
18440
18441 // fold (fsub A, (fneg B)) -> (fadd A, B)
18442 if (SDValue NegN1 =
18443 TLI.getNegatedExpression(Op: N1, DAG, LegalOps: LegalOperations, OptForSize: ForCodeSize))
18444 return DAG.getNode(Opcode: ISD::FADD, DL, VT, N1: N0, N2: NegN1);
18445
18446 // FSUB -> FMA combines:
18447 if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
18448 AddToWorklist(N: Fused.getNode());
18449 return Fused;
18450 }
18451
18452 return SDValue();
18453}
18454
18455// Transform IEEE Floats:
18456// (fmul C, (uitofp Pow2))
18457// -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
18458// (fdiv C, (uitofp Pow2))
18459// -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
18460//
// The rationale is that fmul/fdiv by a power of 2 just changes the exponent,
// so there is no need for more than an add/sub.
18463//
18464// This is valid under the following circumstances:
18465// 1) We are dealing with IEEE floats
18466// 2) C is normal
18467// 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds.
// TODO: Much of this could also be used for generating `ldexp` on targets that
// prefer it.
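// Worked example (illustrative, assuming IEEE binary32): C = 1.5f has bit
// pattern 0x3FC00000. Multiplying by Pow2 = 4 (Log2 == 2) adds
// 2 << 23 = 0x01000000 to the integer view, giving 0x40C00000, which is
// exactly 6.0f.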
18470SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
18471 EVT VT = N->getValueType(ResNo: 0);
18472 if (!APFloat::isIEEELikeFP(VT.getFltSemantics()))
18473 return SDValue();
18474
18475 SDValue ConstOp, Pow2Op;
18476
18477 std::optional<int> Mantissa;
18478 auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) {
18479 if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
18480 return false;
18481
18482 ConstOp = peekThroughBitcasts(V: N->getOperand(Num: ConstOpIdx));
18483 Pow2Op = N->getOperand(Num: 1 - ConstOpIdx);
18484 if (Pow2Op.getOpcode() != ISD::UINT_TO_FP &&
18485 (Pow2Op.getOpcode() != ISD::SINT_TO_FP ||
18486 !DAG.computeKnownBits(Op: Pow2Op).isNonNegative()))
18487 return false;
18488
18489 Pow2Op = Pow2Op.getOperand(i: 0);
18490
18491 // `Log2(Pow2Op) < Pow2Op.getScalarSizeInBits()`.
18492 // TODO: We could use knownbits to make this bound more precise.
18493 int MaxExpChange = Pow2Op.getValueType().getScalarSizeInBits();
18494
18495 auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) {
18496 if (CFP == nullptr)
18497 return false;
18498
18499 const APFloat &APF = CFP->getValueAPF();
18500
      // Make sure we have a normal constant.
18502 if (!APF.isNormal())
18503 return false;
18504
      // Make sure the float's exponent is within the bounds for which this
      // transform produces a bitwise-identical value.
18507 int CurExp = ilogb(Arg: APF);
18508 // FMul by pow2 will only increase exponent.
18509 int MinExp =
18510 N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
18511 // FDiv by pow2 will only decrease exponent.
18512 int MaxExp =
18513 N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
18514 if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) ||
18515 MaxExp >= APFloat::semanticsMaxExponent(APF.getSemantics()))
18516 return false;
18517
18518 // Finally make sure we actually know the mantissa for the float type.
18519 int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
18520 if (!Mantissa)
18521 Mantissa = ThisMantissa;
18522
18523 return *Mantissa == ThisMantissa && ThisMantissa > 0;
18524 };
18525
18526 // TODO: We may be able to include undefs.
18527 return ISD::matchUnaryFpPredicate(Op: ConstOp, Match: IsFPConstValid);
18528 };
18529
18530 if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
18531 return SDValue();
18532
18533 if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, FPConst: ConstOp, IntPow2: Pow2Op))
18534 return SDValue();
18535
18536 // Get log2 after all other checks have taken place. This is because
18537 // BuildLogBase2 may create a new node.
18538 SDLoc DL(N);
18539 // Get Log2 type with same bitwidth as the float type (VT).
18540 EVT NewIntVT = VT.changeElementType(
18541 Context&: *DAG.getContext(),
18542 EltVT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: VT.getScalarSizeInBits()));
18543
18544 SDValue Log2 = BuildLogBase2(V: Pow2Op, DL, KnownNeverZero: DAG.isKnownNeverZero(Op: Pow2Op),
18545 /*InexpensiveOnly*/ true, OutVT: NewIntVT);
18546 if (!Log2)
18547 return SDValue();
18548
18549 // Perform actual transform.
18550 SDValue MantissaShiftCnt =
18551 DAG.getShiftAmountConstant(Val: *Mantissa, VT: NewIntVT, DL);
  // TODO: Sometimes Log2 is of the form `(X + C)`. `(X + C) << C1` should fold
  // to `(X << C1) + (C << C1)`, but that isn't always the case because of the
  // cast. We could implement that here by handling the casts.
18555 SDValue Shift = DAG.getNode(Opcode: ISD::SHL, DL, VT: NewIntVT, N1: Log2, N2: MantissaShiftCnt);
18556 SDValue ResAsInt =
18557 DAG.getNode(Opcode: N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
18558 VT: NewIntVT, N1: DAG.getBitcast(VT: NewIntVT, V: ConstOp), N2: Shift);
18559 SDValue ResAsFP = DAG.getBitcast(VT, V: ResAsInt);
18560 return ResAsFP;
18561}
18562
18563SDValue DAGCombiner::visitFMUL(SDNode *N) {
18564 SDValue N0 = N->getOperand(Num: 0);
18565 SDValue N1 = N->getOperand(Num: 1);
18566 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N: N1, AllowUndefs: true);
18567 EVT VT = N->getValueType(ResNo: 0);
18568 SDLoc DL(N);
18569 const SDNodeFlags Flags = N->getFlags();
18570 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18571
18572 if (SDValue R = DAG.simplifyFPBinop(Opcode: N->getOpcode(), X: N0, Y: N1, Flags))
18573 return R;
18574
18575 // fold (fmul c1, c2) -> c1*c2
18576 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::FMUL, DL, VT, Ops: {N0, N1}))
18577 return C;
18578
18579 // canonicalize constant to RHS
18580 if (DAG.isConstantFPBuildVectorOrConstantFP(N: N0) &&
18581 !DAG.isConstantFPBuildVectorOrConstantFP(N: N1))
18582 return DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1, N2: N0);
18583
18584 // fold vector ops
18585 if (VT.isVector())
18586 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
18587 return FoldedVOp;
18588
18589 if (SDValue NewSel = foldBinOpIntoSelect(BO: N))
18590 return NewSel;
18591
18592 if (Flags.hasAllowReassociation()) {
18593 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
18594 if (DAG.isConstantFPBuildVectorOrConstantFP(N: N1) &&
18595 N0.getOpcode() == ISD::FMUL) {
18596 SDValue N00 = N0.getOperand(i: 0);
18597 SDValue N01 = N0.getOperand(i: 1);
18598 // Avoid an infinite loop by making sure that N00 is not a constant
18599 // (the inner multiply has not been constant folded yet).
18600 if (DAG.isConstantFPBuildVectorOrConstantFP(N: N01) &&
18601 !DAG.isConstantFPBuildVectorOrConstantFP(N: N00)) {
18602 SDValue MulConsts = DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: N01, N2: N1);
18603 return DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: N00, N2: MulConsts);
18604 }
18605 }
18606
18607 // Match a special-case: we convert X * 2.0 into fadd.
18608 // fmul (fadd X, X), C -> fmul X, 2.0 * C
18609 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
18610 N0.getOperand(i: 0) == N0.getOperand(i: 1)) {
18611 const SDValue Two = DAG.getConstantFP(Val: 2.0, DL, VT);
18612 SDValue MulConsts = DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: Two, N2: N1);
18613 return DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: N0.getOperand(i: 0), N2: MulConsts);
18614 }
18615
18616 // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
18617 if (SDValue SD = reassociateReduction(RedOpc: ISD::VECREDUCE_FMUL, Opc: ISD::FMUL, DL,
18618 VT, N0, N1, Flags))
18619 return SD;
18620 }
18621
18622 // fold (fmul X, 2.0) -> (fadd X, X)
18623 if (N1CFP && N1CFP->isExactlyValue(V: +2.0))
18624 return DAG.getNode(Opcode: ISD::FADD, DL, VT, N1: N0, N2: N0);
18625
18626 // fold (fmul X, -1.0) -> (fsub -0.0, X)
18627 if (N1CFP && N1CFP->isExactlyValue(V: -1.0)) {
18628 if (!LegalOperations || TLI.isOperationLegal(Op: ISD::FSUB, VT)) {
18629 return DAG.getNode(Opcode: ISD::FSUB, DL, VT,
18630 N1: DAG.getConstantFP(Val: -0.0, DL, VT), N2: N0, Flags);
18631 }
18632 }
18633
18634 // -N0 * -N1 --> N0 * N1
18635 TargetLowering::NegatibleCost CostN0 =
18636 TargetLowering::NegatibleCost::Expensive;
18637 TargetLowering::NegatibleCost CostN1 =
18638 TargetLowering::NegatibleCost::Expensive;
18639 SDValue NegN0 =
18640 TLI.getNegatedExpression(Op: N0, DAG, LegalOps: LegalOperations, OptForSize: ForCodeSize, Cost&: CostN0);
18641 if (NegN0) {
18642 HandleSDNode NegN0Handle(NegN0);
18643 SDValue NegN1 =
18644 TLI.getNegatedExpression(Op: N1, DAG, LegalOps: LegalOperations, OptForSize: ForCodeSize, Cost&: CostN1);
18645 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
18646 CostN1 == TargetLowering::NegatibleCost::Cheaper))
18647 return DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: NegN0, N2: NegN1);
18648 }
18649
18650 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
18651 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
18652 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
18653 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
18654 TLI.isOperationLegal(Op: ISD::FABS, VT)) {
18655 SDValue Select = N0, X = N1;
18656 if (Select.getOpcode() != ISD::SELECT)
18657 std::swap(a&: Select, b&: X);
18658
18659 SDValue Cond = Select.getOperand(i: 0);
18660 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Val: Select.getOperand(i: 1));
18661 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Val: Select.getOperand(i: 2));
18662
18663 if (TrueOpnd && FalseOpnd &&
18664 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(i: 0) == X &&
18665 isa<ConstantFPSDNode>(Val: Cond.getOperand(i: 1)) &&
18666 cast<ConstantFPSDNode>(Val: Cond.getOperand(i: 1))->isExactlyValue(V: 0.0)) {
18667 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Cond.getOperand(i: 2))->get();
18668 switch (CC) {
18669 default: break;
18670 case ISD::SETOLT:
18671 case ISD::SETULT:
18672 case ISD::SETOLE:
18673 case ISD::SETULE:
18674 case ISD::SETLT:
18675 case ISD::SETLE:
18676 std::swap(a&: TrueOpnd, b&: FalseOpnd);
18677 [[fallthrough]];
18678 case ISD::SETOGT:
18679 case ISD::SETUGT:
18680 case ISD::SETOGE:
18681 case ISD::SETUGE:
18682 case ISD::SETGT:
18683 case ISD::SETGE:
18684 if (TrueOpnd->isExactlyValue(V: -1.0) && FalseOpnd->isExactlyValue(V: 1.0) &&
18685 TLI.isOperationLegal(Op: ISD::FNEG, VT))
18686 return DAG.getNode(Opcode: ISD::FNEG, DL, VT,
18687 Operand: DAG.getNode(Opcode: ISD::FABS, DL, VT, Operand: X));
18688 if (TrueOpnd->isExactlyValue(V: 1.0) && FalseOpnd->isExactlyValue(V: -1.0))
18689 return DAG.getNode(Opcode: ISD::FABS, DL, VT, Operand: X);
18690
18691 break;
18692 }
18693 }
18694 }
18695
18696 // FMUL -> FMA combines:
18697 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
18698 AddToWorklist(N: Fused.getNode());
18699 return Fused;
18700 }
18701
18702 // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
18703 // able to run.
18704 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
18705 return R;
18706
18707 return SDValue();
18708}
18709
18710template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
18711 SDValue N0 = N->getOperand(Num: 0);
18712 SDValue N1 = N->getOperand(Num: 1);
18713 SDValue N2 = N->getOperand(Num: 2);
18714 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Val&: N0);
18715 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(Val&: N1);
18716 ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(Val&: N2);
18717 EVT VT = N->getValueType(ResNo: 0);
18718 SDLoc DL(N);
18719 // FMA nodes have flags that propagate to the created nodes.
18720 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18721 MatchContextClass matcher(DAG, TLI, N);
18722
18723 // Constant fold FMA.
18724 if (SDValue C =
18725 DAG.FoldConstantArithmetic(Opcode: N->getOpcode(), DL, VT, Ops: {N0, N1, N2}))
18726 return C;
18727
18728 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
18729 TargetLowering::NegatibleCost CostN0 =
18730 TargetLowering::NegatibleCost::Expensive;
18731 TargetLowering::NegatibleCost CostN1 =
18732 TargetLowering::NegatibleCost::Expensive;
18733 SDValue NegN0 =
18734 TLI.getNegatedExpression(Op: N0, DAG, LegalOps: LegalOperations, OptForSize: ForCodeSize, Cost&: CostN0);
18735 if (NegN0) {
18736 HandleSDNode NegN0Handle(NegN0);
18737 SDValue NegN1 =
18738 TLI.getNegatedExpression(Op: N1, DAG, LegalOps: LegalOperations, OptForSize: ForCodeSize, Cost&: CostN1);
18739 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
18740 CostN1 == TargetLowering::NegatibleCost::Cheaper))
18741 return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
18742 }
18743
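  // Fold (fma 0, x, y) -> y and (fma x, 0, y) -> y. This needs nnan and ninf
  // because 0 * NaN and 0 * inf are NaN rather than 0, and it needs nsz (or a
  // constant addend other than -0.0) because (+0.0) + (-0.0) is +0.0, not
  // -0.0.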
18744 if (N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs()) {
18745 if (N->getFlags().hasNoSignedZeros() ||
18746 (N2CFP && !N2CFP->isExactlyValue(V: -0.0))) {
18747 if (N0CFP && N0CFP->isZero())
18748 return N2;
18749 if (N1CFP && N1CFP->isZero())
18750 return N2;
18751 }
18752 }
18753
18754 // FIXME: Support splat of constant.
18755 if (N0CFP && N0CFP->isExactlyValue(V: 1.0))
18756 return matcher.getNode(ISD::FADD, DL, VT, N1, N2);
18757 if (N1CFP && N1CFP->isExactlyValue(V: 1.0))
18758 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
18759
18760 // Canonicalize (fma c, x, y) -> (fma x, c, y)
18761 if (DAG.isConstantFPBuildVectorOrConstantFP(N: N0) &&
18762 !DAG.isConstantFPBuildVectorOrConstantFP(N: N1))
18763 return matcher.getNode(ISD::FMA, DL, VT, N1, N0, N2);
18764
18765 bool CanReassociate = N->getFlags().hasAllowReassociation();
18766 if (CanReassociate) {
18767 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
18768 if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(i: 0) &&
18769 DAG.isConstantFPBuildVectorOrConstantFP(N: N1) &&
18770 DAG.isConstantFPBuildVectorOrConstantFP(N: N2.getOperand(i: 1))) {
18771 return matcher.getNode(
18772 ISD::FMUL, DL, VT, N0,
18773 matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(i: 1)));
18774 }
18775
18776 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
18777 if (matcher.match(N0, ISD::FMUL) &&
18778 DAG.isConstantFPBuildVectorOrConstantFP(N: N1) &&
18779 DAG.isConstantFPBuildVectorOrConstantFP(N: N0.getOperand(i: 1))) {
18780 return matcher.getNode(
18781 ISD::FMA, DL, VT, N0.getOperand(i: 0),
18782 matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(i: 1)), N2);
18783 }
18784 }
18785
18786 // (fma x, -1, y) -> (fadd (fneg x), y)
18787 // FIXME: Support splat of constant.
18788 if (N1CFP) {
18789 if (N1CFP->isExactlyValue(V: 1.0))
18790 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
18791
18792 if (N1CFP->isExactlyValue(V: -1.0) &&
18793 (!LegalOperations || TLI.isOperationLegal(Op: ISD::FNEG, VT))) {
18794 SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
18795 AddToWorklist(N: RHSNeg.getNode());
18796 return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
18797 }
18798
    // fma (fneg x), K, y -> fma x, -K, y
18800 if (matcher.match(N0, ISD::FNEG) &&
18801 (TLI.isOperationLegal(Op: ISD::ConstantFP, VT) ||
18802 (N1.hasOneUse() &&
18803 !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
18804 return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(i: 0),
18805 matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
18806 }
18807 }
18808
18809 // FIXME: Support splat of constant.
18810 if (CanReassociate) {
18811 // (fma x, c, x) -> (fmul x, (c+1))
18812 if (N1CFP && N0 == N2) {
18813 return matcher.getNode(ISD::FMUL, DL, VT, N0,
18814 matcher.getNode(ISD::FADD, DL, VT, N1,
18815 DAG.getConstantFP(Val: 1.0, DL, VT)));
18816 }
18817
18818 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
18819 if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(i: 0) == N0) {
18820 return matcher.getNode(ISD::FMUL, DL, VT, N0,
18821 matcher.getNode(ISD::FADD, DL, VT, N1,
18822 DAG.getConstantFP(Val: -1.0, DL, VT)));
18823 }
18824 }
18825
18826 // fold (fma (fneg X), Y, (fneg Z)) -> (fneg (fma X, Y, Z))
18827 // fold (fma X, (fneg Y), (fneg Z)) -> (fneg (fma X, Y, Z))
18828 if (!TLI.isFNegFree(VT))
18829 if (SDValue Neg = TLI.getCheaperNegatedExpression(
18830 Op: SDValue(N, 0), DAG, LegalOps: LegalOperations, OptForSize: ForCodeSize))
18831 return matcher.getNode(ISD::FNEG, DL, VT, Neg);
18832 return SDValue();
18833}
18834
18835SDValue DAGCombiner::visitFMAD(SDNode *N) {
18836 SDValue N0 = N->getOperand(Num: 0);
18837 SDValue N1 = N->getOperand(Num: 1);
18838 SDValue N2 = N->getOperand(Num: 2);
18839 EVT VT = N->getValueType(ResNo: 0);
18840 SDLoc DL(N);
18841
18842 // Constant fold FMAD.
18843 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::FMAD, DL, VT, Ops: {N0, N1, N2}))
18844 return C;
18845
18846 return SDValue();
18847}
18848
18849SDValue DAGCombiner::visitFMULADD(SDNode *N) {
18850 SDValue N0 = N->getOperand(Num: 0);
18851 SDValue N1 = N->getOperand(Num: 1);
18852 SDValue N2 = N->getOperand(Num: 2);
18853 EVT VT = N->getValueType(ResNo: 0);
18854 SDLoc DL(N);
18855
18856 // Constant fold FMULADD.
18857 if (SDValue C =
18858 DAG.FoldConstantArithmetic(Opcode: ISD::FMULADD, DL, VT, Ops: {N0, N1, N2}))
18859 return C;
18860
18861 return SDValue();
18862}
18863
18864// Combine multiple FDIVs with the same divisor into multiple FMULs by the
18865// reciprocal.
18866// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
18867 // Note that this is not always beneficial. One reason is that different
18868 // targets may have different costs for FDIV and FMUL, so the cost of two
18869 // FDIVs may sometimes be lower than the cost of one FDIV and two FMULs.
18870 // Another reason is that the critical path grows from "one FDIV" to "one FDIV + one FMUL".
18871SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
18872 // TODO: Limit this transform based on optsize/minsize - it always creates at
18873 // least 1 extra instruction. But the perf win may be substantial enough
18874 // that only minsize should restrict this.
18875 const SDNodeFlags Flags = N->getFlags();
18876 if (LegalDAG || !Flags.hasAllowReciprocal())
18877 return SDValue();
18878
18879 // Skip if current node is a reciprocal/fneg-reciprocal.
18880 SDValue N0 = N->getOperand(Num: 0), N1 = N->getOperand(Num: 1);
18881 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N: N0, /* AllowUndefs */ true);
18882 if (N0CFP && (N0CFP->isExactlyValue(V: 1.0) || N0CFP->isExactlyValue(V: -1.0)))
18883 return SDValue();
18884
18885 // Exit early if the target does not want this transform or if there can't
18886 // possibly be enough uses of the divisor to make the transform worthwhile.
18887 unsigned MinUses = TLI.combineRepeatedFPDivisors();
18888
18889 // For splat vectors, scale the number of uses by the splat factor. If we can
18890 // convert the division into a scalar op, that will likely be much faster.
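  // Illustrative example: with a target MinUses of 2, one fdiv whose divisor
  // is a splatted <4 x float> value counts as 4 scaled uses (1 * 4), so the
  // transform can still fire even though there is only a single FDIV user.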
18891 unsigned NumElts = 1;
18892 EVT VT = N->getValueType(ResNo: 0);
18893 if (VT.isVector() && DAG.isSplatValue(V: N1))
18894 NumElts = VT.getVectorMinNumElements();
18895
18896 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
18897 return SDValue();
18898
18899 // Find all FDIV users of the same divisor.
18900 // Use a set because duplicates may be present in the user list.
18901 SetVector<SDNode *> Users;
18902 for (auto *U : N1->users()) {
18903 if (U->getOpcode() == ISD::FDIV && U->getOperand(Num: 1) == N1) {
18904 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
18905 if (U->getOperand(Num: 1).getOpcode() == ISD::FSQRT &&
18906 U->getOperand(Num: 0) == U->getOperand(Num: 1).getOperand(i: 0) &&
18907 U->getFlags().hasAllowReassociation() &&
18908 U->getFlags().hasNoSignedZeros())
18909 continue;
18910
18911 // This division is eligible for the optimization only if it allows
18912 // reciprocal formation (i.e. it carries the 'arcp' fast-math flag).
18913 if (U->getFlags().hasAllowReciprocal())
18914 Users.insert(X: U);
18915 }
18916 }
18917
18918 // Now that we have the actual number of divisor uses, make sure it meets
18919 // the minimum threshold specified by the target.
18920 if ((Users.size() * NumElts) < MinUses)
18921 return SDValue();
18922
18923 SDLoc DL(N);
18924 SDValue FPOne = DAG.getConstantFP(Val: 1.0, DL, VT);
18925 SDValue Reciprocal = DAG.getNode(Opcode: ISD::FDIV, DL, VT, N1: FPOne, N2: N1, Flags);
18926
18927 // Dividend / Divisor -> Dividend * Reciprocal
18928 for (auto *U : Users) {
18929 SDValue Dividend = U->getOperand(Num: 0);
18930 if (Dividend != FPOne) {
18931 SDValue NewNode = DAG.getNode(Opcode: ISD::FMUL, DL: SDLoc(U), VT, N1: Dividend,
18932 N2: Reciprocal, Flags);
18933 CombineTo(N: U, Res: NewNode);
18934 } else if (U != Reciprocal.getNode()) {
18935 // In the absence of fast-math-flags, this user node is always the
18936 // same node as Reciprocal, but with FMF they may be different nodes.
18937 CombineTo(N: U, Res: Reciprocal);
18938 }
18939 }
18940 return SDValue(N, 0); // N was replaced.
18941}
18942
18943SDValue DAGCombiner::visitFDIV(SDNode *N) {
18944 SDValue N0 = N->getOperand(Num: 0);
18945 SDValue N1 = N->getOperand(Num: 1);
18946 EVT VT = N->getValueType(ResNo: 0);
18947 SDLoc DL(N);
18948 SDNodeFlags Flags = N->getFlags();
18949 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18950
18951 if (SDValue R = DAG.simplifyFPBinop(Opcode: N->getOpcode(), X: N0, Y: N1, Flags))
18952 return R;
18953
18954 // fold (fdiv c1, c2) -> c1/c2
18955 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::FDIV, DL, VT, Ops: {N0, N1}))
18956 return C;
18957
18958 // fold vector ops
18959 if (VT.isVector())
18960 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
18961 return FoldedVOp;
18962
18963 if (SDValue NewSel = foldBinOpIntoSelect(BO: N))
18964 return NewSel;
18965
18966 if (SDValue V = combineRepeatedFPDivisors(N))
18967 return V;
18968
18969 // fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or
18970 // the loss is acceptable with AllowReciprocal.
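  // For example, (fdiv X, 4.0) becomes (fmul X, 0.25) even without 'arcp'
  // because the reciprocal 0.25 is exact, while (fdiv X, 3.0) becomes a
  // multiply by an inexact reciprocal only when 'arcp' is present; both are
  // subject to the FP-immediate legality checks below.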
18971 if (auto *N1CFP = isConstOrConstSplatFP(N: N1, AllowUndefs: true)) {
18972 // Compute the reciprocal 1.0 / c2.
18973 const APFloat &N1APF = N1CFP->getValueAPF();
18974 APFloat Recip = APFloat::getOne(Sem: N1APF.getSemantics());
18975 APFloat::opStatus st = Recip.divide(RHS: N1APF, RM: APFloat::rmNearestTiesToEven);
18976 // Only do the transform if the reciprocal is a legal fp immediate that
18977 // isn't too nasty (eg NaN, denormal, ...).
18978 if (((st == APFloat::opOK && !Recip.isDenormal()) ||
18979 (st == APFloat::opInexact && Flags.hasAllowReciprocal())) &&
18980 (!LegalOperations ||
18981 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
18982 // backend)... we should handle this gracefully after Legalize.
18983 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
18984 TLI.isOperationLegal(Op: ISD::ConstantFP, VT) ||
18985 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
18986 return DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: N0,
18987 N2: DAG.getConstantFP(Val: Recip, DL, VT));
18988 }
18989
18990 if (Flags.hasAllowReciprocal()) {
18991 // If this FDIV is part of a reciprocal square root, it may be folded
18992 // into a target-specific square root estimate instruction.
18993 bool N1AllowReciprocal = N1->getFlags().hasAllowReciprocal();
18994 if (N1.getOpcode() == ISD::FSQRT) {
18995 if (SDValue RV = buildRsqrtEstimate(Op: N1.getOperand(i: 0)))
18996 return DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: N0, N2: RV);
18997 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
18998 N1.getOperand(i: 0).getOpcode() == ISD::FSQRT &&
18999 N1AllowReciprocal) {
19000 if (SDValue RV = buildRsqrtEstimate(Op: N1.getOperand(i: 0).getOperand(i: 0))) {
19001 RV = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: SDLoc(N1), VT, Operand: RV);
19002 AddToWorklist(N: RV.getNode());
19003 return DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: N0, N2: RV);
19004 }
19005 } else if (N1.getOpcode() == ISD::FP_ROUND &&
19006 N1.getOperand(i: 0).getOpcode() == ISD::FSQRT) {
19007 if (SDValue RV = buildRsqrtEstimate(Op: N1.getOperand(i: 0).getOperand(i: 0))) {
19008 RV = DAG.getNode(Opcode: ISD::FP_ROUND, DL: SDLoc(N1), VT, N1: RV, N2: N1.getOperand(i: 1));
19009 AddToWorklist(N: RV.getNode());
19010 return DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: N0, N2: RV);
19011 }
19012 } else if (N1.getOpcode() == ISD::FMUL) {
19013 // Look through an FMUL. Even though this won't remove the FDIV directly,
19014 // it's still worthwhile to get rid of the FSQRT if possible.
19015 SDValue Sqrt, Y;
19016 if (N1.getOperand(i: 0).getOpcode() == ISD::FSQRT) {
19017 Sqrt = N1.getOperand(i: 0);
19018 Y = N1.getOperand(i: 1);
19019 } else if (N1.getOperand(i: 1).getOpcode() == ISD::FSQRT) {
19020 Sqrt = N1.getOperand(i: 1);
19021 Y = N1.getOperand(i: 0);
19022 }
19023 if (Sqrt.getNode()) {
19024 // If the other multiply operand is known positive, pull it into the
19025 // sqrt. That will eliminate the division if we convert to an estimate.
19026 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
19027 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
19028 SDValue A;
19029 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
19030 A = Y.getOperand(i: 0);
19031 else if (Y == Sqrt.getOperand(i: 0))
19032 A = Y;
19033 if (A) {
19034 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
19035 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
19036 SDValue AA = DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: A, N2: A);
19037 SDValue AAZ =
19038 DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: AA, N2: Sqrt.getOperand(i: 0));
19039 if (SDValue Rsqrt = buildRsqrtEstimate(Op: AAZ))
19040 return DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: N0, N2: Rsqrt);
19041
19042 // Estimate creation failed. Clean up speculatively created nodes.
19043 recursivelyDeleteUnusedNodes(N: AAZ.getNode());
19044 }
19045 }
19046
19047 // We found a FSQRT, so try to make this fold:
19048 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
19049 if (SDValue Rsqrt = buildRsqrtEstimate(Op: Sqrt.getOperand(i: 0))) {
19050 SDValue Div = DAG.getNode(Opcode: ISD::FDIV, DL: SDLoc(N1), VT, N1: Rsqrt, N2: Y);
19051 AddToWorklist(N: Div.getNode());
19052 return DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: N0, N2: Div);
19053 }
19054 }
19055 }
19056
19057 // Fold into a reciprocal estimate and multiply instead of a real divide.
19058 if (Flags.hasNoInfs())
19059 if (SDValue RV = BuildDivEstimate(N: N0, Op: N1, Flags))
19060 return RV;
19061 }
19062
19063 // Fold X/Sqrt(X) -> Sqrt(X)
19064 if (DAG.canIgnoreSignBitOfZero(Op: SDValue(N, 0)) &&
19065 Flags.hasAllowReassociation())
19066 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(i: 0))
19067 return N1;
19068
19069 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
19070 TargetLowering::NegatibleCost CostN0 =
19071 TargetLowering::NegatibleCost::Expensive;
19072 TargetLowering::NegatibleCost CostN1 =
19073 TargetLowering::NegatibleCost::Expensive;
19074 SDValue NegN0 =
19075 TLI.getNegatedExpression(Op: N0, DAG, LegalOps: LegalOperations, OptForSize: ForCodeSize, Cost&: CostN0);
19076 if (NegN0) {
19077 HandleSDNode NegN0Handle(NegN0);
19078 SDValue NegN1 =
19079 TLI.getNegatedExpression(Op: N1, DAG, LegalOps: LegalOperations, OptForSize: ForCodeSize, Cost&: CostN1);
19080 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
19081 CostN1 == TargetLowering::NegatibleCost::Cheaper))
19082 return DAG.getNode(Opcode: ISD::FDIV, DL, VT, N1: NegN0, N2: NegN1);
19083 }
19084
19085 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
19086 return R;
19087
19088 return SDValue();
19089}
19090
19091SDValue DAGCombiner::visitFREM(SDNode *N) {
19092 SDValue N0 = N->getOperand(Num: 0);
19093 SDValue N1 = N->getOperand(Num: 1);
19094 EVT VT = N->getValueType(ResNo: 0);
19095 SDNodeFlags Flags = N->getFlags();
19096 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19097 SDLoc DL(N);
19098
19099 if (SDValue R = DAG.simplifyFPBinop(Opcode: N->getOpcode(), X: N0, Y: N1, Flags))
19100 return R;
19101
19102 // fold (frem c1, c2) -> fmod(c1,c2)
19103 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::FREM, DL, VT, Ops: {N0, N1}))
19104 return C;
19105
19106 if (SDValue NewSel = foldBinOpIntoSelect(BO: N))
19107 return NewSel;
19108
19109 // Lower frem N0, N1 => N0 - trunc(N0 / N1) * N1, provided N1 is an integer
19110 // power of 2.
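  // Worked example (illustrative): frem 7.5, 2.0:
  //   div = 7.5 / 2.0 = 3.75, trunc(3.75) = 3.0,
  //   7.5 - 3.0 * 2.0 = 1.5, which matches fmod(7.5, 2.0).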
19111 if (!TLI.isOperationLegal(Op: ISD::FREM, VT) &&
19112 TLI.isOperationLegalOrCustom(Op: ISD::FMUL, VT) &&
19113 TLI.isOperationLegalOrCustom(Op: ISD::FDIV, VT) &&
19114 TLI.isOperationLegalOrCustom(Op: ISD::FTRUNC, VT) &&
19115 DAG.isKnownToBeAPowerOfTwoFP(Val: N1)) {
19116 bool NeedsCopySign = !DAG.canIgnoreSignBitOfZero(Op: SDValue(N, 0)) &&
19117 !DAG.cannotBeOrderedNegativeFP(Op: N0);
19118 SDValue Div = DAG.getNode(Opcode: ISD::FDIV, DL, VT, N1: N0, N2: N1);
19119 SDValue Rnd = DAG.getNode(Opcode: ISD::FTRUNC, DL, VT, Operand: Div);
19120 SDValue MLA;
19121 if (TLI.isFMAFasterThanFMulAndFAdd(MF: DAG.getMachineFunction(), VT)) {
19122 MLA = DAG.getNode(Opcode: ISD::FMA, DL, VT, N1: DAG.getNode(Opcode: ISD::FNEG, DL, VT, Operand: Rnd),
19123 N2: N1, N3: N0);
19124 } else {
19125 SDValue Mul = DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: Rnd, N2: N1);
19126 MLA = DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: N0, N2: Mul);
19127 }
19128 return NeedsCopySign ? DAG.getNode(Opcode: ISD::FCOPYSIGN, DL, VT, N1: MLA, N2: N0) : MLA;
19129 }
19130
19131 return SDValue();
19132}
19133
19134SDValue DAGCombiner::visitFSQRT(SDNode *N) {
19135 SDNodeFlags Flags = N->getFlags();
19136
19137 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
19138 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
19139 if (!Flags.hasApproximateFuncs() || !Flags.hasNoInfs())
19140 return SDValue();
19141
19142 SDValue N0 = N->getOperand(Num: 0);
19143 if (TLI.isFsqrtCheap(X: N0, DAG))
19144 return SDValue();
19145
19146 // FSQRT nodes have flags that propagate to the created nodes.
19147 SelectionDAG::FlagInserter FlagInserter(DAG, Flags);
19148 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
19149 // transform the fdiv, we may produce a sub-optimal estimate sequence
19150 // because the reciprocal calculation may not have to filter out a
19151 // 0.0 input.
19152 return buildSqrtEstimate(Op: N0);
19153}
19154
19155/// copysign(x, fp_extend(y)) -> copysign(x, y)
19156/// copysign(x, fp_round(y)) -> copysign(x, y)
19157 /// The arguments are the types of X and Y, respectively.
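/// For example, fcopysign(f64 X, fp_extend(f32 Y)) only needs the sign bit of
/// Y, so the extend can be dropped and the node becomes fcopysign(f64 X, f32 Y).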
19158static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
19159 // Always fold no-op FP casts.
19160 if (XTy == YTy)
19161 return true;
19162
19163 // Do not optimize out the type conversion for f128 yet.
19164 // For some targets like x86_64, the configuration was changed to keep one
19165 // f128 value in one SSE register, but instruction selection cannot handle
19166 // FCOPYSIGN on SSE registers yet.
19167 if (YTy == MVT::f128)
19168 return false;
19169
19170 // Avoid mismatched vector operand types, for better instruction selection.
19171 return !YTy.isVector();
19172}
19173
19174static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
19175 SDValue N1 = N->getOperand(Num: 1);
19176 if (N1.getOpcode() != ISD::FP_EXTEND &&
19177 N1.getOpcode() != ISD::FP_ROUND)
19178 return false;
19179 EVT N1VT = N1->getValueType(ResNo: 0);
19180 EVT N1Op0VT = N1->getOperand(Num: 0).getValueType();
19181 return CanCombineFCOPYSIGN_EXTEND_ROUND(XTy: N1VT, YTy: N1Op0VT);
19182}
19183
19184SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
19185 SDValue N0 = N->getOperand(Num: 0);
19186 SDValue N1 = N->getOperand(Num: 1);
19187 EVT VT = N->getValueType(ResNo: 0);
19188 SDLoc DL(N);
19189
19190 // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
19191 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::FCOPYSIGN, DL, VT, Ops: {N0, N1}))
19192 return C;
19193
19194 // copysign(x, fp_extend(y)) -> copysign(x, y)
19195 // copysign(x, fp_round(y)) -> copysign(x, y)
19196 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
19197 return DAG.getNode(Opcode: ISD::FCOPYSIGN, DL, VT, N1: N0, N2: N1.getOperand(i: 0));
19198
19199 if (SimplifyDemandedBits(Op: SDValue(N, 0)))
19200 return SDValue(N, 0);
19201
19202 if (VT != N1.getValueType())
19203 return SDValue();
19204
19205 // If this is equivalent to a disjoint or, replace it with one. This can
19206 // happen if the sign operand is a sign mask (i.e., x << sign_bit_position).
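  // Illustrative f32 case: if N1 = (shl Y, 31), every bit of N1 other than the
  // sign bit is known zero, so when N0's sign bit is also known zero the
  // copysign is just a disjoint OR of the two bit patterns.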
19207 if (DAG.SignBitIsZeroFP(Op: N0) &&
19208 DAG.computeKnownBits(Op: N1).Zero.isMaxSignedValue()) {
19209 // TODO: Just directly match the shift pattern. computeKnownBits is heavy
19210 // for such a narrowly targeted case.
19211 EVT IntVT = VT.changeTypeToInteger();
19212 // TODO: It appears to be profitable in some situations to unconditionally
19213 // emit a fabs(n0) to perform this combine.
19214 SDValue CastSrc0 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: N0);
19215 SDValue CastSrc1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: N1);
19216
19217 SDValue SignOr = DAG.getNode(Opcode: ISD::OR, DL, VT: IntVT, N1: CastSrc0, N2: CastSrc1,
19218 Flags: SDNodeFlags::Disjoint);
19219 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: SignOr);
19220 }
19221
19222 return SDValue();
19223}
19224
19225SDValue DAGCombiner::visitFPOW(SDNode *N) {
19226 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N: N->getOperand(Num: 1));
19227 if (!ExponentC)
19228 return SDValue();
19229 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19230
19231 // Try to convert x ** (1/3) into cube root.
19232 // TODO: Handle the various flavors of long double.
19233 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
19234 // Some range near 1/3 should be fine.
19235 EVT VT = N->getValueType(ResNo: 0);
19236 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(V: 1.0f/3.0f)) ||
19237 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(V: 1.0/3.0))) {
19238 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
19239 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
19240 // pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val).
19241 // For regular numbers, rounding may cause the results to differ.
19242 // Therefore, we require { nsz ninf nnan afn } for this transform.
19243 // TODO: We could select out the special cases if we don't have nsz/ninf.
19244 SDNodeFlags Flags = N->getFlags();
19245 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
19246 !Flags.hasApproximateFuncs())
19247 return SDValue();
19248
19249 // Do not create a cbrt() libcall if the target does not have it, and do not
19250 // turn a pow that has lowering support into a cbrt() libcall.
19251 if (!DAG.getLibInfo().has(F: LibFunc_cbrt) ||
19252 (!DAG.getTargetLoweringInfo().isOperationExpand(Op: ISD::FPOW, VT) &&
19253 DAG.getTargetLoweringInfo().isOperationExpand(Op: ISD::FCBRT, VT)))
19254 return SDValue();
19255
19256 return DAG.getNode(Opcode: ISD::FCBRT, DL: SDLoc(N), VT, Operand: N->getOperand(Num: 0));
19257 }
19258
19259 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
19260 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
19261 // TODO: This could be extended (using a target hook) to handle smaller
19262 // power-of-2 fractional exponents.
19263 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(V: 0.25);
19264 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(V: 0.75);
19265 if (ExponentIs025 || ExponentIs075) {
19266 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
19267 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
19268 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
19269 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
19270 // For regular numbers, rounding may cause the results to differ.
19271 // Therefore, we require { nsz ninf afn } for this transform.
19272 // TODO: We could select out the special cases if we don't have nsz/ninf.
19273 SDNodeFlags Flags = N->getFlags();
19274
19275 // We only need no signed zeros for the 0.25 case.
19276 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
19277 !Flags.hasApproximateFuncs())
19278 return SDValue();
19279
19280 // Don't double the number of libcalls. We are trying to inline fast code.
19281 if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(Op: ISD::FSQRT, VT))
19282 return SDValue();
19283
19284 // Assume that libcalls are the smallest code.
19285 // TODO: This restriction should probably be lifted for vectors.
19286 if (ForCodeSize)
19287 return SDValue();
19288
19289 // pow(X, 0.25) --> sqrt(sqrt(X))
19290 SDLoc DL(N);
19291 SDValue Sqrt = DAG.getNode(Opcode: ISD::FSQRT, DL, VT, Operand: N->getOperand(Num: 0));
19292 SDValue SqrtSqrt = DAG.getNode(Opcode: ISD::FSQRT, DL, VT, Operand: Sqrt);
19293 if (ExponentIs025)
19294 return SqrtSqrt;
19295 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
19296 return DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: Sqrt, N2: SqrtSqrt);
19297 }
19298
19299 return SDValue();
19300}
19301
19302static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
19303 const TargetLowering &TLI) {
19304 // We can fold the fpto[us]i -> [us]itofp pattern into a single ftrunc.
19305 // Additionally, if there are clamps ([us]min or [us]max) around
19306 // the fpto[us]i, we can fold those into fminnum/fmaxnum around the ftrunc.
19307 // If NoSignedZerosFPMath is enabled, this is a direct replacement.
19308 // Otherwise, for strict math, we must handle edge cases:
19309 // 1. For unsigned conversions, use FABS to handle negative cases. Take -0.0
19310 // as an example: it first becomes integer 0 and is then converted back to
19311 // +0.0, whereas FTRUNC on its own could produce -0.0.
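  // A sketch of the intent (f32, assuming the sign-of-zero and legality checks
  // below pass):
  //   sint_to_fp (fp_to_sint X)             --> ftrunc X
  //   uint_to_fp (umin (fp_to_uint X), 255) --> fmin* (ftrunc X), 255.0
  // where 255 must round-trip exactly through the FP type and fmin* is
  // whatever FP min/max opcode getMinMaxOpcodeForClamp selects (an FABS may
  // also be inserted in the unsigned case).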
19312
19313 // FIXME: We should be able to use node-level FMF here.
19314 EVT VT = N->getValueType(ResNo: 0);
19315 if (!TLI.isOperationLegal(Op: ISD::FTRUNC, VT))
19316 return SDValue();
19317
19318 bool IsUnsigned = N->getOpcode() == ISD::UINT_TO_FP;
19319 bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP;
19320 assert(IsSigned || IsUnsigned);
19321
19322 bool IsSignedZeroSafe = DAG.getTarget().Options.NoSignedZerosFPMath ||
19323 DAG.canIgnoreSignBitOfZero(Op: SDValue(N, 0));
19324 // For signed conversions: The optimization changes signed zero behavior.
19325 if (IsSigned && !IsSignedZeroSafe)
19326 return SDValue();
19327 // For unsigned conversions, we need FABS to canonicalize -0.0 to +0.0
19328 // (unless outputting a signed zero is OK).
19329 if (IsUnsigned && !IsSignedZeroSafe && !TLI.isFAbsFree(VT))
19330 return SDValue();
19331
19332 // Collect potential clamp operations (outermost to innermost) and peel.
19333 struct ClampInfo {
19334 bool IsMin;
19335 SDValue Constant;
19336 };
19337 constexpr unsigned MaxClamps = 2;
19338 SmallVector<ClampInfo, MaxClamps> Clamps;
19339 unsigned MinOp = IsUnsigned ? ISD::UMIN : ISD::SMIN;
19340 unsigned MaxOp = IsUnsigned ? ISD::UMAX : ISD::SMAX;
19341 SDValue IntVal = N->getOperand(Num: 0);
19342 for (unsigned Level = 0; Level < MaxClamps; ++Level) {
19343 if (!IntVal.hasOneUse() ||
19344 (IntVal.getOpcode() != MinOp && IntVal.getOpcode() != MaxOp))
19345 break;
19346 SDValue RHS = IntVal.getOperand(i: 1);
19347 APInt IntConst;
19348 if (auto *IntConstNode = dyn_cast<ConstantSDNode>(Val&: RHS))
19349 IntConst = IntConstNode->getAPIntValue();
19350 else if (!ISD::isConstantSplatVector(N: RHS.getNode(), SplatValue&: IntConst))
19351 return SDValue();
19352 APFloat FPConst(VT.getFltSemantics());
19353 FPConst.convertFromAPInt(Input: IntConst, IsSigned, RM: APFloat::rmNearestTiesToEven);
19354 // Verify roundtrip exactness.
19355 APSInt RoundTrip(IntConst.getBitWidth(), IsUnsigned);
19356 bool IsExact;
19357 if (FPConst.convertToInteger(Result&: RoundTrip, RM: APFloat::rmTowardZero, IsExact: &IsExact) !=
19358 APFloat::opOK ||
19359 !IsExact || static_cast<const APInt &>(RoundTrip) != IntConst)
19360 return SDValue();
19361 bool IsMin = IntVal.getOpcode() == MinOp;
19362 Clamps.push_back(Elt: {.IsMin: IsMin, .Constant: DAG.getConstantFP(Val: FPConst, DL, VT)});
19363 IntVal = IntVal.getOperand(i: 0);
19364 }
19365
19366 // Check that the sequence ends with the correct kind of fpto[us]i.
19367 unsigned FPToIntOp = IsUnsigned ? ISD::FP_TO_UINT : ISD::FP_TO_SINT;
19368 if (IntVal.getOpcode() != FPToIntOp ||
19369 IntVal.getOperand(i: 0).getValueType() != VT)
19370 return SDValue();
19371
19372 SDValue Result = IntVal.getOperand(i: 0);
19373 if (IsUnsigned && !IsSignedZeroSafe && TLI.isFAbsFree(VT))
19374 Result = DAG.getNode(Opcode: ISD::FABS, DL, VT, Operand: Result);
19375 Result = DAG.getNode(Opcode: ISD::FTRUNC, DL, VT, Operand: Result);
19376 // Apply clamps, if any, in reverse order (innermost first).
19377 for (const ClampInfo &Clamp : reverse(C&: Clamps)) {
19378 unsigned FPClampOp =
19379 getMinMaxOpcodeForClamp(IsMin: Clamp.IsMin, Operand1: Result, Operand2: Clamp.Constant, DAG, TLI);
19380 if (FPClampOp == ISD::DELETED_NODE)
19381 return SDValue();
19382 Result = DAG.getNode(Opcode: FPClampOp, DL, VT, N1: Result, N2: Clamp.Constant);
19383 }
19384 return Result;
19385}
19386
19387SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
19388 SDValue N0 = N->getOperand(Num: 0);
19389 EVT VT = N->getValueType(ResNo: 0);
19390 EVT OpVT = N0.getValueType();
19391 SDLoc DL(N);
19392
19393 // [us]itofp(undef) = 0, because the result value is bounded.
19394 if (N0.isUndef())
19395 return DAG.getConstantFP(Val: 0.0, DL, VT);
19396
19397 // fold (sint_to_fp c1) -> c1fp
19398 // ...but only if the target supports immediate floating-point values
19399 if ((!LegalOperations || TLI.isOperationLegalOrCustom(Op: ISD::ConstantFP, VT)))
19400 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::SINT_TO_FP, DL, VT, Ops: {N0}))
19401 return C;
19402
19403 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
19404 // but UINT_TO_FP is legal on this target, try to convert.
19405 if (!hasOperation(Opcode: ISD::SINT_TO_FP, VT: OpVT) &&
19406 hasOperation(Opcode: ISD::UINT_TO_FP, VT: OpVT)) {
19407 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
19408 if (DAG.SignBitIsZero(Op: N0))
19409 return DAG.getNode(Opcode: ISD::UINT_TO_FP, DL, VT, Operand: N0);
19410 }
19411
19412 // The next optimizations are desirable only if SELECT_CC can be lowered.
19413 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
19414 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
19415 !VT.isVector() &&
19416 (!LegalOperations || TLI.isOperationLegalOrCustom(Op: ISD::ConstantFP, VT)))
19417 return DAG.getSelect(DL, VT, Cond: N0, LHS: DAG.getConstantFP(Val: -1.0, DL, VT),
19418 RHS: DAG.getConstantFP(Val: 0.0, DL, VT));
19419
19420 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
19421 // (select (setcc x, y, cc), 1.0, 0.0)
19422 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
19423 N0.getOperand(i: 0).getOpcode() == ISD::SETCC && !VT.isVector() &&
19424 (!LegalOperations || TLI.isOperationLegalOrCustom(Op: ISD::ConstantFP, VT)))
19425 return DAG.getSelect(DL, VT, Cond: N0.getOperand(i: 0),
19426 LHS: DAG.getConstantFP(Val: 1.0, DL, VT),
19427 RHS: DAG.getConstantFP(Val: 0.0, DL, VT));
19428
19429 if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
19430 return FTrunc;
19431
19432 // fold (sint_to_fp (trunc nsw x)) -> (sint_to_fp x)
19433 if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoSignedWrap() &&
19434 TLI.isTypeDesirableForOp(ISD::SINT_TO_FP,
19435 VT: N0.getOperand(i: 0).getValueType()))
19436 return DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT, Operand: N0.getOperand(i: 0));
19437
19438 return SDValue();
19439}
19440
19441SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
19442 SDValue N0 = N->getOperand(Num: 0);
19443 EVT VT = N->getValueType(ResNo: 0);
19444 EVT OpVT = N0.getValueType();
19445 SDLoc DL(N);
19446
19447 // [us]itofp(undef) = 0, because the result value is bounded.
19448 if (N0.isUndef())
19449 return DAG.getConstantFP(Val: 0.0, DL, VT);
19450
19451 // fold (uint_to_fp c1) -> c1fp
19452 // ...but only if the target supports immediate floating-point values
19453 if ((!LegalOperations || TLI.isOperationLegalOrCustom(Op: ISD::ConstantFP, VT)))
19454 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::UINT_TO_FP, DL, VT, Ops: {N0}))
19455 return C;
19456
19457 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
19458 // but SINT_TO_FP is legal on this target, try to convert.
19459 if (!hasOperation(Opcode: ISD::UINT_TO_FP, VT: OpVT) &&
19460 hasOperation(Opcode: ISD::SINT_TO_FP, VT: OpVT)) {
19461 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
19462 if (DAG.SignBitIsZero(Op: N0))
19463 return DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT, Operand: N0);
19464 }
19465
19466 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
19467 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
19468 (!LegalOperations || TLI.isOperationLegalOrCustom(Op: ISD::ConstantFP, VT)))
19469 return DAG.getSelect(DL, VT, Cond: N0, LHS: DAG.getConstantFP(Val: 1.0, DL, VT),
19470 RHS: DAG.getConstantFP(Val: 0.0, DL, VT));
19471
19472 if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
19473 return FTrunc;
19474
19475 // fold (uint_to_fp (trunc nuw x)) -> (uint_to_fp x)
19476 if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoUnsignedWrap() &&
19477 TLI.isTypeDesirableForOp(ISD::UINT_TO_FP,
19478 VT: N0.getOperand(i: 0).getValueType()))
19479 return DAG.getNode(Opcode: ISD::UINT_TO_FP, DL, VT, Operand: N0.getOperand(i: 0));
19480
19481 return SDValue();
19482}
19483
19484 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
19485static SDValue FoldIntToFPToInt(SDNode *N, const SDLoc &DL, SelectionDAG &DAG) {
19486 SDValue N0 = N->getOperand(Num: 0);
19487 EVT VT = N->getValueType(ResNo: 0);
19488
19489 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
19490 return SDValue();
19491
19492 SDValue Src = N0.getOperand(i: 0);
19493 EVT SrcVT = Src.getValueType();
19494 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
19495 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
19496
19497 // We can safely assume the conversion won't overflow the output range,
19498 // because (for example) (uint8_t)18293.f is undefined behavior.
19499
19500 // Since we can assume the conversion won't overflow, our decision as to
19501 // whether the input will fit in the float should depend on the minimum
19502 // of the input range and output range.
19503
19504 // This means this is also safe for a signed input and unsigned output, since
19505 // a negative input would lead to undefined behavior.
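  // For example (illustrative, f32 intermediate): fp_to_sint (sint_to_fp i16 X)
  // producing i32 folds to (sign_extend X); the 15 significant input bits fit
  // in f32's 24-bit significand, so the round-trip through FP is exact.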
19506 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
19507 unsigned OutputSize = (int)VT.getScalarSizeInBits();
19508 unsigned ActualSize = std::min(a: InputSize, b: OutputSize);
19509 const fltSemantics &Sem = N0.getValueType().getFltSemantics();
19510
19511 // We can only fold away the float conversion if the input range can be
19512 // represented exactly in the float range.
19513 if (APFloat::semanticsPrecision(Sem) >= ActualSize) {
19514 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
19515 unsigned ExtOp =
19516 IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
19517 return DAG.getNode(Opcode: ExtOp, DL, VT, Operand: Src);
19518 }
19519 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
19520 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Src);
19521 return DAG.getBitcast(VT, V: Src);
19522 }
19523 return SDValue();
19524}
19525
19526SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
19527 SDValue N0 = N->getOperand(Num: 0);
19528 EVT VT = N->getValueType(ResNo: 0);
19529 SDLoc DL(N);
19530
19531 // fold (fp_to_sint undef) -> undef
19532 if (N0.isUndef())
19533 return DAG.getUNDEF(VT);
19534
19535 // fold (fp_to_sint c1fp) -> c1
19536 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::FP_TO_SINT, DL, VT, Ops: {N0}))
19537 return C;
19538
19539 return FoldIntToFPToInt(N, DL, DAG);
19540}
19541
19542SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
19543 SDValue N0 = N->getOperand(Num: 0);
19544 EVT VT = N->getValueType(ResNo: 0);
19545 SDLoc DL(N);
19546
19547 // fold (fp_to_uint undef) -> undef
19548 if (N0.isUndef())
19549 return DAG.getUNDEF(VT);
19550
19551 // fold (fp_to_uint c1fp) -> c1
19552 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::FP_TO_UINT, DL, VT, Ops: {N0}))
19553 return C;
19554
19555 return FoldIntToFPToInt(N, DL, DAG);
19556}
19557
19558SDValue DAGCombiner::visitXROUND(SDNode *N) {
19559 SDValue N0 = N->getOperand(Num: 0);
19560 EVT VT = N->getValueType(ResNo: 0);
19561
19562 // fold (lrint|llrint undef) -> undef
19563 // fold (lround|llround undef) -> undef
19564 if (N0.isUndef())
19565 return DAG.getUNDEF(VT);
19566
19567 // fold (lrint|llrint c1fp) -> c1
19568 // fold (lround|llround c1fp) -> c1
19569 if (SDValue C =
19570 DAG.FoldConstantArithmetic(Opcode: N->getOpcode(), DL: SDLoc(N), VT, Ops: {N0}))
19571 return C;
19572
19573 return SDValue();
19574}
19575
19576SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
19577 SDValue N0 = N->getOperand(Num: 0);
19578 SDValue N1 = N->getOperand(Num: 1);
19579 EVT VT = N->getValueType(ResNo: 0);
19580 SDLoc DL(N);
19581
19582 // fold (fp_round c1fp) -> c1fp
19583 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::FP_ROUND, DL, VT, Ops: {N0, N1}))
19584 return C;
19585
19586 // fold (fp_round (fp_extend x)) -> x
19587 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(i: 0).getValueType())
19588 return N0.getOperand(i: 0);
19589
19590 // fold (fp_round (fp_round x)) -> (fp_round x)
19591 if (N0.getOpcode() == ISD::FP_ROUND) {
19592 const bool NIsTrunc = N->getConstantOperandVal(Num: 1) == 1;
19593 const bool N0IsTrunc = N0.getConstantOperandVal(i: 1) == 1;
19594
19595 // Avoid folding legal fp_rounds into non-legal ones.
19596 if (!hasOperation(Opcode: ISD::FP_ROUND, VT))
19597 return SDValue();
19598
19599 // Skip this folding if it results in an fp_round from f80 to f16.
19600 //
19601 // f80 to f16 always generates an expensive (and as yet, unimplemented)
19602 // libcall to __truncxfhf2 instead of selecting native f16 conversion
19603 // instructions from f32 or f64. Moreover, the first (value-preserving)
19604 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
19605 // x86.
19606 if (N0.getOperand(i: 0).getValueType() == MVT::f80 && VT == MVT::f16)
19607 return SDValue();
19608
19609 // If the first fp_round isn't a value preserving truncation, it might
19610 // introduce a tie in the second fp_round, that wouldn't occur in the
19611 // single-step fp_round we want to fold to.
19612 // In other words, double rounding isn't the same as rounding.
19613 // Also, this is a value preserving truncation iff both fp_round's are.
19614 if ((N->getFlags().hasAllowContract() &&
19615 N0->getFlags().hasAllowContract()) ||
19616 N0IsTrunc)
19617 return DAG.getNode(
19618 Opcode: ISD::FP_ROUND, DL, VT, N1: N0.getOperand(i: 0),
19619 N2: DAG.getIntPtrConstant(Val: NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
19620 }
19621
19622 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
19623 // Note: From a legality perspective, this is a two step transform. First,
19624 // we duplicate the fp_round to the arguments of the copysign, then we
19625 // eliminate the fp_round on Y. The second step requires an additional
19626 // predicate to match the implementation above.
19627 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
19628 CanCombineFCOPYSIGN_EXTEND_ROUND(XTy: VT,
19629 YTy: N0.getValueType())) {
19630 SDValue Tmp = DAG.getNode(Opcode: ISD::FP_ROUND, DL: SDLoc(N0), VT,
19631 N1: N0.getOperand(i: 0), N2: N1);
19632 AddToWorklist(N: Tmp.getNode());
19633 return DAG.getNode(Opcode: ISD::FCOPYSIGN, DL, VT, N1: Tmp, N2: N0.getOperand(i: 1));
19634 }
19635
19636 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(Cast: N))
19637 return NewVSel;
19638
19639 return SDValue();
19640}
19641
19642// Eliminate a floating-point widening of a narrowed value if the fast math
19643// flags allow it.
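// For example (illustrative), (fp_extend (fp_round X to f16)) back to the
// original type of X can be replaced by X when the widening node carries nnan
// and ninf and both nodes carry the contract flag.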
19644static SDValue eliminateFPCastPair(SDNode *N) {
19645 SDValue N0 = N->getOperand(Num: 0);
19646 EVT VT = N->getValueType(ResNo: 0);
19647
19648 unsigned NarrowingOp;
19649 switch (N->getOpcode()) {
19650 case ISD::FP16_TO_FP:
19651 NarrowingOp = ISD::FP_TO_FP16;
19652 break;
19653 case ISD::BF16_TO_FP:
19654 NarrowingOp = ISD::FP_TO_BF16;
19655 break;
19656 case ISD::FP_EXTEND:
19657 NarrowingOp = ISD::FP_ROUND;
19658 break;
19659 default:
19660 llvm_unreachable("Expected widening FP cast");
19661 }
19662
19663 if (N0.getOpcode() == NarrowingOp && N0.getOperand(i: 0).getValueType() == VT) {
19664 const SDNodeFlags NarrowFlags = N0->getFlags();
19665 const SDNodeFlags WidenFlags = N->getFlags();
19666 // Narrowing can introduce inf and change the encoding of a nan, so the
19667 // widen must have the nnan and ninf flags to indicate that we don't need to
19668 // care about that. We are also removing a rounding step, and that requires
19669 // both the narrow and widen to allow contraction.
19670 if (WidenFlags.hasNoNaNs() && WidenFlags.hasNoInfs() &&
19671 NarrowFlags.hasAllowContract() && WidenFlags.hasAllowContract()) {
19672 return N0.getOperand(i: 0);
19673 }
19674 }
19675
19676 return SDValue();
19677}
19678
19679SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
19680 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19681 SDValue N0 = N->getOperand(Num: 0);
19682 EVT VT = N->getValueType(ResNo: 0);
19683 SDLoc DL(N);
19684
19685 if (VT.isVector())
19686 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
19687 return FoldedVOp;
19688
19689 // If this fp_extend's only user is an fp_round, don't fold it; let the fp_round fold us away instead.
19690 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::FP_ROUND)
19691 return SDValue();
19692
19693 // fold (fp_extend c1fp) -> c1fp
19694 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::FP_EXTEND, DL, VT, Ops: {N0}))
19695 return C;
19696
19697 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
19698 if (N0.getOpcode() == ISD::FP16_TO_FP &&
19699 TLI.getOperationAction(Op: ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
19700 return DAG.getNode(Opcode: ISD::FP16_TO_FP, DL, VT, Operand: N0.getOperand(i: 0));
19701
19702 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
19703 // value of X.
19704 if (N0.getOpcode() == ISD::FP_ROUND && N0.getConstantOperandVal(i: 1) == 1) {
19705 SDValue In = N0.getOperand(i: 0);
19706 if (In.getValueType() == VT) return In;
19707 if (VT.bitsLT(VT: In.getValueType()))
19708 return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT, N1: In, N2: N0.getOperand(i: 1));
19709 return DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT, Operand: In);
19710 }
19711
19712 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
19713 if (ISD::isNormalLoad(N: N0.getNode()) && N0.hasOneUse() &&
19714 TLI.isLoadExtLegalOrCustom(ExtType: ISD::EXTLOAD, ValVT: VT, MemVT: N0.getValueType())) {
19715 LoadSDNode *LN0 = cast<LoadSDNode>(Val&: N0);
19716 SDValue ExtLoad = DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl: DL, VT,
19717 Chain: LN0->getChain(),
19718 Ptr: LN0->getBasePtr(), MemVT: N0.getValueType(),
19719 MMO: LN0->getMemOperand());
19720 CombineTo(N, Res: ExtLoad);
19721 CombineTo(
19722 N: N0.getNode(),
19723 Res0: DAG.getNode(Opcode: ISD::FP_ROUND, DL: SDLoc(N0), VT: N0.getValueType(), N1: ExtLoad,
19724 N2: DAG.getIntPtrConstant(Val: 1, DL: SDLoc(N0), /*isTarget=*/true)),
19725 Res1: ExtLoad.getValue(R: 1));
19726 return SDValue(N, 0); // Return N so it doesn't get rechecked!
19727 }
19728
19729 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(Cast: N))
19730 return NewVSel;
19731
19732 if (SDValue CastEliminated = eliminateFPCastPair(N))
19733 return CastEliminated;
19734
19735 return SDValue();
19736}
19737
19738SDValue DAGCombiner::visitFCEIL(SDNode *N) {
19739 SDValue N0 = N->getOperand(Num: 0);
19740 EVT VT = N->getValueType(ResNo: 0);
19741
19742 // fold (fceil c1) -> fceil(c1)
19743 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::FCEIL, DL: SDLoc(N), VT, Ops: {N0}))
19744 return C;
19745
19746 return SDValue();
19747}
19748
19749SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
19750 SDValue N0 = N->getOperand(Num: 0);
19751 EVT VT = N->getValueType(ResNo: 0);
19752
19753 // fold (ftrunc c1) -> ftrunc(c1)
19754 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::FTRUNC, DL: SDLoc(N), VT, Ops: {N0}))
19755 return C;
19756
19757 // fold ftrunc (known rounded int x) -> x
19758 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
19759 // likely to be generated to extract integer from a rounded floating value.
19760 switch (N0.getOpcode()) {
19761 default: break;
19762 case ISD::FRINT:
19763 case ISD::FTRUNC:
19764 case ISD::FNEARBYINT:
19765 case ISD::FROUNDEVEN:
19766 case ISD::FFLOOR:
19767 case ISD::FCEIL:
19768 return N0;
19769 }
19770
19771 return SDValue();
19772}
19773
19774SDValue DAGCombiner::visitFFREXP(SDNode *N) {
19775 SDValue N0 = N->getOperand(Num: 0);
19776
19777 // fold (ffrexp c1) -> ffrexp(c1)
19778 if (DAG.isConstantFPBuildVectorOrConstantFP(N: N0))
19779 return DAG.getNode(Opcode: ISD::FFREXP, DL: SDLoc(N), VTList: N->getVTList(), N: N0);
19780 return SDValue();
19781}
19782
19783SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
19784 SDValue N0 = N->getOperand(Num: 0);
19785 EVT VT = N->getValueType(ResNo: 0);
19786
19787 // fold (ffloor c1) -> ffloor(c1)
19788 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::FFLOOR, DL: SDLoc(N), VT, Ops: {N0}))
19789 return C;
19790
19791 return SDValue();
19792}
19793
19794SDValue DAGCombiner::visitFNEG(SDNode *N) {
19795 SDValue N0 = N->getOperand(Num: 0);
19796 EVT VT = N->getValueType(ResNo: 0);
19797 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19798
19799 // Constant fold FNEG.
19800 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::FNEG, DL: SDLoc(N), VT, Ops: {N0}))
19801 return C;
19802
19803 if (SDValue NegN0 =
19804 TLI.getNegatedExpression(Op: N0, DAG, LegalOps: LegalOperations, OptForSize: ForCodeSize))
19805 return NegN0;
19806
19807 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
19808 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
19809 // know it was called from a context with a nsz flag if the input fsub does
19810 // not.
19811 if (N0.getOpcode() == ISD::FSUB && N->getFlags().hasNoSignedZeros() &&
19812 N0.hasOneUse()) {
19813 return DAG.getNode(Opcode: ISD::FSUB, DL: SDLoc(N), VT, N1: N0.getOperand(i: 1),
19814 N2: N0.getOperand(i: 0));
19815 }
19816
19817 if (SimplifyDemandedBits(Op: SDValue(N, 0)))
19818 return SDValue(N, 0);
19819
19820 if (SDValue Cast = foldSignChangeInBitcast(N))
19821 return Cast;
19822
19823 return SDValue();
19824}
19825
19826SDValue DAGCombiner::visitFMinMax(SDNode *N) {
19827 SDValue N0 = N->getOperand(Num: 0);
19828 SDValue N1 = N->getOperand(Num: 1);
19829 EVT VT = N->getValueType(ResNo: 0);
19830 const SDNodeFlags Flags = N->getFlags();
19831 unsigned Opc = N->getOpcode();
19832 bool PropAllNaNsToQNaNs = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
19833 bool ReturnsOtherForAllNaNs =
19834 Opc == ISD::FMINIMUMNUM || Opc == ISD::FMAXIMUMNUM;
19835 bool IsMin =
19836 Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM || Opc == ISD::FMINIMUMNUM;
19837 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19838
19839 // Constant fold.
19840 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: Opc, DL: SDLoc(N), VT, Ops: {N0, N1}))
19841 return C;
19842
19843 // Canonicalize to constant on RHS.
19844 if (DAG.isConstantFPBuildVectorOrConstantFP(N: N0) &&
19845 !DAG.isConstantFPBuildVectorOrConstantFP(N: N1))
19846 return DAG.getNode(Opcode: N->getOpcode(), DL: SDLoc(N), VT, N1, N2: N0);
19847
19848 if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N: N1)) {
19849 const APFloat &AF = N1CFP->getValueAPF();
19850
19851 // minnum(X, qnan) -> X
19852 // maxnum(X, qnan) -> X
19853 // minimum(X, nan) -> qnan
19854 // maximum(X, nan) -> qnan
19855 // minimumnum(X, nan) -> X
19856 // maximumnum(X, nan) -> X
19857 if (AF.isNaN()) {
19858 if (PropAllNaNsToQNaNs) {
19859 if (AF.isSignaling())
19860 return DAG.getConstantFP(Val: AF.makeQuiet(), DL: SDLoc(N), VT);
19861 return N->getOperand(Num: 1);
19862 } else if (ReturnsOtherForAllNaNs || !AF.isSignaling()) {
19863 return N->getOperand(Num: 0);
19864 }
19865 return SDValue();
19866 }
19867
19868 // In the following folds, inf can be replaced with the largest finite
19869 // float, if the ninf flag is set.
19870 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
19871 // minimum(X, -inf) -> -inf if nnan
19872 // maximum(X, +inf) -> +inf if nnan
19873 // minimumnum(X, -inf) -> -inf
19874 // maximumnum(X, +inf) -> +inf
19875 if (IsMin == AF.isNegative() &&
19876 (ReturnsOtherForAllNaNs || Flags.hasNoNaNs()))
19877 return N->getOperand(Num: 1);
19878
19879 // minnum(X, +inf) -> X if nnan
19880 // maxnum(X, -inf) -> X if nnan
19881 // minimum(X, +inf) -> X (ignoring quieting of sNaNs)
19882 // maximum(X, -inf) -> X (ignoring quieting of sNaNs)
19883 // minimumnum(X, +inf) -> X if nnan
19884 // maximumnum(X, -inf) -> X if nnan
19885 if (IsMin != AF.isNegative() && (PropAllNaNsToQNaNs || Flags.hasNoNaNs()))
19886 return N->getOperand(Num: 0);
19887 }
19888 }
19889
19890 // There are no VECREDUCE variants of FMINIMUMNUM or FMAXIMUMNUM
19891 if (Opc == ISD::FMINIMUMNUM || Opc == ISD::FMAXIMUMNUM)
19892 return SDValue();
19893
19894 if (SDValue SD = reassociateReduction(
19895 RedOpc: PropAllNaNsToQNaNs
19896 ? (IsMin ? ISD::VECREDUCE_FMINIMUM : ISD::VECREDUCE_FMAXIMUM)
19897 : (IsMin ? ISD::VECREDUCE_FMIN : ISD::VECREDUCE_FMAX),
19898 Opc, DL: SDLoc(N), VT, N0, N1, Flags))
19899 return SD;
19900
19901 return SDValue();
19902}
19903
19904SDValue DAGCombiner::visitFABS(SDNode *N) {
19905 SDValue N0 = N->getOperand(Num: 0);
19906 EVT VT = N->getValueType(ResNo: 0);
19907 SDLoc DL(N);
19908
19909 // fold (fabs c1) -> fabs(c1)
19910 if (SDValue C = DAG.FoldConstantArithmetic(Opcode: ISD::FABS, DL, VT, Ops: {N0}))
19911 return C;
19912
19913 if (SimplifyDemandedBits(Op: SDValue(N, 0)))
19914 return SDValue(N, 0);
19915
19916 if (SDValue Cast = foldSignChangeInBitcast(N))
19917 return Cast;
19918
19919 return SDValue();
19920}
19921
19922SDValue DAGCombiner::visitBRCOND(SDNode *N) {
19923 SDValue Chain = N->getOperand(Num: 0);
19924 SDValue N1 = N->getOperand(Num: 1);
19925 SDValue N2 = N->getOperand(Num: 2);
19926
19927 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
19928 // nondeterministic jumps).
19929 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
19930 return DAG.getNode(Opcode: ISD::BRCOND, DL: SDLoc(N), VT: MVT::Other, N1: Chain,
19931 N2: N1->getOperand(Num: 0), N3: N2, Flags: N->getFlags());
19932 }
19933
19934 // Variant of the previous fold where there is a SETCC in between:
19935 // BRCOND(SETCC(FREEZE(X), CONST, Cond))
19936 // =>
19937 // BRCOND(FREEZE(SETCC(X, CONST, Cond)))
19938 // =>
19939 // BRCOND(SETCC(X, CONST, Cond))
19940 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
19941 // isn't equivalent to true or false.
19942 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
19943 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
19944 if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
19945 SDValue S0 = N1->getOperand(Num: 0), S1 = N1->getOperand(Num: 1);
19946 ISD::CondCode Cond = cast<CondCodeSDNode>(Val: N1->getOperand(Num: 2))->get();
19947 ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(Val&: S0);
19948 ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(Val&: S1);
19949 bool Updated = false;
19950
19951 // Is 'X Cond C' always true or false?
19952 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
19953 bool False = (Cond == ISD::SETULT && C->isZero()) ||
19954 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
19955 (Cond == ISD::SETUGT && C->isAllOnes()) ||
19956 (Cond == ISD::SETGT && C->isMaxSignedValue());
19957 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
19958 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
19959 (Cond == ISD::SETUGE && C->isZero()) ||
19960 (Cond == ISD::SETGE && C->isMinSignedValue());
19961 return True || False;
19962 };
19963
19964 if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
19965 if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
19966 S0 = S0->getOperand(Num: 0);
19967 Updated = true;
19968 }
19969 }
19970 if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
19971 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Operation: Cond), S0C)) {
19972 S1 = S1->getOperand(Num: 0);
19973 Updated = true;
19974 }
19975 }
19976
19977 if (Updated)
19978 return DAG.getNode(
19979 Opcode: ISD::BRCOND, DL: SDLoc(N), VT: MVT::Other, N1: Chain,
19980 N2: DAG.getSetCC(DL: SDLoc(N1), VT: N1->getValueType(ResNo: 0), LHS: S0, RHS: S1, Cond), N3: N2,
19981 Flags: N->getFlags());
19982 }
19983
19984 // If N is a constant we could fold this into a fallthrough or unconditional
19985 // branch. However that doesn't happen very often in normal code, because
19986 // Instcombine/SimplifyCFG should have handled the available opportunities.
19987 // If we did this folding here, it would be necessary to update the
19988 // MachineBasicBlock CFG, which is awkward.
19989
19990 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
19991 // on the target, also copy fast math flags.
19992 if (N1.getOpcode() == ISD::SETCC &&
19993 TLI.isOperationLegalOrCustom(Op: ISD::BR_CC,
19994 VT: N1.getOperand(i: 0).getValueType())) {
19995 return DAG.getNode(Opcode: ISD::BR_CC, DL: SDLoc(N), VT: MVT::Other, N1: Chain,
19996 N2: N1.getOperand(i: 2), N3: N1.getOperand(i: 0), N4: N1.getOperand(i: 1), N5: N2,
19997 Flags: N1->getFlags());
19998 }
19999
20000 if (N1.hasOneUse()) {
20001 // rebuildSetCC calls visitXor which may change the Chain when there is a
20002 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
20003 HandleSDNode ChainHandle(Chain);
20004 if (SDValue NewN1 = rebuildSetCC(N: N1))
20005 return DAG.getNode(Opcode: ISD::BRCOND, DL: SDLoc(N), VT: MVT::Other,
20006 N1: ChainHandle.getValue(), N2: NewN1, N3: N2, Flags: N->getFlags());
20007 }
20008
20009 return SDValue();
20010}
20011
20012SDValue DAGCombiner::rebuildSetCC(SDValue N) {
20013 if (N.getOpcode() == ISD::SRL ||
20014 (N.getOpcode() == ISD::TRUNCATE &&
20015 (N.getOperand(i: 0).hasOneUse() &&
20016 N.getOperand(i: 0).getOpcode() == ISD::SRL))) {
20017 // Look past the truncate.
20018 if (N.getOpcode() == ISD::TRUNCATE)
20019 N = N.getOperand(i: 0);
20020
20021 // Match this pattern so that we can generate simpler code:
20022 //
20023 // %a = ...
20024 // %b = and i32 %a, 2
20025 // %c = srl i32 %b, 1
20026 // brcond i32 %c ...
20027 //
20028 // into
20029 //
20030 // %a = ...
20031 // %b = and i32 %a, 2
20032 // %c = setcc eq %b, 0
20033 // brcond %c ...
20034 //
20035 // This applies only when the AND constant value has one bit set and the
20036 // SRL constant is equal to the log2 of the AND constant. The back-end is
20037 // smart enough to convert the result into a TEST/JMP sequence.
20038 SDValue Op0 = N.getOperand(i: 0);
20039 SDValue Op1 = N.getOperand(i: 1);
20040
20041 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
20042 SDValue AndOp1 = Op0.getOperand(i: 1);
20043
20044 if (AndOp1.getOpcode() == ISD::Constant) {
20045 const APInt &AndConst = AndOp1->getAsAPIntVal();
20046
20047 if (AndConst.isPowerOf2() &&
20048 Op1->getAsAPIntVal() == AndConst.logBase2()) {
20049 SDLoc DL(N);
20050 return DAG.getSetCC(DL, VT: getSetCCResultType(VT: Op0.getValueType()),
20051 LHS: Op0, RHS: DAG.getConstant(Val: 0, DL, VT: Op0.getValueType()),
20052 Cond: ISD::SETNE);
20053 }
20054 }
20055 }
20056 }
20057
20058 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
20059 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
20060 if (N.getOpcode() == ISD::XOR) {
20061 // Because we may call this on a speculatively constructed
20062 // SimplifiedSetCC Node, we need to simplify this node first.
20063 // Ideally this should be folded into SimplifySetCC and not
20064 // here. For now, grab a handle to N so we don't lose it from
20065 // replacements internal to the visit.
20066 HandleSDNode XORHandle(N);
20067 while (N.getOpcode() == ISD::XOR) {
20068 SDValue Tmp = visitXOR(N: N.getNode());
20069 // No simplification done.
20070 if (!Tmp.getNode())
20071 break;
20072 // Returning N is a form of in-visit replacement that may invalidate
20073 // N. Grab the value from the handle.
20074 if (Tmp.getNode() == N.getNode())
20075 N = XORHandle.getValue();
20076 else // Node simplified. Try simplifying again.
20077 N = Tmp;
20078 }
20079
20080 if (N.getOpcode() != ISD::XOR)
20081 return N;
20082
20083 SDValue Op0 = N->getOperand(Num: 0);
20084 SDValue Op1 = N->getOperand(Num: 1);
20085
20086 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
20087 bool Equal = false;
20088 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
20089 if (isBitwiseNot(V: N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
20090 Op0.getValueType() == MVT::i1) {
20091 N = Op0;
20092 Op0 = N->getOperand(Num: 0);
20093 Op1 = N->getOperand(Num: 1);
20094 Equal = true;
20095 }
20096
20097 EVT SetCCVT = N.getValueType();
20098 if (LegalTypes)
20099 SetCCVT = getSetCCResultType(VT: SetCCVT);
20100 // Replace the uses of XOR with SETCC. Note, avoid this transformation if
20101 // it would introduce illegal operations post-legalization as this can
20102 // result in infinite looping between converting xor->setcc here, and
20103 // expanding setcc->xor in LegalizeSetCCCondCode if requested.
20104 const ISD::CondCode CC = Equal ? ISD::SETEQ : ISD::SETNE;
20105 if (!LegalOperations || TLI.isCondCodeLegal(CC, VT: Op0.getSimpleValueType()))
20106 return DAG.getSetCC(DL: SDLoc(N), VT: SetCCVT, LHS: Op0, RHS: Op1, Cond: CC);
20107 }
20108 }
20109
20110 return SDValue();
20111}
20112
20113// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
20114//
20115SDValue DAGCombiner::visitBR_CC(SDNode *N) {
20116 CondCodeSDNode *CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 1));
20117 SDValue CondLHS = N->getOperand(Num: 2), CondRHS = N->getOperand(Num: 3);
20118
20119 // If N is a constant we could fold this into a fallthrough or unconditional
20120 // branch. However that doesn't happen very often in normal code, because
20121 // Instcombine/SimplifyCFG should have handled the available opportunities.
20122 // If we did this folding here, it would be necessary to update the
20123 // MachineBasicBlock CFG, which is awkward.
20124
20125 // Use SimplifySetCC to simplify SETCC's.
20126 SDValue Simp = SimplifySetCC(VT: getSetCCResultType(VT: CondLHS.getValueType()),
20127 N0: CondLHS, N1: CondRHS, Cond: CC->get(), DL: SDLoc(N),
20128 foldBooleans: false);
20129 if (Simp.getNode()) AddToWorklist(N: Simp.getNode());
20130
20131 // fold to a simpler setcc
20132 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
20133 return DAG.getNode(Opcode: ISD::BR_CC, DL: SDLoc(N), VT: MVT::Other,
20134 N1: N->getOperand(Num: 0), N2: Simp.getOperand(i: 2),
20135 N3: Simp.getOperand(i: 0), N4: Simp.getOperand(i: 1),
20136 N5: N->getOperand(Num: 4));
20137
20138 return SDValue();
20139}
20140
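/// Helper for the indexed load/store combines below: extract the base pointer
/// of a (masked) load or store, classify the node, and check that the target
/// supports either of the requested indexed addressing modes for its memory
/// VT. Returns false if the node is already indexed or is unsupported.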
20141static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
20142 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
20143 const TargetLowering &TLI) {
20144 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) {
20145 if (LD->isIndexed())
20146 return false;
20147 EVT VT = LD->getMemoryVT();
20148 if (!TLI.isIndexedLoadLegal(IdxMode: Inc, VT) && !TLI.isIndexedLoadLegal(IdxMode: Dec, VT))
20149 return false;
20150 Ptr = LD->getBasePtr();
20151 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Val: N)) {
20152 if (ST->isIndexed())
20153 return false;
20154 EVT VT = ST->getMemoryVT();
20155 if (!TLI.isIndexedStoreLegal(IdxMode: Inc, VT) && !TLI.isIndexedStoreLegal(IdxMode: Dec, VT))
20156 return false;
20157 Ptr = ST->getBasePtr();
20158 IsLoad = false;
20159 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Val: N)) {
20160 if (LD->isIndexed())
20161 return false;
20162 EVT VT = LD->getMemoryVT();
20163 if (!TLI.isIndexedMaskedLoadLegal(IdxMode: Inc, VT) &&
20164 !TLI.isIndexedMaskedLoadLegal(IdxMode: Dec, VT))
20165 return false;
20166 Ptr = LD->getBasePtr();
20167 IsMasked = true;
20168 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Val: N)) {
20169 if (ST->isIndexed())
20170 return false;
20171 EVT VT = ST->getMemoryVT();
20172 if (!TLI.isIndexedMaskedStoreLegal(IdxMode: Inc, VT) &&
20173 !TLI.isIndexedMaskedStoreLegal(IdxMode: Dec, VT))
20174 return false;
20175 Ptr = ST->getBasePtr();
20176 IsLoad = false;
20177 IsMasked = true;
20178 } else {
20179 return false;
20180 }
20181 return true;
20182}
20183
20184/// Try turning a load/store into a pre-indexed load/store when the base
20185/// pointer is an add or subtract and it has other uses besides the load/store.
20186/// After the transformation, the new indexed load/store has effectively folded
20187/// the add/subtract in and all of its other uses are redirected to the
20188/// new load/store.
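///
/// For example, a store whose address is (add x, 8), where the add has other
/// uses, can become a pre-indexed store that also produces x + 8 as a result;
/// the remaining uses of the add are then redirected to that result.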
20189bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
20190 if (Level < AfterLegalizeDAG)
20191 return false;
20192
20193 bool IsLoad = true;
20194 bool IsMasked = false;
20195 SDValue Ptr;
20196 if (!getCombineLoadStoreParts(N, Inc: ISD::PRE_INC, Dec: ISD::PRE_DEC, IsLoad, IsMasked,
20197 Ptr, TLI))
20198 return false;
20199
20200 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
20201 // out. There is no reason to make this a preinc/predec.
20202 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
20203 Ptr->hasOneUse())
20204 return false;
20205
20206 // Ask the target to do addressing mode selection.
20207 SDValue BasePtr;
20208 SDValue Offset;
20209 ISD::MemIndexedMode AM = ISD::UNINDEXED;
20210 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
20211 return false;
20212
20213 // Backends without true r+i pre-indexed forms may need to pass a
20214 // constant base with a variable offset so that constant coercion
20215 // will work with the patterns in canonical form.
20216 bool Swapped = false;
20217 if (isa<ConstantSDNode>(Val: BasePtr)) {
20218 std::swap(a&: BasePtr, b&: Offset);
20219 Swapped = true;
20220 }
20221
  // Don't create an indexed load / store with zero offset.
20223 if (isNullConstant(V: Offset))
20224 return false;
20225
20226 // Try turning it into a pre-indexed load / store except when:
20227 // 1) The new base ptr is a frame index.
20228 // 2) If N is a store and the new base ptr is either the same as or is a
20229 // predecessor of the value being stored.
  // 3) Another use of the old base ptr is a predecessor of N. If ptr is
  //    folded, that would create a cycle.
20232 // 4) All uses are load / store ops that use it as old base ptr.
20233
20234 // Check #1. Preinc'ing a frame index would require copying the stack pointer
20235 // (plus the implicit offset) to a register to preinc anyway.
20236 if (isa<FrameIndexSDNode>(Val: BasePtr) || isa<RegisterSDNode>(Val: BasePtr))
20237 return false;
20238
20239 // Check #2.
20240 if (!IsLoad) {
20241 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(Val: N)->getValue()
20242 : cast<StoreSDNode>(Val: N)->getValue();
20243
20244 // Would require a copy.
20245 if (Val == BasePtr)
20246 return false;
20247
20248 // Would create a cycle.
20249 if (Val == Ptr || Ptr->isPredecessorOf(N: Val.getNode()))
20250 return false;
20251 }
20252
20253 // Caches for hasPredecessorHelper.
20254 SmallPtrSet<const SDNode *, 32> Visited;
20255 SmallVector<const SDNode *, 16> Worklist;
20256 Worklist.push_back(Elt: N);
20257
20258 // If the offset is a constant, there may be other adds of constants that
20259 // can be folded with this one. We should do this to avoid having to keep
20260 // a copy of the original base pointer.
20261 SmallVector<SDNode *, 16> OtherUses;
20262 unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps();
20263 if (isa<ConstantSDNode>(Val: Offset))
20264 for (SDUse &Use : BasePtr->uses()) {
20265 // Skip the use that is Ptr and uses of other results from BasePtr's
20266 // node (important for nodes that return multiple results).
20267 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
20268 continue;
20269
20270 if (SDNode::hasPredecessorHelper(N: Use.getUser(), Visited, Worklist,
20271 MaxSteps))
20272 continue;
20273
20274 if (Use.getUser()->getOpcode() != ISD::ADD &&
20275 Use.getUser()->getOpcode() != ISD::SUB) {
20276 OtherUses.clear();
20277 break;
20278 }
20279
20280 SDValue Op1 = Use.getUser()->getOperand(Num: (Use.getOperandNo() + 1) & 1);
20281 if (!isa<ConstantSDNode>(Val: Op1)) {
20282 OtherUses.clear();
20283 break;
20284 }
20285
20286 // FIXME: In some cases, we can be smarter about this.
20287 if (Op1.getValueType() != Offset.getValueType()) {
20288 OtherUses.clear();
20289 break;
20290 }
20291
20292 OtherUses.push_back(Elt: Use.getUser());
20293 }
20294
20295 if (Swapped)
20296 std::swap(a&: BasePtr, b&: Offset);
20297
20298 // Now check for #3 and #4.
20299 bool RealUse = false;
20300
20301 for (SDNode *User : Ptr->users()) {
20302 if (User == N)
20303 continue;
20304 if (SDNode::hasPredecessorHelper(N: User, Visited, Worklist, MaxSteps))
20305 return false;
20306
    // Only count this as a real use if Ptr cannot be folded into the user's
    // addressing mode; if every other use can fold Ptr, this transformation
    // is not profitable.
20309 if (!canFoldInAddressingMode(N: Ptr.getNode(), Use: User, DAG, TLI))
20310 RealUse = true;
20311 }
20312
20313 if (!RealUse)
20314 return false;
20315
20316 SDValue Result;
20317 if (!IsMasked) {
20318 if (IsLoad)
20319 Result = DAG.getIndexedLoad(OrigLoad: SDValue(N, 0), dl: SDLoc(N), Base: BasePtr, Offset, AM);
20320 else
20321 Result =
20322 DAG.getIndexedStore(OrigStore: SDValue(N, 0), dl: SDLoc(N), Base: BasePtr, Offset, AM);
20323 } else {
20324 if (IsLoad)
20325 Result = DAG.getIndexedMaskedLoad(OrigLoad: SDValue(N, 0), dl: SDLoc(N), Base: BasePtr,
20326 Offset, AM);
20327 else
20328 Result = DAG.getIndexedMaskedStore(OrigStore: SDValue(N, 0), dl: SDLoc(N), Base: BasePtr,
20329 Offset, AM);
20330 }
20331 ++PreIndexedNodes;
20332 ++NodesCombined;
20333 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
20334 Result.dump(&DAG); dbgs() << '\n');
20335 WorklistRemover DeadNodes(*this);
20336 if (IsLoad) {
20337 DAG.ReplaceAllUsesOfValueWith(From: SDValue(N, 0), To: Result.getValue(R: 0));
20338 DAG.ReplaceAllUsesOfValueWith(From: SDValue(N, 1), To: Result.getValue(R: 2));
20339 } else {
20340 DAG.ReplaceAllUsesOfValueWith(From: SDValue(N, 0), To: Result.getValue(R: 1));
20341 }
20342
20343 // Finally, since the node is now dead, remove it from the graph.
20344 deleteAndRecombine(N);
20345
20346 if (Swapped)
20347 std::swap(a&: BasePtr, b&: Offset);
20348
20349 // Replace other uses of BasePtr that can be updated to use Ptr
20350 for (SDNode *OtherUse : OtherUses) {
20351 unsigned OffsetIdx = 1;
20352 if (OtherUse->getOperand(Num: OffsetIdx).getNode() == BasePtr.getNode())
20353 OffsetIdx = 0;
20354 assert(OtherUse->getOperand(!OffsetIdx).getNode() == BasePtr.getNode() &&
20355 "Expected BasePtr operand");
20356
20357 // We need to replace ptr0 in the following expression:
20358 // x0 * offset0 + y0 * ptr0 = t0
20359 // knowing that
20360 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
20361 //
20362 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
20363 // indexed load/store and the expression that needs to be re-written.
20364 //
20365 // Therefore, we have:
    // t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
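    //
    // For example, with a pre-increment store whose written-back pointer is
    // t1 = ptr0 + 4 (x1 = y1 = 1) and another use t0 = ptr0 + 7 (x0 = y0 = 1),
    // this gives t0 = (7 - 4) + t1, i.e. the use is rewritten as (add t1, 3).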
20367
20368 auto *CN = cast<ConstantSDNode>(Val: OtherUse->getOperand(Num: OffsetIdx));
20369 const APInt &Offset0 = CN->getAPIntValue();
20370 const APInt &Offset1 = Offset->getAsAPIntVal();
20371 int X0 = (OtherUse->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
20372 int Y0 = (OtherUse->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
20373 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
20374 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
20375
20376 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
20377
20378 APInt CNV = Offset0;
20379 if (X0 < 0) CNV = -CNV;
20380 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
20381 else CNV = CNV - Offset1;
20382
20383 SDLoc DL(OtherUse);
20384
20385 // We can now generate the new expression.
20386 SDValue NewOp1 = DAG.getConstant(Val: CNV, DL, VT: CN->getValueType(ResNo: 0));
20387 SDValue NewOp2 = Result.getValue(R: IsLoad ? 1 : 0);
20388
20389 SDValue NewUse =
20390 DAG.getNode(Opcode, DL, VT: OtherUse->getValueType(ResNo: 0), N1: NewOp1, N2: NewOp2);
20391 DAG.ReplaceAllUsesOfValueWith(From: SDValue(OtherUse, 0), To: NewUse);
20392 deleteAndRecombine(N: OtherUse);
20393 }
20394
20395 // Replace the uses of Ptr with uses of the updated base value.
20396 DAG.ReplaceAllUsesOfValueWith(From: Ptr, To: Result.getValue(R: IsLoad ? 1 : 0));
20397 deleteAndRecombine(N: Ptr.getNode());
20398 AddToWorklist(N: Result.getNode());
20399
20400 return true;
20401}
20402
20403static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
20404 SDValue &BasePtr, SDValue &Offset,
20405 ISD::MemIndexedMode &AM,
20406 SelectionDAG &DAG,
20407 const TargetLowering &TLI) {
20408 if (PtrUse == N ||
20409 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
20410 return false;
20411
20412 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
20413 return false;
20414
  // Don't create an indexed load / store with zero offset.
20416 if (isNullConstant(V: Offset))
20417 return false;
20418
20419 if (isa<FrameIndexSDNode>(Val: BasePtr) || isa<RegisterSDNode>(Val: BasePtr))
20420 return false;
20421
20422 SmallPtrSet<const SDNode *, 32> Visited;
20423 unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps();
20424 for (SDNode *User : BasePtr->users()) {
20425 if (User == Ptr.getNode())
20426 continue;
20427
    // Say no if there's a later user which could perform the indexing instead.
20429 if (isa<MemSDNode>(Val: User)) {
20430 bool IsLoad = true;
20431 bool IsMasked = false;
20432 SDValue OtherPtr;
20433 if (getCombineLoadStoreParts(N: User, Inc: ISD::POST_INC, Dec: ISD::POST_DEC, IsLoad,
20434 IsMasked, Ptr&: OtherPtr, TLI)) {
20435 SmallVector<const SDNode *, 2> Worklist;
20436 Worklist.push_back(Elt: User);
20437 if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps))
20438 return false;
20439 }
20440 }
20441
20442 // If all the uses are load / store addresses, then don't do the
20443 // transformation.
20444 if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SUB) {
20445 for (SDNode *UserUser : User->users())
20446 if (canFoldInAddressingMode(N: User, Use: UserUser, DAG, TLI))
20447 return false;
20448 }
20449 }
20450 return true;
20451}
20452
20453static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
20454 bool &IsMasked, SDValue &Ptr,
20455 SDValue &BasePtr, SDValue &Offset,
20456 ISD::MemIndexedMode &AM,
20457 SelectionDAG &DAG,
20458 const TargetLowering &TLI) {
20459 if (!getCombineLoadStoreParts(N, Inc: ISD::POST_INC, Dec: ISD::POST_DEC, IsLoad,
20460 IsMasked, Ptr, TLI) ||
20461 Ptr->hasOneUse())
20462 return nullptr;
20463
20464 // Try turning it into a post-indexed load / store except when
20465 // 1) All uses are load / store ops that use it as base ptr (and
  //    it may be folded as addressing mode).
20467 // 2) Op must be independent of N, i.e. Op is neither a predecessor
20468 // nor a successor of N. Otherwise, if Op is folded that would
20469 // create a cycle.
20470 unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps();
20471 for (SDUse &U : Ptr->uses()) {
20472 if (U.getResNo() != Ptr.getResNo())
20473 continue;
20474
20475 // Check for #1.
20476 SDNode *Op = U.getUser();
20477 if (!shouldCombineToPostInc(N, Ptr, PtrUse: Op, BasePtr, Offset, AM, DAG, TLI))
20478 continue;
20479
20480 // Check for #2.
20481 SmallPtrSet<const SDNode *, 32> Visited;
20482 SmallVector<const SDNode *, 8> Worklist;
20483 // Ptr is predecessor to both N and Op.
20484 Visited.insert(Ptr: Ptr.getNode());
20485 Worklist.push_back(Elt: N);
20486 Worklist.push_back(Elt: Op);
20487 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) &&
20488 !SDNode::hasPredecessorHelper(N: Op, Visited, Worklist, MaxSteps))
20489 return Op;
20490 }
20491 return nullptr;
20492}
20493
/// Try to combine a load/store with an add/sub of the base pointer node into a
/// post-indexed load/store. The transformation effectively folds the
/// add/subtract into the new indexed load/store, and all uses of the add/sub
/// are redirected to the new load/store.
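///
/// For example, a load from p followed by an otherwise independent (add p, 4)
/// can become a post-indexed load producing both the loaded value and the
/// incremented pointer; uses of the add are redirected to that pointer result.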
20498bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
20499 if (Level < AfterLegalizeDAG)
20500 return false;
20501
20502 bool IsLoad = true;
20503 bool IsMasked = false;
20504 SDValue Ptr;
20505 SDValue BasePtr;
20506 SDValue Offset;
20507 ISD::MemIndexedMode AM = ISD::UNINDEXED;
20508 SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
20509 Offset, AM, DAG, TLI);
20510 if (!Op)
20511 return false;
20512
20513 SDValue Result;
20514 if (!IsMasked)
20515 Result = IsLoad ? DAG.getIndexedLoad(OrigLoad: SDValue(N, 0), dl: SDLoc(N), Base: BasePtr,
20516 Offset, AM)
20517 : DAG.getIndexedStore(OrigStore: SDValue(N, 0), dl: SDLoc(N),
20518 Base: BasePtr, Offset, AM);
20519 else
20520 Result = IsLoad ? DAG.getIndexedMaskedLoad(OrigLoad: SDValue(N, 0), dl: SDLoc(N),
20521 Base: BasePtr, Offset, AM)
20522 : DAG.getIndexedMaskedStore(OrigStore: SDValue(N, 0), dl: SDLoc(N),
20523 Base: BasePtr, Offset, AM);
20524 ++PostIndexedNodes;
20525 ++NodesCombined;
20526 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
20527 Result.dump(&DAG); dbgs() << '\n');
20528 WorklistRemover DeadNodes(*this);
20529 if (IsLoad) {
20530 DAG.ReplaceAllUsesOfValueWith(From: SDValue(N, 0), To: Result.getValue(R: 0));
20531 DAG.ReplaceAllUsesOfValueWith(From: SDValue(N, 1), To: Result.getValue(R: 2));
20532 } else {
20533 DAG.ReplaceAllUsesOfValueWith(From: SDValue(N, 0), To: Result.getValue(R: 1));
20534 }
20535
20536 // Finally, since the node is now dead, remove it from the graph.
20537 deleteAndRecombine(N);
20538
  // Replace the uses of Op with uses of the updated base value.
20540 DAG.ReplaceAllUsesOfValueWith(From: SDValue(Op, 0),
20541 To: Result.getValue(R: IsLoad ? 1 : 0));
20542 deleteAndRecombine(N: Op);
20543 return true;
20544}
20545
20546/// Return the base-pointer arithmetic from an indexed \p LD.
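/// For example, a PRE_INC or POST_INC load with base BP and offset C yields
/// (add BP, C); the *_DEC forms yield (sub BP, C).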
20547SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
20548 ISD::MemIndexedMode AM = LD->getAddressingMode();
20549 assert(AM != ISD::UNINDEXED);
20550 SDValue BP = LD->getOperand(Num: 1);
20551 SDValue Inc = LD->getOperand(Num: 2);
20552
20553 // Some backends use TargetConstants for load offsets, but don't expect
20554 // TargetConstants in general ADD nodes. We can convert these constants into
20555 // regular Constants (if the constant is not opaque).
20556 assert((Inc.getOpcode() != ISD::TargetConstant ||
20557 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
20558 "Cannot split out indexing using opaque target constants");
20559 if (Inc.getOpcode() == ISD::TargetConstant) {
20560 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Val&: Inc);
20561 Inc = DAG.getConstant(Val: *ConstInc->getConstantIntValue(), DL: SDLoc(Inc),
20562 VT: ConstInc->getValueType(ResNo: 0));
20563 }
20564
20565 unsigned Opc =
20566 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
20567 return DAG.getNode(Opcode: Opc, DL: SDLoc(LD), VT: BP.getSimpleValueType(), N1: BP, N2: Inc);
20568}
20569
20570static inline ElementCount numVectorEltsOrZero(EVT T) {
20571 return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(MinVal: 0);
20572}
20573
20574bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
20575 EVT STType = Val.getValueType();
20576 EVT STMemType = ST->getMemoryVT();
20577 if (STType == STMemType)
20578 return true;
20579 if (isTypeLegal(VT: STMemType))
20580 return false; // fail.
20581 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
20582 TLI.isOperationLegal(Op: ISD::FTRUNC, VT: STMemType)) {
20583 Val = DAG.getNode(Opcode: ISD::FTRUNC, DL: SDLoc(ST), VT: STMemType, Operand: Val);
20584 return true;
20585 }
20586 if (numVectorEltsOrZero(T: STType) == numVectorEltsOrZero(T: STMemType) &&
20587 STType.isInteger() && STMemType.isInteger()) {
20588 Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(ST), VT: STMemType, Operand: Val);
20589 return true;
20590 }
20591 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
20592 Val = DAG.getBitcast(VT: STMemType, V: Val);
20593 return true;
20594 }
20595 return false; // fail.
20596}
20597
20598bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
20599 EVT LDMemType = LD->getMemoryVT();
20600 EVT LDType = LD->getValueType(ResNo: 0);
20601 assert(Val.getValueType() == LDMemType &&
20602 "Attempting to extend value of non-matching type");
20603 if (LDType == LDMemType)
20604 return true;
20605 if (LDMemType.isInteger() && LDType.isInteger()) {
20606 switch (LD->getExtensionType()) {
20607 case ISD::NON_EXTLOAD:
20608 Val = DAG.getBitcast(VT: LDType, V: Val);
20609 return true;
20610 case ISD::EXTLOAD:
20611 Val = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: SDLoc(LD), VT: LDType, Operand: Val);
20612 return true;
20613 case ISD::SEXTLOAD:
20614 Val = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: SDLoc(LD), VT: LDType, Operand: Val);
20615 return true;
20616 case ISD::ZEXTLOAD:
20617 Val = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SDLoc(LD), VT: LDType, Operand: Val);
20618 return true;
20619 }
20620 }
20621 return false;
20622}
20623
20624StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
20625 int64_t &Offset) {
20626 SDValue Chain = LD->getOperand(Num: 0);
20627
20628 // Look through CALLSEQ_START.
20629 if (Chain.getOpcode() == ISD::CALLSEQ_START)
20630 Chain = Chain->getOperand(Num: 0);
20631
20632 StoreSDNode *ST = nullptr;
20633 SmallVector<SDValue, 8> Aliases;
20634 if (Chain.getOpcode() == ISD::TokenFactor) {
20635 // Look for unique store within the TokenFactor.
20636 for (SDValue Op : Chain->ops()) {
20637 StoreSDNode *Store = dyn_cast<StoreSDNode>(Val: Op.getNode());
20638 if (!Store)
20639 continue;
20640 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(N: LD, DAG);
20641 BaseIndexOffset BasePtrST = BaseIndexOffset::match(N: Store, DAG);
20642 if (!BasePtrST.equalBaseIndex(Other: BasePtrLD, DAG, Off&: Offset))
20643 continue;
20644 // Make sure the store is not aliased with any nodes in TokenFactor.
20645 GatherAllAliases(N: Store, OriginalChain: Chain, Aliases);
20646 if (Aliases.empty() ||
20647 (Aliases.size() == 1 && Aliases.front().getNode() == Store))
20648 ST = Store;
20649 break;
20650 }
20651 } else {
20652 StoreSDNode *Store = dyn_cast<StoreSDNode>(Val: Chain.getNode());
20653 if (Store) {
20654 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(N: LD, DAG);
20655 BaseIndexOffset BasePtrST = BaseIndexOffset::match(N: Store, DAG);
20656 if (BasePtrST.equalBaseIndex(Other: BasePtrLD, DAG, Off&: Offset))
20657 ST = Store;
20658 }
20659 }
20660
20661 return ST;
20662}
20663
20664SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
20665 if (OptLevel == CodeGenOptLevel::None || !LD->isSimple())
20666 return SDValue();
20667 SDValue Chain = LD->getOperand(Num: 0);
20668 int64_t Offset;
20669
20670 StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
20671 // TODO: Relax this restriction for unordered atomics (see D66309)
20672 if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
20673 return SDValue();
20674
20675 EVT LDType = LD->getValueType(ResNo: 0);
20676 EVT LDMemType = LD->getMemoryVT();
20677 EVT STMemType = ST->getMemoryVT();
20678 EVT STType = ST->getValue().getValueType();
20679
20680 // There are two cases to consider here:
20681 // 1. The store is fixed width and the load is scalable. In this case we
20682 // don't know at compile time if the store completely envelops the load
20683 // so we abandon the optimisation.
20684 // 2. The store is scalable and the load is fixed width. We could
20685 // potentially support a limited number of cases here, but there has been
20686 // no cost-benefit analysis to prove it's worth it.
20687 bool LdStScalable = LDMemType.isScalableVT();
20688 if (LdStScalable != STMemType.isScalableVT())
20689 return SDValue();
20690
20691 // If we are dealing with scalable vectors on a big endian platform the
20692 // calculation of offsets below becomes trickier, since we do not know at
20693 // compile time the absolute size of the vector. Until we've done more
20694 // analysis on big-endian platforms it seems better to bail out for now.
20695 if (LdStScalable && DAG.getDataLayout().isBigEndian())
20696 return SDValue();
20697
  // Normalize for endianness. After this, Offset=0 will denote that the least
  // significant bit in the loaded value maps to the least significant bit in
  // the stored value. With Offset=n (for n > 0) the loaded value starts at the
  // n-th least significant byte of the stored value.
20702 int64_t OrigOffset = Offset;
20703 if (DAG.getDataLayout().isBigEndian())
20704 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
20705 (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) /
20706 8 -
20707 Offset;
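  // For example, loading 2 bytes at Offset 2 out of an 8-byte stored value
  // corresponds, on a big-endian target, to Offset (8 - 2) - 2 = 4.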
20708
  // Check that the stored value covers all bits that are loaded.
20710 bool STCoversLD;
20711
20712 TypeSize LdMemSize = LDMemType.getSizeInBits();
20713 TypeSize StMemSize = STMemType.getSizeInBits();
20714 if (LdStScalable)
20715 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
20716 else
20717 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <=
20718 StMemSize.getFixedValue());
20719
20720 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
20721 if (LD->isIndexed()) {
20722 // Cannot handle opaque target constants and we must respect the user's
20723 // request not to split indexes from loads.
20724 if (!canSplitIdx(LD))
20725 return SDValue();
20726 SDValue Idx = SplitIndexingFromLoad(LD);
20727 SDValue Ops[] = {Val, Idx, Chain};
20728 return CombineTo(N: LD, To: Ops, NumTo: 3);
20729 }
20730 return CombineTo(N: LD, Res0: Val, Res1: Chain);
20731 };
20732
20733 if (!STCoversLD)
20734 return SDValue();
20735
20736 // Memory as copy space (potentially masked).
20737 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
20738 // Simple case: Direct non-truncating forwarding
20739 if (LDType.getSizeInBits() == LdMemSize)
20740 return ReplaceLd(LD, ST->getValue(), Chain);
20741 // Can we model the truncate and extension with an and mask?
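    // For example, an i32 value trunc-stored as i16 and then zero- or
    // any-extend-loaded back as i32 can be forwarded as (and StoredVal, 0xFFFF).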
20742 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
20743 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
20744 // Mask to size of LDMemType
20745 auto Mask =
20746 DAG.getConstant(Val: APInt::getLowBitsSet(numBits: STType.getFixedSizeInBits(),
20747 loBitsSet: StMemSize.getFixedValue()),
20748 DL: SDLoc(ST), VT: STType);
20749 auto Val = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(LD), VT: LDType, N1: ST->getValue(), N2: Mask);
20750 return ReplaceLd(LD, Val, Chain);
20751 }
20752 }
20753
20754 // Handle some cases for big-endian that would be Offset 0 and handled for
20755 // little-endian.
20756 SDValue Val = ST->getValue();
20757 if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) {
20758 if (STType.isInteger() && !STType.isVector() && LDType.isInteger() &&
20759 !LDType.isVector() && isTypeLegal(VT: STType) &&
20760 TLI.isOperationLegal(Op: ISD::SRL, VT: STType)) {
20761 Val = DAG.getNode(
20762 Opcode: ISD::SRL, DL: SDLoc(LD), VT: STType, N1: Val,
20763 N2: DAG.getShiftAmountConstant(Val: Offset * 8, VT: STType, DL: SDLoc(LD)));
20764 Offset = 0;
20765 }
20766 }
20767
20768 // TODO: Deal with nonzero offset.
20769 if (LD->getBasePtr().isUndef() || Offset != 0)
20770 return SDValue();
  // Model necessary truncations / extensions.
  // Truncate the value to the stored memory size.
20773 do {
20774 if (!getTruncatedStoreValue(ST, Val))
20775 break;
20776 if (!isTypeLegal(VT: LDMemType))
20777 break;
20778 if (STMemType != LDMemType) {
20779 if (LdMemSize == StMemSize) {
20780 if (TLI.isOperationLegal(Op: ISD::BITCAST, VT: LDMemType) &&
20781 isTypeLegal(VT: LDMemType) &&
20782 TLI.isOperationLegal(Op: ISD::BITCAST, VT: STMemType) &&
20783 isTypeLegal(VT: STMemType) &&
20784 TLI.isLoadBitCastBeneficial(LoadVT: LDMemType, BitcastVT: STMemType, DAG,
20785 MMO: *LD->getMemOperand()))
20786 Val = DAG.getBitcast(VT: LDMemType, V: Val);
20787 else
20788 break;
20789 } else if (LDMemType.isVector() && isTypeLegal(VT: STMemType)) {
20790 EVT EltVT = LDMemType.getVectorElementType();
20791 TypeSize EltSize = EltVT.getSizeInBits();
20792
20793 if (!StMemSize.isKnownMultipleOf(RHS: EltSize))
20794 break;
20795
20796 EVT InterVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: EltVT,
20797 NumElements: StMemSize.divideCoefficientBy(RHS: EltSize));
20798 if (!TLI.isOperationLegalOrCustom(Op: ISD::EXTRACT_SUBVECTOR, VT: InterVT))
20799 break;
20800
20801 Val = DAG.getExtractSubvector(DL: SDLoc(LD), VT: LDMemType,
20802 Vec: DAG.getBitcast(VT: InterVT, V: Val), Idx: 0);
20803 } else if (!STMemType.isVector() && !LDMemType.isVector() &&
20804 STMemType.isInteger() && LDMemType.isInteger())
20805 Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(LD), VT: LDMemType, Operand: Val);
20806 else
20807 break;
20808 }
20809 if (!extendLoadedValueToExtension(LD, Val))
20810 break;
20811 return ReplaceLd(LD, Val, Chain);
20812 } while (false);
20813
20814 // On failure, cleanup dead nodes we may have created.
20815 if (Val->use_empty())
20816 deleteAndRecombine(N: Val.getNode());
20817 return SDValue();
20818}
20819
20820SDValue DAGCombiner::visitLOAD(SDNode *N) {
20821 LoadSDNode *LD = cast<LoadSDNode>(Val: N);
20822 SDValue Chain = LD->getChain();
20823 SDValue Ptr = LD->getBasePtr();
20824
20825 // If load is not volatile and there are no uses of the loaded value (and
20826 // the updated indexed value in case of indexed loads), change uses of the
20827 // chain value into uses of the chain input (i.e. delete the dead load).
20828 // TODO: Allow this for unordered atomics (see D66309)
20829 if (LD->isSimple()) {
20830 if (N->getValueType(ResNo: 1) == MVT::Other) {
20831 // Unindexed loads.
20832 if (!N->hasAnyUseOfValue(Value: 0)) {
20833 // It's not safe to use the two value CombineTo variant here. e.g.
20834 // v1, chain2 = load chain1, loc
20835 // v2, chain3 = load chain2, loc
20836 // v3 = add v2, c
20837 // Now we replace use of chain2 with chain1. This makes the second load
20838 // isomorphic to the one we are deleting, and thus makes this load live.
20839 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
20840 dbgs() << "\nWith chain: "; Chain.dump(&DAG);
20841 dbgs() << "\n");
20842 WorklistRemover DeadNodes(*this);
20843 DAG.ReplaceAllUsesOfValueWith(From: SDValue(N, 1), To: Chain);
20844 AddUsersToWorklist(N: Chain.getNode());
20845 if (N->use_empty())
20846 deleteAndRecombine(N);
20847
20848 return SDValue(N, 0); // Return N so it doesn't get rechecked!
20849 }
20850 } else {
20851 // Indexed loads.
20852 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
20853
20854 // If this load has an opaque TargetConstant offset, then we cannot split
20855 // the indexing into an add/sub directly (that TargetConstant may not be
20856 // valid for a different type of node, and we cannot convert an opaque
20857 // target constant into a regular constant).
20858 bool CanSplitIdx = canSplitIdx(LD);
20859
20860 if (!N->hasAnyUseOfValue(Value: 0) && (CanSplitIdx || !N->hasAnyUseOfValue(Value: 1))) {
20861 SDValue Undef = DAG.getUNDEF(VT: N->getValueType(ResNo: 0));
20862 SDValue Index;
20863 if (N->hasAnyUseOfValue(Value: 1) && CanSplitIdx) {
20864 Index = SplitIndexingFromLoad(LD);
20865 // Try to fold the base pointer arithmetic into subsequent loads and
20866 // stores.
20867 AddUsersToWorklist(N);
20868 } else
20869 Index = DAG.getUNDEF(VT: N->getValueType(ResNo: 1));
20870 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
20871 dbgs() << "\nWith: "; Undef.dump(&DAG);
20872 dbgs() << " and 2 other values\n");
20873 WorklistRemover DeadNodes(*this);
20874 DAG.ReplaceAllUsesOfValueWith(From: SDValue(N, 0), To: Undef);
20875 DAG.ReplaceAllUsesOfValueWith(From: SDValue(N, 1), To: Index);
20876 DAG.ReplaceAllUsesOfValueWith(From: SDValue(N, 2), To: Chain);
20877 deleteAndRecombine(N);
20878 return SDValue(N, 0); // Return N so it doesn't get rechecked!
20879 }
20880 }
20881 }
20882
20883 // If this load is directly stored, replace the load value with the stored
20884 // value.
20885 if (auto V = ForwardStoreValueToDirectLoad(LD))
20886 return V;
20887
20888 // Try to infer better alignment information than the load already has.
20889 if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
20890 !LD->isAtomic()) {
20891 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
20892 if (*Alignment > LD->getAlign() &&
20893 isAligned(Lhs: *Alignment, SizeInBytes: LD->getSrcValueOffset())) {
20894 SDValue NewLoad = DAG.getExtLoad(
20895 ExtType: LD->getExtensionType(), dl: SDLoc(N), VT: LD->getValueType(ResNo: 0), Chain, Ptr,
20896 PtrInfo: LD->getPointerInfo(), MemVT: LD->getMemoryVT(), Alignment: *Alignment,
20897 MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
20898 // NewLoad will always be N as we are only refining the alignment
20899 assert(NewLoad.getNode() == N);
20900 (void)NewLoad;
20901 }
20902 }
20903 }
20904
20905 if (LD->isUnindexed()) {
20906 // Walk up chain skipping non-aliasing memory nodes.
20907 SDValue BetterChain = FindBetterChain(N: LD, Chain);
20908
20909 // If there is a better chain.
20910 if (Chain != BetterChain) {
20911 SDValue ReplLoad;
20912
      // Replace the chain to avoid the dependency.
20914 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
20915 ReplLoad = DAG.getLoad(VT: N->getValueType(ResNo: 0), dl: SDLoc(LD),
20916 Chain: BetterChain, Ptr, MMO: LD->getMemOperand());
20917 } else {
20918 ReplLoad = DAG.getExtLoad(ExtType: LD->getExtensionType(), dl: SDLoc(LD),
20919 VT: LD->getValueType(ResNo: 0),
20920 Chain: BetterChain, Ptr, MemVT: LD->getMemoryVT(),
20921 MMO: LD->getMemOperand());
20922 }
20923
20924 // Create token factor to keep old chain connected.
20925 SDValue Token = DAG.getNode(Opcode: ISD::TokenFactor, DL: SDLoc(N),
20926 VT: MVT::Other, N1: Chain, N2: ReplLoad.getValue(R: 1));
20927
20928 // Replace uses with load result and token factor
20929 return CombineTo(N, Res0: ReplLoad.getValue(R: 0), Res1: Token);
20930 }
20931 }
20932
20933 // Try transforming N to an indexed load.
20934 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
20935 return SDValue(N, 0);
20936
20937 // Try to slice up N to more direct loads if the slices are mapped to
20938 // different register banks or pairing can take place.
20939 if (SliceUpLoad(N))
20940 return SDValue(N, 0);
20941
20942 return SDValue();
20943}
20944
20945namespace {
20946
20947/// Helper structure used to slice a load in smaller loads.
20948/// Basically a slice is obtained from the following sequence:
20949/// Origin = load Ty1, Base
20950/// Shift = srl Ty1 Origin, CstTy Amount
20951/// Inst = trunc Shift to Ty2
20952///
20953/// Then, it will be rewritten into:
20954/// Slice = load SliceTy, Base + SliceOffset
20955/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
20956///
20957/// SliceTy is deduced from the number of bits that are actually used to
20958/// build Inst.
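///
/// For example, an i32 Origin used as (trunc Origin to i8) and as
/// (trunc (srl Origin, 16) to i16) yields two slices: an i8 load at
/// Base + 0 and an i16 load at Base + 2 (on a little-endian target).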
20959struct LoadedSlice {
20960 /// Helper structure used to compute the cost of a slice.
20961 struct Cost {
20962 /// Are we optimizing for code size.
20963 bool ForCodeSize = false;
20964
    /// Various costs.
20966 unsigned Loads = 0;
20967 unsigned Truncates = 0;
20968 unsigned CrossRegisterBanksCopies = 0;
20969 unsigned ZExts = 0;
20970 unsigned Shift = 0;
20971
20972 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
20973
20974 /// Get the cost of one isolated slice.
20975 Cost(const LoadedSlice &LS, bool ForCodeSize)
20976 : ForCodeSize(ForCodeSize), Loads(1) {
20977 EVT TruncType = LS.Inst->getValueType(ResNo: 0);
20978 EVT LoadedType = LS.getLoadedType();
20979 if (TruncType != LoadedType &&
20980 !LS.DAG->getTargetLoweringInfo().isZExtFree(FromTy: LoadedType, ToTy: TruncType))
20981 ZExts = 1;
20982 }
20983
    /// Account for slicing gain in the current cost.
    /// Slicing provides a few gains, like removing a shift or a
    /// truncate. This method allows growing the cost of the original
    /// load by the gain from this slice.
20988 void addSliceGain(const LoadedSlice &LS) {
20989 // Each slice saves a truncate.
20990 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
20991 if (!TLI.isTruncateFree(Val: LS.Inst->getOperand(Num: 0), VT2: LS.Inst->getValueType(ResNo: 0)))
20992 ++Truncates;
20993 // If there is a shift amount, this slice gets rid of it.
20994 if (LS.Shift)
20995 ++Shift;
20996 // If this slice can merge a cross register bank copy, account for it.
20997 if (LS.canMergeExpensiveCrossRegisterBankCopy())
20998 ++CrossRegisterBanksCopies;
20999 }
21000
21001 Cost &operator+=(const Cost &RHS) {
21002 Loads += RHS.Loads;
21003 Truncates += RHS.Truncates;
21004 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
21005 ZExts += RHS.ZExts;
21006 Shift += RHS.Shift;
21007 return *this;
21008 }
21009
21010 bool operator==(const Cost &RHS) const {
21011 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
21012 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
21013 ZExts == RHS.ZExts && Shift == RHS.Shift;
21014 }
21015
21016 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
21017
21018 bool operator<(const Cost &RHS) const {
21019 // Assume cross register banks copies are as expensive as loads.
21020 // FIXME: Do we want some more target hooks?
21021 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
21022 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
21023 // Unless we are optimizing for code size, consider the
21024 // expensive operation first.
21025 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
21026 return ExpensiveOpsLHS < ExpensiveOpsRHS;
21027 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
21028 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
21029 }
21030
21031 bool operator>(const Cost &RHS) const { return RHS < *this; }
21032
21033 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
21034
21035 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
21036 };
21037
  // The last instruction that represents the slice. This should be a
  // truncate instruction.
21040 SDNode *Inst;
21041
21042 // The original load instruction.
21043 LoadSDNode *Origin;
21044
21045 // The right shift amount in bits from the original load.
21046 unsigned Shift;
21047
  // The DAG that Origin came from.
21049 // This is used to get some contextual information about legal types, etc.
21050 SelectionDAG *DAG;
21051
21052 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
21053 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
21054 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
21055
  /// Get the bits used in a chunk of bits \p BitWidth large.
  /// \return Result is \p BitWidth bits wide, with used bits set to 1 and
  /// unused bits set to 0.
21059 APInt getUsedBits() const {
21060 // Reproduce the trunc(lshr) sequence:
21061 // - Start from the truncated value.
21062 // - Zero extend to the desired bit width.
21063 // - Shift left.
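    //
    // For example, an i32 Origin truncated to i8 after a shift of 16 gives
    // UsedBits = 0x00FF0000.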
21064 assert(Origin && "No original load to compare against.");
21065 unsigned BitWidth = Origin->getValueSizeInBits(ResNo: 0);
21066 assert(Inst && "This slice is not bound to an instruction");
21067 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
21068 "Extracted slice is bigger than the whole type!");
21069 APInt UsedBits(Inst->getValueSizeInBits(ResNo: 0), 0);
21070 UsedBits.setAllBits();
21071 UsedBits = UsedBits.zext(width: BitWidth);
21072 UsedBits <<= Shift;
21073 return UsedBits;
21074 }
21075
21076 /// Get the size of the slice to be loaded in bytes.
21077 unsigned getLoadedSize() const {
21078 unsigned SliceSize = getUsedBits().popcount();
21079 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
21080 return SliceSize / 8;
21081 }
21082
21083 /// Get the type that will be loaded for this slice.
21084 /// Note: This may not be the final type for the slice.
21085 EVT getLoadedType() const {
21086 assert(DAG && "Missing context");
21087 LLVMContext &Ctxt = *DAG->getContext();
21088 return EVT::getIntegerVT(Context&: Ctxt, BitWidth: getLoadedSize() * 8);
21089 }
21090
21091 /// Get the alignment of the load used for this slice.
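  /// For example, an 8-byte-aligned Origin sliced at byte offset 2 yields an
  /// alignment of 2.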
21092 Align getAlign() const {
21093 Align Alignment = Origin->getAlign();
21094 uint64_t Offset = getOffsetFromBase();
21095 if (Offset != 0)
21096 Alignment = commonAlignment(A: Alignment, Offset: Alignment.value() + Offset);
21097 return Alignment;
21098 }
21099
21100 /// Check if this slice can be rewritten with legal operations.
21101 bool isLegal() const {
21102 // An invalid slice is not legal.
21103 if (!Origin || !Inst || !DAG)
21104 return false;
21105
    // Offsets are for indexed loads only; we do not handle that.
21107 if (!Origin->getOffset().isUndef())
21108 return false;
21109
21110 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
21111
21112 // Check that the type is legal.
21113 EVT SliceType = getLoadedType();
21114 if (!TLI.isTypeLegal(VT: SliceType))
21115 return false;
21116
21117 // Check that the load is legal for this type.
21118 if (!TLI.isOperationLegal(Op: ISD::LOAD, VT: SliceType))
21119 return false;
21120
21121 // Check that the offset can be computed.
21122 // 1. Check its type.
21123 EVT PtrType = Origin->getBasePtr().getValueType();
21124 if (PtrType == MVT::Untyped || PtrType.isExtended())
21125 return false;
21126
21127 // 2. Check that it fits in the immediate.
21128 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
21129 return false;
21130
21131 // 3. Check that the computation is legal.
21132 if (!TLI.isOperationLegal(Op: ISD::ADD, VT: PtrType))
21133 return false;
21134
21135 // Check that the zext is legal if it needs one.
21136 EVT TruncateType = Inst->getValueType(ResNo: 0);
21137 if (TruncateType != SliceType &&
21138 !TLI.isOperationLegal(Op: ISD::ZERO_EXTEND, VT: TruncateType))
21139 return false;
21140
21141 return true;
21142 }
21143
21144 /// Get the offset in bytes of this slice in the original chunk of
21145 /// bits.
21146 /// \pre DAG != nullptr.
21147 uint64_t getOffsetFromBase() const {
21148 assert(DAG && "Missing context.");
21149 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
21150 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
21151 uint64_t Offset = Shift / 8;
21152 unsigned TySizeInBytes = Origin->getValueSizeInBits(ResNo: 0) / 8;
21153 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
21154 "The size of the original loaded type is not a multiple of a"
21155 " byte.");
21156 // If Offset is bigger than TySizeInBytes, it means we are loading all
    // zeros. This should have been optimized away earlier in the pipeline.
21158 assert(TySizeInBytes > Offset &&
21159 "Invalid shift amount for given loaded size");
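    // For example, for an i32 Origin with Shift == 16 and a 1-byte slice, the
    // offset is 2 on a little-endian target but 4 - 2 - 1 = 1 on big-endian.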
21160 if (IsBigEndian)
21161 Offset = TySizeInBytes - Offset - getLoadedSize();
21162 return Offset;
21163 }
21164
21165 /// Generate the sequence of instructions to load the slice
21166 /// represented by this object and redirect the uses of this slice to
21167 /// this new sequence of instructions.
21168 /// \pre this->Inst && this->Origin are valid Instructions and this
21169 /// object passed the legal check: LoadedSlice::isLegal returned true.
21170 /// \return The last instruction of the sequence used to load the slice.
21171 SDValue loadSlice() const {
21172 assert(Inst && Origin && "Unable to replace a non-existing slice.");
21173 const SDValue &OldBaseAddr = Origin->getBasePtr();
21174 SDValue BaseAddr = OldBaseAddr;
21175 // Get the offset in that chunk of bytes w.r.t. the endianness.
21176 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
21177 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
21178 if (Offset) {
21179 // BaseAddr = BaseAddr + Offset.
21180 EVT ArithType = BaseAddr.getValueType();
21181 SDLoc DL(Origin);
21182 BaseAddr = DAG->getNode(Opcode: ISD::ADD, DL, VT: ArithType, N1: BaseAddr,
21183 N2: DAG->getConstant(Val: Offset, DL, VT: ArithType));
21184 }
21185
21186 // Create the type of the loaded slice according to its size.
21187 EVT SliceType = getLoadedType();
21188
21189 // Create the load for the slice.
21190 SDValue LastInst =
21191 DAG->getLoad(VT: SliceType, dl: SDLoc(Origin), Chain: Origin->getChain(), Ptr: BaseAddr,
21192 PtrInfo: Origin->getPointerInfo().getWithOffset(O: Offset), Alignment: getAlign(),
21193 MMOFlags: Origin->getMemOperand()->getFlags());
21194 // If the final type is not the same as the loaded type, this means that
21195 // we have to pad with zero. Create a zero extend for that.
21196 EVT FinalType = Inst->getValueType(ResNo: 0);
21197 if (SliceType != FinalType)
21198 LastInst =
21199 DAG->getNode(Opcode: ISD::ZERO_EXTEND, DL: SDLoc(LastInst), VT: FinalType, Operand: LastInst);
21200 return LastInst;
21201 }
21202
21203 /// Check if this slice can be merged with an expensive cross register
21204 /// bank copy. E.g.,
21205 /// i = load i32
21206 /// f = bitcast i32 i to float
21207 bool canMergeExpensiveCrossRegisterBankCopy() const {
21208 if (!Inst || !Inst->hasOneUse())
21209 return false;
21210 SDNode *User = *Inst->user_begin();
21211 if (User->getOpcode() != ISD::BITCAST)
21212 return false;
21213 assert(DAG && "Missing context");
21214 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
21215 EVT ResVT = User->getValueType(ResNo: 0);
21216 const TargetRegisterClass *ResRC =
21217 TLI.getRegClassFor(VT: ResVT.getSimpleVT(), isDivergent: User->isDivergent());
21218 const TargetRegisterClass *ArgRC =
21219 TLI.getRegClassFor(VT: User->getOperand(Num: 0).getValueType().getSimpleVT(),
21220 isDivergent: User->getOperand(Num: 0)->isDivergent());
21221 if (ArgRC == ResRC || !TLI.isOperationLegal(Op: ISD::LOAD, VT: ResVT))
21222 return false;
21223
21224 // At this point, we know that we perform a cross-register-bank copy.
21225 // Check if it is expensive.
21226 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
21227 // Assume bitcasts are cheap, unless both register classes do not
21228 // explicitly share a common sub class.
21229 if (!TRI || TRI->getCommonSubClass(A: ArgRC, B: ResRC))
21230 return false;
21231
21232 // Check if it will be merged with the load.
21233 // 1. Check the alignment / fast memory access constraint.
21234 unsigned IsFast = 0;
21235 if (!TLI.allowsMemoryAccess(Context&: *DAG->getContext(), DL: DAG->getDataLayout(), VT: ResVT,
21236 AddrSpace: Origin->getAddressSpace(), Alignment: getAlign(),
21237 Flags: Origin->getMemOperand()->getFlags(), Fast: &IsFast) ||
21238 !IsFast)
21239 return false;
21240
21241 // 2. Check that the load is a legal operation for that type.
21242 if (!TLI.isOperationLegal(Op: ISD::LOAD, VT: ResVT))
21243 return false;
21244
21245 // 3. Check that we do not have a zext in the way.
21246 if (Inst->getValueType(ResNo: 0) != getLoadedType())
21247 return false;
21248
21249 return true;
21250 }
21251};
21252
21253} // end anonymous namespace
21254
21255/// Check that all bits set in \p UsedBits form a dense region, i.e.,
21256/// \p UsedBits looks like 0..0 1..1 0..0.
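/// For example, 0b00111100 is dense, while 0b00100100 is not.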
21257static bool areUsedBitsDense(const APInt &UsedBits) {
21258 // If all the bits are one, this is dense!
21259 if (UsedBits.isAllOnes())
21260 return true;
21261
21262 // Get rid of the unused bits on the right.
21263 APInt NarrowedUsedBits = UsedBits.lshr(shiftAmt: UsedBits.countr_zero());
21264 // Get rid of the unused bits on the left.
21265 if (NarrowedUsedBits.countl_zero())
21266 NarrowedUsedBits = NarrowedUsedBits.trunc(width: NarrowedUsedBits.getActiveBits());
21267 // Check that the chunk of bits is completely used.
21268 return NarrowedUsedBits.isAllOnes();
21269}
21270
21271/// Check whether or not \p First and \p Second are next to each other
21272/// in memory. This means that there is no hole between the bits loaded
21273/// by \p First and the bits loaded by \p Second.
21274static bool areSlicesNextToEachOther(const LoadedSlice &First,
21275 const LoadedSlice &Second) {
21276 assert(First.Origin == Second.Origin && First.Origin &&
21277 "Unable to match different memory origins.");
21278 APInt UsedBits = First.getUsedBits();
21279 assert((UsedBits & Second.getUsedBits()) == 0 &&
21280 "Slices are not supposed to overlap.");
21281 UsedBits |= Second.getUsedBits();
21282 return areUsedBitsDense(UsedBits);
21283}
21284
/// Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there are in the slices in \p LoadedSlices.
21289static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
21290 LoadedSlice::Cost &GlobalLSCost) {
21291 unsigned NumberOfSlices = LoadedSlices.size();
  // If there are fewer than 2 elements, no pairing is possible.
21293 if (NumberOfSlices < 2)
21294 return;
21295
21296 // Sort the slices so that elements that are likely to be next to each
21297 // other in memory are next to each other in the list.
21298 llvm::sort(C&: LoadedSlices, Comp: [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
21299 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
21300 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
21301 });
21302 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. second) potential candidate
  // to be placed in a paired load.
21305 const LoadedSlice *First = nullptr;
21306 const LoadedSlice *Second = nullptr;
21307 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
21308 // Set the beginning of the pair.
21309 First = Second) {
21310 Second = &LoadedSlices[CurrSlice];
21311
21312 // If First is NULL, it means we start a new pair.
21313 // Get to the next slice.
21314 if (!First)
21315 continue;
21316
21317 EVT LoadedType = First->getLoadedType();
21318
21319 // If the types of the slices are different, we cannot pair them.
21320 if (LoadedType != Second->getLoadedType())
21321 continue;
21322
21323 // Check if the target supplies paired loads for this type.
21324 Align RequiredAlignment;
21325 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // Move to the next pair; this type is hopeless.
21327 Second = nullptr;
21328 continue;
21329 }
21330 // Check if we meet the alignment requirement.
21331 if (First->getAlign() < RequiredAlignment)
21332 continue;
21333
21334 // Check that both loads are next to each other in memory.
21335 if (!areSlicesNextToEachOther(First: *First, Second: *Second))
21336 continue;
21337
21338 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
21339 --GlobalLSCost.Loads;
21340 // Move to the next pair.
21341 Second = nullptr;
21342 }
21343}
21344
/// Check the profitability of all involved LoadedSlices.
/// Currently, it is considered profitable if there are exactly two
/// involved slices (1) which are (2) next to each other in memory, and
21348/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
21349///
21350/// Note: The order of the elements in \p LoadedSlices may be modified, but not
21351/// the elements themselves.
21352///
21353/// FIXME: When the cost model will be mature enough, we can relax
21354/// constraints (1) and (2).
21355static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
21356 const APInt &UsedBits, bool ForCodeSize) {
21357 unsigned NumberOfSlices = LoadedSlices.size();
21358 if (StressLoadSlicing)
21359 return NumberOfSlices > 1;
21360
21361 // Check (1).
21362 if (NumberOfSlices != 2)
21363 return false;
21364
21365 // Check (2).
21366 if (!areUsedBitsDense(UsedBits))
21367 return false;
21368
21369 // Check (3).
21370 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
21371 // The original code has one big load.
21372 OrigCost.Loads = 1;
21373 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
21374 const LoadedSlice &LS = LoadedSlices[CurrSlice];
21375 // Accumulate the cost of all the slices.
21376 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
21377 GlobalSlicingCost += SliceCost;
21378
21379 // Account as cost in the original configuration the gain obtained
21380 // with the current slices.
21381 OrigCost.addSliceGain(LS);
21382 }
21383
21384 // If the target supports paired load, adjust the cost accordingly.
21385 adjustCostForPairing(LoadedSlices, GlobalLSCost&: GlobalSlicingCost);
21386 return OrigCost > GlobalSlicingCost;
21387}
21388
/// If the given load, \p N, is used only by trunc or trunc(lshr)
21390/// operations, split it in the various pieces being extracted.
21391///
21392/// This sort of thing is introduced by SROA.
21393/// This slicing takes care not to insert overlapping loads.
/// \pre \p N is a simple load (i.e., not an atomic or volatile load).
21395bool DAGCombiner::SliceUpLoad(SDNode *N) {
21396 if (Level < AfterLegalizeDAG)
21397 return false;
21398
21399 LoadSDNode *LD = cast<LoadSDNode>(Val: N);
21400 if (!LD->isSimple() || !ISD::isNormalLoad(N: LD) ||
21401 !LD->getValueType(ResNo: 0).isInteger())
21402 return false;
21403
21404 // The algorithm to split up a load of a scalable vector into individual
21405 // elements currently requires knowing the length of the loaded type,
21406 // so will need adjusting to work on scalable vectors.
21407 if (LD->getValueType(ResNo: 0).isScalableVector())
21408 return false;
21409
21410 // Keep track of already used bits to detect overlapping values.
21411 // In that case, we will just abort the transformation.
21412 APInt UsedBits(LD->getValueSizeInBits(ResNo: 0), 0);
21413
21414 SmallVector<LoadedSlice, 4> LoadedSlices;
21415
21416 // Check if this load is used as several smaller chunks of bits.
21417 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
21418 // of computation for each trunc.
21419 for (SDUse &U : LD->uses()) {
21420 // Skip the uses of the chain.
21421 if (U.getResNo() != 0)
21422 continue;
21423
21424 SDNode *User = U.getUser();
21425 unsigned Shift = 0;
21426
21427 // Check if this is a trunc(lshr).
21428 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
21429 isa<ConstantSDNode>(Val: User->getOperand(Num: 1))) {
21430 Shift = User->getConstantOperandVal(Num: 1);
21431 User = *User->user_begin();
21432 }
21433
    // At this point, User is a truncate iff we encountered trunc or
    // trunc(lshr).
21436 if (User->getOpcode() != ISD::TRUNCATE)
21437 return false;
21438
    // The width of the type must be a power of 2 that is at least 8 bits.
    // Otherwise the load cannot be represented in LLVM IR.
    // Moreover, if we shifted by an amount that is not a multiple of 8 bits,
    // the slice would span partial bytes. We do not support that.
21443 unsigned Width = User->getValueSizeInBits(ResNo: 0);
21444 if (Width < 8 || !isPowerOf2_32(Value: Width) || (Shift & 0x7))
21445 return false;
21446
21447 // Build the slice for this chain of computations.
21448 LoadedSlice LS(User, LD, Shift, &DAG);
21449 APInt CurrentUsedBits = LS.getUsedBits();
21450
21451 // Check if this slice overlaps with another.
21452 if ((CurrentUsedBits & UsedBits) != 0)
21453 return false;
21454 // Update the bits used globally.
21455 UsedBits |= CurrentUsedBits;
21456
21457 // Check if the new slice would be legal.
21458 if (!LS.isLegal())
21459 return false;
21460
21461 // Record the slice.
21462 LoadedSlices.push_back(Elt: LS);
21463 }
21464
21465 // Abort slicing if it does not seem to be profitable.
21466 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
21467 return false;
21468
21469 ++SlicedLoads;
21470
21471 // Rewrite each chain to use an independent load.
21472 // By construction, each chain can be represented by a unique load.
21473
21474 // Prepare the argument for the new token factor for all the slices.
21475 SmallVector<SDValue, 8> ArgChains;
21476 for (const LoadedSlice &LS : LoadedSlices) {
21477 SDValue SliceInst = LS.loadSlice();
21478 CombineTo(N: LS.Inst, Res: SliceInst, AddTo: true);
21479 if (SliceInst.getOpcode() != ISD::LOAD)
21480 SliceInst = SliceInst.getOperand(i: 0);
21481 assert(SliceInst->getOpcode() == ISD::LOAD &&
21482 "It takes more than a zext to get to the loaded slice!!");
21483 ArgChains.push_back(Elt: SliceInst.getValue(R: 1));
21484 }
21485
21486 SDValue Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: SDLoc(LD), VT: MVT::Other,
21487 Ops: ArgChains);
21488 DAG.ReplaceAllUsesOfValueWith(From: SDValue(N, 1), To: Chain);
21489 AddToWorklist(N: Chain.getNode());
21490 return true;
21491}
21492
/// Check to see if V is (and (load ptr), imm), where the AND clears out
/// specific bytes of the loaded value. If so, return the number of bytes being
/// masked out and the byte shift amount.
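/// For example, V = (and (load i32 Ptr), 0xFFFF00FF) masks out a single byte
/// at byte offset 1, so this returns {1, 1}.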
21496static std::pair<unsigned, unsigned>
21497CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
21498 std::pair<unsigned, unsigned> Result(0, 0);
21499
21500 // Check for the structure we're looking for.
21501 if (V->getOpcode() != ISD::AND ||
21502 !isa<ConstantSDNode>(Val: V->getOperand(Num: 1)) ||
21503 !ISD::isNormalLoad(N: V->getOperand(Num: 0).getNode()))
21504 return Result;
21505
21506 // Check the chain and pointer.
21507 LoadSDNode *LD = cast<LoadSDNode>(Val: V->getOperand(Num: 0));
21508 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
21509
21510 // This only handles simple types.
21511 if (V.getValueType() != MVT::i16 &&
21512 V.getValueType() != MVT::i32 &&
21513 V.getValueType() != MVT::i64)
21514 return Result;
21515
  // Check the constant mask. Invert it so that the bits being masked out by
  // the AND become 1 and the bits being kept become 0. Use getSExtValue so
  // that leading bits follow the sign bit for uniformity.
21519 uint64_t NotMask = ~cast<ConstantSDNode>(Val: V->getOperand(Num: 1))->getSExtValue();
21520 unsigned NotMaskLZ = llvm::countl_zero(Val: NotMask);
21521 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
21522 unsigned NotMaskTZ = llvm::countr_zero(Val: NotMask);
21523 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
21524 if (NotMaskLZ == 64) return Result; // All zero mask.
21525
  // See if we have a contiguous run of bits; if so, NotMask has the form 0*1+0*.
21527 if (llvm::countr_one(Value: NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
21528 return Result;
21529
21530 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
21531 if (V.getValueType() != MVT::i64 && NotMaskLZ)
21532 NotMaskLZ -= 64-V.getValueSizeInBits();
21533
21534 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
21535 switch (MaskedBytes) {
21536 case 1:
21537 case 2:
21538 case 4: break;
21539 default: return Result; // All one mask, or 5-byte mask.
21540 }
21541
  // Verify that the masked-out region starts at a byte offset that is a
  // multiple of its width so that the narrowed access stays naturally aligned.
21544 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
21545
  // For narrowing to be valid, the load must be the memory operation that
  // immediately precedes the store.
21548 if (LD == Chain.getNode())
21549 ; // ok.
21550 else if (Chain->getOpcode() == ISD::TokenFactor &&
21551 SDValue(LD, 1).hasOneUse()) {
    // LD has only 1 chain use, so there are no indirect dependencies.
21553 if (!LD->isOperandOf(N: Chain.getNode()))
21554 return Result;
21555 } else
21556 return Result; // Fail.
21557
21558 Result.first = MaskedBytes;
21559 Result.second = NotMaskTZ/8;
21560 return Result;
21561}
21562
21563/// Check to see if IVal is something that provides a value as specified by
21564/// MaskInfo. If so, replace the specified store with a narrower store of
21565/// truncated IVal.
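/// For example, with MaskInfo = {1, 1} the wide store of the 'or' result can
/// be replaced by a one-byte store of (trunc (srl IVal, 8)) at Ptr + 1 on a
/// little-endian target.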
21566static SDValue
21567ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
21568 SDValue IVal, StoreSDNode *St,
21569 DAGCombiner *DC) {
21570 unsigned NumBytes = MaskInfo.first;
21571 unsigned ByteShift = MaskInfo.second;
21572 SelectionDAG &DAG = DC->getDAG();
21573
21574 // Check to see if IVal is all zeros in the part being masked in by the 'or'
21575 // that uses this. If not, this is not a replacement.
21576 APInt Mask = ~APInt::getBitsSet(numBits: IVal.getValueSizeInBits(),
21577 loBit: ByteShift*8, hiBit: (ByteShift+NumBytes)*8);
21578 if (!DAG.MaskedValueIsZero(Op: IVal, Mask)) return SDValue();
21579
21580 // Check that it is legal on the target to do this. It is legal if the new
21581 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
21582 // legalization. If the source type is legal, but the store type isn't, see
21583 // if we can use a truncating store.
21584 MVT VT = MVT::getIntegerVT(BitWidth: NumBytes * 8);
21585 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21586 bool UseTruncStore;
21587 if (DC->isTypeLegal(VT))
21588 UseTruncStore = false;
21589 else if (TLI.isTypeLegal(VT: IVal.getValueType()) &&
21590 TLI.isTruncStoreLegal(ValVT: IVal.getValueType(), MemVT: VT))
21591 UseTruncStore = true;
21592 else
21593 return SDValue();
21594
21595 // Can't do this for indexed stores.
21596 if (St->isIndexed())
21597 return SDValue();
21598
21599 // Check that the target doesn't think this is a bad idea.
21600 if (St->getMemOperand() &&
21601 !TLI.allowsMemoryAccess(Context&: *DAG.getContext(), DL: DAG.getDataLayout(), VT,
21602 MMO: *St->getMemOperand()))
21603 return SDValue();
21604
21605 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
21606 // shifted by ByteShift and truncated down to NumBytes.
21607 if (ByteShift) {
21608 SDLoc DL(IVal);
21609 IVal = DAG.getNode(
21610 Opcode: ISD::SRL, DL, VT: IVal.getValueType(), N1: IVal,
21611 N2: DAG.getShiftAmountConstant(Val: ByteShift * 8, VT: IVal.getValueType(), DL));
21612 }
21613
21614 // Figure out the offset for the store and the alignment of the access.
21615 unsigned StOffset;
21616 if (DAG.getDataLayout().isLittleEndian())
21617 StOffset = ByteShift;
21618 else
21619 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
21620
21621 SDValue Ptr = St->getBasePtr();
21622 if (StOffset) {
21623 SDLoc DL(IVal);
21624 Ptr = DAG.getMemBasePlusOffset(Base: Ptr, Offset: TypeSize::getFixed(ExactSize: StOffset), DL);
21625 }
21626
21627 ++OpsNarrowed;
21628 if (UseTruncStore)
21629 return DAG.getTruncStore(Chain: St->getChain(), dl: SDLoc(St), Val: IVal, Ptr,
21630 PtrInfo: St->getPointerInfo().getWithOffset(O: StOffset), SVT: VT,
21631 Alignment: St->getBaseAlign());
21632
21633 // Truncate down to the new size.
21634 IVal = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(IVal), VT, Operand: IVal);
21635
21636 return DAG.getStore(Chain: St->getChain(), dl: SDLoc(St), Val: IVal, Ptr,
21637 PtrInfo: St->getPointerInfo().getWithOffset(O: StOffset),
21638 Alignment: St->getBaseAlign());
21639}
21640
21641/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
21642/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
21643/// narrowing the load and store if it would end up being a win for performance
21644/// or code size.
21645SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
21646 StoreSDNode *ST = cast<StoreSDNode>(Val: N);
21647 if (!ST->isSimple())
21648 return SDValue();
21649
21650 SDValue Chain = ST->getChain();
21651 SDValue Value = ST->getValue();
21652 SDValue Ptr = ST->getBasePtr();
21653 EVT VT = Value.getValueType();
21654
21655 if (ST->isTruncatingStore() || VT.isVector())
21656 return SDValue();
21657
21658 unsigned Opc = Value.getOpcode();
21659
21660 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
21661 !Value.hasOneUse())
21662 return SDValue();
21663
21664 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
21665 // is a byte mask indicating a consecutive number of bytes, check to see if
21666 // Y is known to provide just those bytes. If so, we try to replace the
21667 // load + replace + store sequence with a single (narrower) store, which makes
21668 // the load dead.
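  // For example:
  //   store (or (and (load p), 0xFFFFFF00), (and y, 0xFF)), p
  // only changes the least significant byte in memory, so it can be replaced
  // by a single i8 store of (trunc y) to the address of that byte, leaving the
  // wide load dead.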
21669 if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
21670 std::pair<unsigned, unsigned> MaskedLoad;
21671 MaskedLoad = CheckForMaskedLoad(V: Value.getOperand(i: 0), Ptr, Chain);
21672 if (MaskedLoad.first)
21673 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskInfo: MaskedLoad,
21674 IVal: Value.getOperand(i: 1), St: ST,DC: this))
21675 return NewST;
21676
21677 // Or is commutative, so try swapping X and Y.
21678 MaskedLoad = CheckForMaskedLoad(V: Value.getOperand(i: 1), Ptr, Chain);
21679 if (MaskedLoad.first)
21680 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskInfo: MaskedLoad,
21681 IVal: Value.getOperand(i: 0), St: ST,DC: this))
21682 return NewST;
21683 }
21684
21685 if (!EnableReduceLoadOpStoreWidth)
21686 return SDValue();
21687
21688 if (Value.getOperand(i: 1).getOpcode() != ISD::Constant)
21689 return SDValue();
21690
21691 SDValue N0 = Value.getOperand(i: 0);
21692 if (ISD::isNormalLoad(N: N0.getNode()) && N0.hasOneUse() &&
21693 Chain == SDValue(N0.getNode(), 1)) {
21694 LoadSDNode *LD = cast<LoadSDNode>(Val&: N0);
21695 if (LD->getBasePtr() != Ptr ||
21696 LD->getPointerInfo().getAddrSpace() !=
21697 ST->getPointerInfo().getAddrSpace())
21698 return SDValue();
21699
21700 // Find the type NewVT to narrow the load / op / store to.
21701 SDValue N1 = Value.getOperand(i: 1);
21702 unsigned BitWidth = N1.getValueSizeInBits();
21703 APInt Imm = N1->getAsAPIntVal();
21704 if (Opc == ISD::AND)
21705 Imm.flipAllBits();
21706 if (Imm == 0 || Imm.isAllOnes())
21707 return SDValue();
    // Find the least/most significant bits that need to be part of the
    // narrowed operation. We assume the target will need to address/access
    // full bytes, so we make sure to align LSB and MSB at byte boundaries.
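    // For example, for an i32 operation where Imm (after any inversion for
    // AND) is 0x00FF0000, this gives LSB = 16, MSB = 23, and an initial NewBW
    // of 8 (i8).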
21711 unsigned BitsPerByteMask = 7u;
21712 unsigned LSB = Imm.countr_zero() & ~BitsPerByteMask;
21713 unsigned MSB = (Imm.getActiveBits() - 1) | BitsPerByteMask;
21714 unsigned NewBW = NextPowerOf2(A: MSB - LSB);
21715 EVT NewVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NewBW);
21716 // The narrowing should be profitable, the load/store operation should be
21717 // legal (or custom) and the store size should be equal to the NewVT width.
21718 while (NewBW < BitWidth &&
21719 (NewVT.getStoreSizeInBits() != NewBW ||
21720 !TLI.isOperationLegalOrCustom(Op: Opc, VT: NewVT) ||
21721 (!ReduceLoadOpStoreWidthForceNarrowingProfitable &&
21722 !TLI.isNarrowingProfitable(N, SrcVT: VT, DestVT: NewVT)))) {
21723 NewBW = NextPowerOf2(A: NewBW);
21724 NewVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NewBW);
21725 }
21726 if (NewBW >= BitWidth)
21727 return SDValue();
21728
    // If we get this far, NewVT/NewBW reflect a power-of-2 sized type that is
    // large enough to cover all bits that should be modified. This type might
    // however be larger than really needed (such as i32 while we actually only
    // need to modify one byte). Now we need to find out how to align the
    // memory accesses to satisfy preferred alignments as well as avoid
    // accessing memory outside the store size of the original access.
21735
21736 unsigned VTStoreSize = VT.getStoreSizeInBits().getFixedValue();
21737
    // Let ShAmt denote the number of bits to skip, counted from the least
    // significant bits of Imm, and let PtrOff denote how much the pointer
    // needs to be offset (in bytes) for the new access.
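    // Continuing the 0x00FF0000 example above, the loop below settles on
    // ShAmt = 16, giving PtrOff = 2 on little-endian targets (and 1 on
    // big-endian targets for an i32 access), assuming the narrowed access is
    // allowed and fast.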
21741 unsigned ShAmt = 0;
21742 uint64_t PtrOff = 0;
21743 for (; ShAmt + NewBW <= VTStoreSize; ShAmt += 8) {
      // Make sure the range [ShAmt, ShAmt+NewBW) covers both LSB and MSB.
21745 if (ShAmt > LSB)
21746 return SDValue();
21747 if (ShAmt + NewBW < MSB)
21748 continue;
21749
21750 // Calculate PtrOff.
21751 unsigned PtrAdjustmentInBits = DAG.getDataLayout().isBigEndian()
21752 ? VTStoreSize - NewBW - ShAmt
21753 : ShAmt;
21754 PtrOff = PtrAdjustmentInBits / 8;
21755
21756 // Now check if narrow access is allowed and fast, considering alignments.
21757 unsigned IsFast = 0;
21758 Align NewAlign = commonAlignment(A: LD->getAlign(), Offset: PtrOff);
21759 if (TLI.allowsMemoryAccess(Context&: *DAG.getContext(), DL: DAG.getDataLayout(), VT: NewVT,
21760 AddrSpace: LD->getAddressSpace(), Alignment: NewAlign,
21761 Flags: LD->getMemOperand()->getFlags(), Fast: &IsFast) &&
21762 IsFast)
21763 break;
21764 }
    // If the loop above did not find an acceptable ShAmt, exit here.
21766 if (ShAmt + NewBW > VTStoreSize)
21767 return SDValue();
21768
21769 APInt NewImm = Imm.lshr(shiftAmt: ShAmt).trunc(width: NewBW);
21770 if (Opc == ISD::AND)
21771 NewImm.flipAllBits();
21772 Align NewAlign = commonAlignment(A: LD->getAlign(), Offset: PtrOff);
21773 SDValue NewPtr =
21774 DAG.getMemBasePlusOffset(Base: Ptr, Offset: TypeSize::getFixed(ExactSize: PtrOff), DL: SDLoc(LD));
21775 SDValue NewLD =
21776 DAG.getLoad(VT: NewVT, dl: SDLoc(N0), Chain: LD->getChain(), Ptr: NewPtr,
21777 PtrInfo: LD->getPointerInfo().getWithOffset(O: PtrOff), Alignment: NewAlign,
21778 MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
21779 SDValue NewVal = DAG.getNode(Opcode: Opc, DL: SDLoc(Value), VT: NewVT, N1: NewLD,
21780 N2: DAG.getConstant(Val: NewImm, DL: SDLoc(Value), VT: NewVT));
21781 SDValue NewST =
21782 DAG.getStore(Chain, dl: SDLoc(N), Val: NewVal, Ptr: NewPtr,
21783 PtrInfo: ST->getPointerInfo().getWithOffset(O: PtrOff), Alignment: NewAlign);
21784
21785 AddToWorklist(N: NewPtr.getNode());
21786 AddToWorklist(N: NewLD.getNode());
21787 AddToWorklist(N: NewVal.getNode());
21788 WorklistRemover DeadNodes(*this);
21789 DAG.ReplaceAllUsesOfValueWith(From: N0.getValue(R: 1), To: NewLD.getValue(R: 1));
21790 ++OpsNarrowed;
21791 return NewST;
21792 }
21793
21794 return SDValue();
21795}
21796
21797/// For a given floating point load / store pair, if the load value isn't used
21798/// by any other operations, then consider transforming the pair to integer
21799/// load / store operations if the target deems the transformation profitable.
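/// For example, an f64 load whose only use is feeding an f64 store may be
/// rewritten as an i64 load feeding an i64 store when the target reports the
/// integer operations as both legal and desirable.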
21800SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
21801 StoreSDNode *ST = cast<StoreSDNode>(Val: N);
21802 SDValue Value = ST->getValue();
21803 if (ISD::isNormalStore(N: ST) && ISD::isNormalLoad(N: Value.getNode()) &&
21804 Value.hasOneUse()) {
21805 LoadSDNode *LD = cast<LoadSDNode>(Val&: Value);
21806 EVT VT = LD->getMemoryVT();
21807 if (!VT.isSimple() || !VT.isFloatingPoint() || VT != ST->getMemoryVT() ||
21808 LD->isNonTemporal() || ST->isNonTemporal() ||
21809 LD->getPointerInfo().getAddrSpace() != 0 ||
21810 ST->getPointerInfo().getAddrSpace() != 0)
21811 return SDValue();
21812
21813 TypeSize VTSize = VT.getSizeInBits();
21814
21815 // We don't know the size of scalable types at compile time so we cannot
21816 // create an integer of the equivalent size.
21817 if (VTSize.isScalable())
21818 return SDValue();
21819
21820 unsigned FastLD = 0, FastST = 0;
21821 EVT IntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: VTSize.getFixedValue());
21822 if (!TLI.isOperationLegal(Op: ISD::LOAD, VT: IntVT) ||
21823 !TLI.isOperationLegal(Op: ISD::STORE, VT: IntVT) ||
21824 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
21825 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
21826 !TLI.allowsMemoryAccess(Context&: *DAG.getContext(), DL: DAG.getDataLayout(), VT: IntVT,
21827 MMO: *LD->getMemOperand(), Fast: &FastLD) ||
21828 !TLI.allowsMemoryAccess(Context&: *DAG.getContext(), DL: DAG.getDataLayout(), VT: IntVT,
21829 MMO: *ST->getMemOperand(), Fast: &FastST) ||
21830 !FastLD || !FastST)
21831 return SDValue();
21832
21833 SDValue NewLD = DAG.getLoad(VT: IntVT, dl: SDLoc(Value), Chain: LD->getChain(),
21834 Ptr: LD->getBasePtr(), MMO: LD->getMemOperand());
21835
21836 SDValue NewST = DAG.getStore(Chain: ST->getChain(), dl: SDLoc(N), Val: NewLD,
21837 Ptr: ST->getBasePtr(), MMO: ST->getMemOperand());
21838
21839 AddToWorklist(N: NewLD.getNode());
21840 AddToWorklist(N: NewST.getNode());
21841 WorklistRemover DeadNodes(*this);
21842 DAG.ReplaceAllUsesOfValueWith(From: Value.getValue(R: 1), To: NewLD.getValue(R: 1));
21843 ++LdStFP2Int;
21844 return NewST;
21845 }
21846
21847 return SDValue();
21848}
21849
21850// This is a helper function for visitMUL to check the profitability
21851// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
21852// MulNode is the original multiply, AddNode is (add x, c1),
21853// and ConstNode is c2.
21854//
21855// If the (add x, c1) has multiple uses, we could increase
21856// the number of adds if we make this transformation.
21857// It would only be worth doing this if we can remove a
21858// multiply in the process. Check for that here.
21859// To illustrate:
21860// (A + c1) * c3
21861// (A + c2) * c3
21862// We're checking for cases where we have common "c3 * A" expressions.
21863bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
21864 SDValue ConstNode) {
21865 // If the add only has one use, and the target thinks the folding is
21866 // profitable or does not lead to worse code, this would be OK to do.
21867 if (AddNode->hasOneUse() &&
21868 TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
21869 return true;
21870
21871 // Walk all the users of the constant with which we're multiplying.
21872 for (SDNode *User : ConstNode->users()) {
21873 if (User == MulNode) // This use is the one we're on right now. Skip it.
21874 continue;
21875
21876 if (User->getOpcode() == ISD::MUL) { // We have another multiply use.
21877 SDNode *OtherOp;
21878 SDNode *MulVar = AddNode.getOperand(i: 0).getNode();
21879
21880 // OtherOp is what we're multiplying against the constant.
21881 if (User->getOperand(Num: 0) == ConstNode)
21882 OtherOp = User->getOperand(Num: 1).getNode();
21883 else
21884 OtherOp = User->getOperand(Num: 0).getNode();
21885
21886 // Check to see if multiply is with the same operand of our "add".
21887 //
21888 // ConstNode = CONST
21889 // User = ConstNode * A <-- visiting User. OtherOp is A.
21890 // ...
21891 // AddNode = (A + c1) <-- MulVar is A.
21892 // = AddNode * ConstNode <-- current visiting instruction.
21893 //
21894 // If we make this transformation, we will have a common
21895 // multiply (ConstNode * A) that we can save.
21896 if (OtherOp == MulVar)
21897 return true;
21898
21899 // Now check to see if a future expansion will give us a common
21900 // multiply.
21901 //
21902 // ConstNode = CONST
21903 // AddNode = (A + c1)
21904 // ... = AddNode * ConstNode <-- current visiting instruction.
21905 // ...
21906 // OtherOp = (A + c2)
21907 // User = OtherOp * ConstNode <-- visiting User.
21908 //
21909 // If we make this transformation, we will have a common
      // multiply (CONST * A) after we also do the same transformation
      // to the "User" instruction.
21912 if (OtherOp->getOpcode() == ISD::ADD &&
21913 DAG.isConstantIntBuildVectorOrConstantInt(N: OtherOp->getOperand(Num: 1)) &&
21914 OtherOp->getOperand(Num: 0).getNode() == MulVar)
21915 return true;
21916 }
21917 }
21918
21919 // Didn't find a case where this would be profitable.
21920 return false;
21921}
21922
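/// Build a TokenFactor over the incoming chains of the first \p NumStores
/// stores in \p StoreNodes, skipping chains that are themselves one of the
/// merged stores and deduplicating repeated chains.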
21923SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
21924 unsigned NumStores) {
21925 SmallVector<SDValue, 8> Chains;
21926 SmallPtrSet<const SDNode *, 8> Visited;
21927 SDLoc StoreDL(StoreNodes[0].MemNode);
21928
21929 for (unsigned i = 0; i < NumStores; ++i) {
21930 Visited.insert(Ptr: StoreNodes[i].MemNode);
21931 }
21932
21933 // don't include nodes that are children or repeated nodes.
21934 for (unsigned i = 0; i < NumStores; ++i) {
21935 if (Visited.insert(Ptr: StoreNodes[i].MemNode->getChain().getNode()).second)
21936 Chains.push_back(Elt: StoreNodes[i].MemNode->getChain());
21937 }
21938
21939 assert(!Chains.empty() && "Chain should have generated a chain");
21940 return DAG.getTokenFactor(DL: StoreDL, Vals&: Chains);
21941}
21942
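/// Return true if every store in \p StoreNodes is known to access the same
/// underlying IR object, so the first store's pointer info can safely be
/// reused for the merged store.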
21943bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) {
21944 const Value *UnderlyingObj = nullptr;
21945 for (const auto &MemOp : StoreNodes) {
21946 const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
    // A pseudo value (e.g. a stack frame object) has its own frame index and
    // size, so we should not reuse the first store's frame index for other
    // frames.
21949 if (MMO->getPseudoValue())
21950 return false;
21951
21952 if (!MMO->getValue())
21953 return false;
21954
21955 const Value *Obj = getUnderlyingObject(V: MMO->getValue());
21956
21957 if (UnderlyingObj && UnderlyingObj != Obj)
21958 return false;
21959
21960 if (!UnderlyingObj)
21961 UnderlyingObj = Obj;
21962 }
21963
21964 return true;
21965}
21966
21967bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
21968 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
21969 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
21970 // Make sure we have something to merge.
21971 if (NumStores < 2)
21972 return false;
21973
21974 assert((!UseTrunc || !UseVector) &&
21975 "This optimization cannot emit a vector truncating store");
21976
21977 // The latest Node in the DAG.
21978 SDLoc DL(StoreNodes[0].MemNode);
21979
21980 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
21981 unsigned SizeInBits = NumStores * ElementSizeBits;
21982 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
21983
21984 std::optional<MachineMemOperand::Flags> Flags;
21985 AAMDNodes AAInfo;
21986 for (unsigned I = 0; I != NumStores; ++I) {
21987 StoreSDNode *St = cast<StoreSDNode>(Val: StoreNodes[I].MemNode);
21988 if (!Flags) {
21989 Flags = St->getMemOperand()->getFlags();
21990 AAInfo = St->getAAInfo();
21991 continue;
21992 }
21993 // Skip merging if there's an inconsistent flag.
21994 if (Flags != St->getMemOperand()->getFlags())
21995 return false;
21996 // Concatenate AA metadata.
21997 AAInfo = AAInfo.concat(Other: St->getAAInfo());
21998 }
21999
22000 EVT StoreTy;
22001 if (UseVector) {
22002 unsigned Elts = NumStores * NumMemElts;
22003 // Get the type for the merged vector store.
22004 StoreTy = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MemVT.getScalarType(), NumElements: Elts);
22005 } else
22006 StoreTy = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: SizeInBits);
22007
22008 SDValue StoredVal;
22009 if (UseVector) {
22010 if (IsConstantSrc) {
22011 SmallVector<SDValue, 8> BuildVector;
22012 for (unsigned I = 0; I != NumStores; ++I) {
22013 StoreSDNode *St = cast<StoreSDNode>(Val: StoreNodes[I].MemNode);
22014 SDValue Val = St->getValue();
22015 // If constant is of the wrong type, convert it now. This comes up
22016 // when one of our stores was truncating.
22017 if (MemVT != Val.getValueType()) {
22018 Val = peekThroughBitcasts(V: Val);
22019 // Deal with constants of wrong size.
22020 if (ElementSizeBits != Val.getValueSizeInBits()) {
22021 auto *C = dyn_cast<ConstantSDNode>(Val);
22022 if (!C)
22023 // Not clear how to truncate FP values.
22024 // TODO: Handle truncation of build_vector constants
22025 return false;
22026
22027 EVT IntMemVT =
22028 EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: MemVT.getSizeInBits());
22029 Val = DAG.getConstant(Val: C->getAPIntValue()
22030 .zextOrTrunc(width: Val.getValueSizeInBits())
22031 .zextOrTrunc(width: ElementSizeBits),
22032 DL: SDLoc(C), VT: IntMemVT);
22033 }
          // Bitcast so the value ends up with the correctly sized type, MemVT.
22035 Val = DAG.getBitcast(VT: MemVT, V: Val);
22036 }
22037 BuildVector.push_back(Elt: Val);
22038 }
22039 StoredVal = DAG.getNode(Opcode: MemVT.isVector() ? ISD::CONCAT_VECTORS
22040 : ISD::BUILD_VECTOR,
22041 DL, VT: StoreTy, Ops: BuildVector);
22042 } else {
22043 SmallVector<SDValue, 8> Ops;
22044 for (unsigned i = 0; i < NumStores; ++i) {
22045 StoreSDNode *St = cast<StoreSDNode>(Val: StoreNodes[i].MemNode);
22046 SDValue Val = peekThroughBitcasts(V: St->getValue());
22047 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
22048 // type MemVT. If the underlying value is not the correct
22049 // type, but it is an extraction of an appropriate vector we
22050 // can recast Val to be of the correct type. This may require
22051 // converting between EXTRACT_VECTOR_ELT and
22052 // EXTRACT_SUBVECTOR.
22053 if ((MemVT != Val.getValueType()) &&
22054 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
22055 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
22056 EVT MemVTScalarTy = MemVT.getScalarType();
22057 // We may need to add a bitcast here to get types to line up.
22058 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
22059 Val = DAG.getBitcast(VT: MemVT, V: Val);
22060 } else if (MemVT.isVector() &&
22061 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
22062 Val = DAG.getNode(Opcode: ISD::BUILD_VECTOR, DL, VT: MemVT, Operand: Val);
22063 } else {
22064 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
22065 : ISD::EXTRACT_VECTOR_ELT;
22066 SDValue Vec = Val.getOperand(i: 0);
22067 SDValue Idx = Val.getOperand(i: 1);
22068 Val = DAG.getNode(Opcode: OpC, DL: SDLoc(Val), VT: MemVT, N1: Vec, N2: Idx);
22069 }
22070 }
22071 Ops.push_back(Elt: Val);
22072 }
22073
22074 // Build the extracted vector elements back into a vector.
22075 StoredVal = DAG.getNode(Opcode: MemVT.isVector() ? ISD::CONCAT_VECTORS
22076 : ISD::BUILD_VECTOR,
22077 DL, VT: StoreTy, Ops);
22078 }
22079 } else {
22080 // We should always use a vector store when merging extracted vector
22081 // elements, so this path implies a store of constants.
22082 assert(IsConstantSrc && "Merged vector elements should use vector store");
22083
22084 APInt StoreInt(SizeInBits, 0);
22085
22086 // Construct a single integer constant which is made of the smaller
22087 // constant inputs.
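    // For example, on a little-endian target, merging an i16 store of 0x1122
    // (at the lower address) with an i16 store of 0x3344 produces a single
    // i32 store of 0x33441122.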
22088 bool IsLE = DAG.getDataLayout().isLittleEndian();
22089 for (unsigned i = 0; i < NumStores; ++i) {
22090 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
22091 StoreSDNode *St = cast<StoreSDNode>(Val: StoreNodes[Idx].MemNode);
22092
22093 SDValue Val = St->getValue();
22094 Val = peekThroughBitcasts(V: Val);
22095 StoreInt <<= ElementSizeBits;
22096 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
22097 StoreInt |= C->getAPIntValue()
22098 .zextOrTrunc(width: ElementSizeBits)
22099 .zextOrTrunc(width: SizeInBits);
22100 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
22101 StoreInt |= C->getValueAPF()
22102 .bitcastToAPInt()
22103 .zextOrTrunc(width: ElementSizeBits)
22104 .zextOrTrunc(width: SizeInBits);
22105 // If fp truncation is necessary give up for now.
22106 if (MemVT.getSizeInBits() != ElementSizeBits)
22107 return false;
22108 } else if (ISD::isBuildVectorOfConstantSDNodes(N: Val.getNode()) ||
22109 ISD::isBuildVectorOfConstantFPSDNodes(N: Val.getNode())) {
22110 // Not yet handled
22111 return false;
22112 } else {
22113 llvm_unreachable("Invalid constant element type");
22114 }
22115 }
22116
22117 // Create the new Load and Store operations.
22118 StoredVal = DAG.getConstant(Val: StoreInt, DL, VT: StoreTy);
22119 }
22120
22121 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
22122 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
22123 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
22124
  // Make sure we use a trunc store if it's necessary to be legal.
  // When generating the new widened store, if the first store's pointer info
  // cannot be reused, discard the pointer info except for the address space,
  // because the widened store can no longer be represented by the original
  // pointer info, which describes the narrower memory object.
22130 SDValue NewStore;
22131 if (!UseTrunc) {
22132 NewStore = DAG.getStore(
22133 Chain: NewChain, dl: DL, Val: StoredVal, Ptr: FirstInChain->getBasePtr(),
22134 PtrInfo: CanReusePtrInfo
22135 ? FirstInChain->getPointerInfo()
22136 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
22137 Alignment: FirstInChain->getAlign(), MMOFlags: *Flags, AAInfo);
22138 } else { // Must be realized as a trunc store
22139 EVT LegalizedStoredValTy =
22140 TLI.getTypeToTransformTo(Context&: *DAG.getContext(), VT: StoredVal.getValueType());
22141 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
22142 ConstantSDNode *C = cast<ConstantSDNode>(Val&: StoredVal);
22143 SDValue ExtendedStoreVal =
22144 DAG.getConstant(Val: C->getAPIntValue().zextOrTrunc(width: LegalizedStoreSize), DL,
22145 VT: LegalizedStoredValTy);
22146 NewStore = DAG.getTruncStore(
22147 Chain: NewChain, dl: DL, Val: ExtendedStoreVal, Ptr: FirstInChain->getBasePtr(),
22148 PtrInfo: CanReusePtrInfo
22149 ? FirstInChain->getPointerInfo()
22150 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
22151 SVT: StoredVal.getValueType() /*TVT*/, Alignment: FirstInChain->getAlign(), MMOFlags: *Flags,
22152 AAInfo);
22153 }
22154
22155 // Replace all merged stores with the new store.
22156 for (unsigned i = 0; i < NumStores; ++i)
22157 CombineTo(N: StoreNodes[i].MemNode, Res: NewStore);
22158
22159 AddToWorklist(N: NewChain.getNode());
22160 return true;
22161}
22162
22163SDNode *
22164DAGCombiner::getStoreMergeCandidates(StoreSDNode *St,
22165 SmallVectorImpl<MemOpLink> &StoreNodes) {
22166 // This holds the base pointer, index, and the offset in bytes from the base
22167 // pointer. We must have a base and an offset. Do not handle stores to undef
22168 // base pointers.
22169 BaseIndexOffset BasePtr = BaseIndexOffset::match(N: St, DAG);
22170 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
22171 return nullptr;
22172
22173 SDValue Val = peekThroughBitcasts(V: St->getValue());
22174 StoreSource StoreSrc = getStoreSource(StoreVal: Val);
22175 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
22176
22177 // Match on loadbaseptr if relevant.
22178 EVT MemVT = St->getMemoryVT();
22179 BaseIndexOffset LBasePtr;
22180 EVT LoadVT;
22181 if (StoreSrc == StoreSource::Load) {
22182 auto *Ld = cast<LoadSDNode>(Val);
22183 LBasePtr = BaseIndexOffset::match(N: Ld, DAG);
22184 LoadVT = Ld->getMemoryVT();
22185 // Load and store should be the same type.
22186 if (MemVT != LoadVT)
22187 return nullptr;
22188 // Loads must only have one use.
22189 if (!Ld->hasNUsesOfValue(NUses: 1, Value: 0))
22190 return nullptr;
22191 // The memory operands must not be volatile/indexed/atomic.
22192 // TODO: May be able to relax for unordered atomics (see D66309)
22193 if (!Ld->isSimple() || Ld->isIndexed())
22194 return nullptr;
22195 }
22196 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
22197 int64_t &Offset) -> bool {
22198 // The memory operands must not be volatile/indexed/atomic.
22199 // TODO: May be able to relax for unordered atomics (see D66309)
22200 if (!Other->isSimple() || Other->isIndexed())
22201 return false;
22202 // Don't mix temporal stores with non-temporal stores.
22203 if (St->isNonTemporal() != Other->isNonTemporal())
22204 return false;
22205 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(NodeX: *St, NodeY: *Other))
22206 return false;
22207 SDValue OtherBC = peekThroughBitcasts(V: Other->getValue());
22208 // Allow merging constants of different types as integers.
22209 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(VT: Other->getMemoryVT())
22210 : Other->getMemoryVT() != MemVT;
22211 switch (StoreSrc) {
22212 case StoreSource::Load: {
22213 if (NoTypeMatch)
22214 return false;
22215 // The Load's Base Ptr must also match.
22216 auto *OtherLd = dyn_cast<LoadSDNode>(Val&: OtherBC);
22217 if (!OtherLd)
22218 return false;
22219 BaseIndexOffset LPtr = BaseIndexOffset::match(N: OtherLd, DAG);
22220 if (LoadVT != OtherLd->getMemoryVT())
22221 return false;
22222 // Loads must only have one use.
22223 if (!OtherLd->hasNUsesOfValue(NUses: 1, Value: 0))
22224 return false;
22225 // The memory operands must not be volatile/indexed/atomic.
22226 // TODO: May be able to relax for unordered atomics (see D66309)
22227 if (!OtherLd->isSimple() || OtherLd->isIndexed())
22228 return false;
22229 // Don't mix temporal loads with non-temporal loads.
22230 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
22231 return false;
22232 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(NodeX: *cast<LoadSDNode>(Val),
22233 NodeY: *OtherLd))
22234 return false;
22235 if (!(LBasePtr.equalBaseIndex(Other: LPtr, DAG)))
22236 return false;
22237 break;
22238 }
22239 case StoreSource::Constant:
22240 if (NoTypeMatch)
22241 return false;
22242 if (getStoreSource(StoreVal: OtherBC) != StoreSource::Constant)
22243 return false;
22244 break;
22245 case StoreSource::Extract:
22246 // Do not merge truncated stores here.
22247 if (Other->isTruncatingStore())
22248 return false;
22249 if (!MemVT.bitsEq(VT: OtherBC.getValueType()))
22250 return false;
22251 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
22252 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
22253 return false;
22254 break;
22255 default:
22256 llvm_unreachable("Unhandled store source for merging");
22257 }
22258 Ptr = BaseIndexOffset::match(N: Other, DAG);
22259 return (BasePtr.equalBaseIndex(Other: Ptr, DAG, Off&: Offset));
22260 };
22261
  // We are looking for a root node which is an ancestor to all mergeable
  // stores. We search up through a load, to our root and then down
  // through all children. For instance we will find Store{1,2,3} if
  // St is Store1, Store2 or Store3 where the root is not a load
  // which is always true for nonvolatile ops. TODO: Expand
  // the search to find all valid candidates through multiple layers of loads.
22268 //
22269 // Root
22270 // |-------|-------|
22271 // Load Load Store3
22272 // | |
22273 // Store1 Store2
22274 //
22275 // FIXME: We should be able to climb and
22276 // descend TokenFactors to find candidates as well.
22277
22278 SDNode *RootNode = St->getChain().getNode();
22279 // Bail out if we already analyzed this root node and found nothing.
22280 if (ChainsWithoutMergeableStores.contains(Ptr: RootNode))
22281 return nullptr;
22282
  // Check if this StoreNode / RootNode pair has already bailed out of the
  // dependence check more times than the limit allows.
22285 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
22286 SDNode *RootNode) -> bool {
22287 auto RootCount = StoreRootCountMap.find(Val: StoreNode);
22288 return RootCount != StoreRootCountMap.end() &&
22289 RootCount->second.first == RootNode &&
22290 RootCount->second.second > StoreMergeDependenceLimit;
22291 };
22292
22293 auto TryToAddCandidate = [&](SDUse &Use) {
22294 // This must be a chain use.
22295 if (Use.getOperandNo() != 0)
22296 return;
22297 if (auto *OtherStore = dyn_cast<StoreSDNode>(Val: Use.getUser())) {
22298 BaseIndexOffset Ptr;
22299 int64_t PtrDiff;
22300 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
22301 !OverLimitInDependenceCheck(OtherStore, RootNode))
22302 StoreNodes.push_back(Elt: MemOpLink(OtherStore, PtrDiff));
22303 }
22304 };
22305
22306 unsigned NumNodesExplored = 0;
22307 const unsigned MaxSearchNodes = 1024;
22308 if (auto *Ldn = dyn_cast<LoadSDNode>(Val: RootNode)) {
22309 RootNode = Ldn->getChain().getNode();
22310 // Bail out if we already analyzed this root node and found nothing.
22311 if (ChainsWithoutMergeableStores.contains(Ptr: RootNode))
22312 return nullptr;
22313 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
22314 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
22315 SDNode *User = I->getUser();
22316 if (I->getOperandNo() == 0 && isa<LoadSDNode>(Val: User)) { // walk down chain
22317 for (SDUse &U2 : User->uses())
22318 TryToAddCandidate(U2);
22319 }
22320 // Check stores that depend on the root (e.g. Store 3 in the chart above).
22321 if (I->getOperandNo() == 0 && isa<StoreSDNode>(Val: User)) {
22322 TryToAddCandidate(*I);
22323 }
22324 }
22325 } else {
22326 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
22327 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
22328 TryToAddCandidate(*I);
22329 }
22330
22331 return RootNode;
22332}
22333
22334// We need to check that merging these stores does not cause a loop in the
22335// DAG. Any store candidate may depend on another candidate indirectly through
22336// its operands. Check in parallel by searching up from operands of candidates.
22337bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
22338 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
22339 SDNode *RootNode) {
  // FIXME: We should be able to truncate a full search of
  // predecessors by doing a BFS and keeping tabs on the originating
  // stores from which worklist nodes come, in a similar way to
  // TokenFactor simplification.
22344
22345 SmallPtrSet<const SDNode *, 32> Visited;
22346 SmallVector<const SDNode *, 8> Worklist;
22347
22348 // RootNode is a predecessor to all candidates so we need not search
22349 // past it. Add RootNode (peeking through TokenFactors). Do not count
22350 // these towards size check.
22351
22352 Worklist.push_back(Elt: RootNode);
22353 while (!Worklist.empty()) {
22354 auto N = Worklist.pop_back_val();
22355 if (!Visited.insert(Ptr: N).second)
22356 continue; // Already present in Visited.
22357 if (N->getOpcode() == ISD::TokenFactor) {
22358 for (SDValue Op : N->ops())
22359 Worklist.push_back(Elt: Op.getNode());
22360 }
22361 }
22362
22363 // Don't count pruning nodes towards max.
22364 unsigned int Max = 1024 + Visited.size();
22365 // Search Ops of store candidates.
22366 for (unsigned i = 0; i < NumStores; ++i) {
22367 SDNode *N = StoreNodes[i].MemNode;
22368 // Of the 4 Store Operands:
22369 // * Chain (Op 0) -> We have already considered these
22370 // in candidate selection, but only by following the
22371 // chain dependencies. We could still have a chain
22372 // dependency to a load, that has a non-chain dep to
22373 // another load, that depends on a store, etc. So it is
22374 // possible to have dependencies that consist of a mix
22375 // of chain and non-chain deps, and we need to include
    //                    chain operands in the analysis here.
    // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
    // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
    //                      but aren't necessarily from the same base node, so
22380 // cycles possible (e.g. via indexed store).
22381 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
22382 // non-indexed stores). Not constant on all targets (e.g. ARM)
22383 // and so can participate in a cycle.
22384 for (const SDValue &Op : N->op_values())
22385 Worklist.push_back(Elt: Op.getNode());
22386 }
22387 // Search through DAG. We can stop early if we find a store node.
22388 for (unsigned i = 0; i < NumStores; ++i)
22389 if (SDNode::hasPredecessorHelper(N: StoreNodes[i].MemNode, Visited, Worklist,
22390 MaxSteps: Max)) {
      // If the search bails out, record the StoreNode and RootNode in the
      // StoreRootCountMap. If we have seen the pair many times over a limit,
      // we won't add the StoreNode into the StoreNodes set again.
22394 if (Visited.size() >= Max) {
22395 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
22396 if (RootCount.first == RootNode)
22397 RootCount.second++;
22398 else
22399 RootCount = {RootNode, 1};
22400 }
22401 return false;
22402 }
22403 return true;
22404}
22405
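/// Return true if a call (CALLSEQ_END) is found on the chain path walked
/// upward from \p St to \p Ld; used to avoid merging loads and stores across
/// a call unless the target allows it.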
22406bool DAGCombiner::hasCallInLdStChain(StoreSDNode *St, LoadSDNode *Ld) {
22407 SmallPtrSet<const SDNode *, 32> Visited;
22408 SmallVector<std::pair<const SDNode *, bool>, 8> Worklist;
22409 Worklist.emplace_back(Args: St->getChain().getNode(), Args: false);
22410
22411 while (!Worklist.empty()) {
22412 auto [Node, FoundCall] = Worklist.pop_back_val();
22413 if (!Visited.insert(Ptr: Node).second || Node->getNumOperands() == 0)
22414 continue;
22415
22416 switch (Node->getOpcode()) {
22417 case ISD::CALLSEQ_END:
22418 Worklist.emplace_back(Args: Node->getOperand(Num: 0).getNode(), Args: true);
22419 break;
22420 case ISD::TokenFactor:
22421 for (SDValue Op : Node->ops())
22422 Worklist.emplace_back(Args: Op.getNode(), Args&: FoundCall);
22423 break;
22424 case ISD::LOAD:
22425 if (Node == Ld)
22426 return FoundCall;
22427 [[fallthrough]];
22428 default:
22429 assert(Node->getOperand(0).getValueType() == MVT::Other &&
22430 "Invalid chain type");
22431 Worklist.emplace_back(Args: Node->getOperand(Num: 0).getNode(), Args&: FoundCall);
22432 break;
22433 }
22434 }
22435 return false;
22436}
22437
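/// Scan \p StoreNodes (sorted by offset) for a run of stores to consecutive
/// addresses, erasing leading candidates that cannot start such a run.
/// Returns the number of consecutive stores found, or 0 if no run of at least
/// two stores exists.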
22438unsigned
22439DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
22440 int64_t ElementSizeBytes) const {
22441 while (true) {
22442 // Find a store past the width of the first store.
22443 size_t StartIdx = 0;
22444 while ((StartIdx + 1 < StoreNodes.size()) &&
22445 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
22446 StoreNodes[StartIdx + 1].OffsetFromBase)
22447 ++StartIdx;
22448
22449 // Bail if we don't have enough candidates to merge.
22450 if (StartIdx + 1 >= StoreNodes.size())
22451 return 0;
22452
22453 // Trim stores that overlapped with the first store.
22454 if (StartIdx)
22455 StoreNodes.erase(CS: StoreNodes.begin(), CE: StoreNodes.begin() + StartIdx);
22456
22457 // Scan the memory operations on the chain and find the first
22458 // non-consecutive store memory address.
22459 unsigned NumConsecutiveStores = 1;
22460 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
22461 // Check that the addresses are consecutive starting from the second
22462 // element in the list of stores.
22463 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
22464 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
22465 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
22466 break;
22467 NumConsecutiveStores = i + 1;
22468 }
22469 if (NumConsecutiveStores > 1)
22470 return NumConsecutiveStores;
22471
22472 // There are no consecutive stores at the start of the list.
22473 // Remove the first store and try again.
22474 StoreNodes.erase(CS: StoreNodes.begin(), CE: StoreNodes.begin() + 1);
22475 }
22476}
22477
22478bool DAGCombiner::tryStoreMergeOfConstants(
22479 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
22480 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
22481 LLVMContext &Context = *DAG.getContext();
22482 const DataLayout &DL = DAG.getDataLayout();
22483 int64_t ElementSizeBytes = MemVT.getStoreSize();
22484 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
22485 bool MadeChange = false;
22486
22487 // Store the constants into memory as one consecutive store.
22488 while (NumConsecutiveStores >= 2) {
22489 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
22490 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
22491 Align FirstStoreAlign = FirstInChain->getAlign();
22492 unsigned LastLegalType = 1;
22493 unsigned LastLegalVectorType = 1;
22494 bool LastIntegerTrunc = false;
22495 bool NonZero = false;
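    // Track the first zero-valued store that follows a non-zero one; when no
    // merge is found we avoid skipping past it, since a run starting there may
    // still be mergeable (e.g. as a cheap store of vector zeros).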
22496 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
22497 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
22498 StoreSDNode *ST = cast<StoreSDNode>(Val: StoreNodes[i].MemNode);
22499 SDValue StoredVal = ST->getValue();
22500 bool IsElementZero = false;
22501 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: StoredVal))
22502 IsElementZero = C->isZero();
22503 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: StoredVal))
22504 IsElementZero = C->getConstantFPValue()->isNullValue();
22505 else if (ISD::isBuildVectorAllZeros(N: StoredVal.getNode()))
22506 IsElementZero = true;
22507 if (IsElementZero) {
22508 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
22509 FirstZeroAfterNonZero = i;
22510 }
22511 NonZero |= !IsElementZero;
22512
22513 // Find a legal type for the constant store.
22514 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
22515 EVT StoreTy = EVT::getIntegerVT(Context, BitWidth: SizeInBits);
22516 unsigned IsFast = 0;
22517
22518 // Break early when size is too large to be legal.
22519 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
22520 break;
22521
22522 if (TLI.isTypeLegal(VT: StoreTy) &&
22523 TLI.canMergeStoresTo(AS: FirstStoreAS, MemVT: StoreTy,
22524 MF: DAG.getMachineFunction()) &&
22525 TLI.allowsMemoryAccess(Context, DL, VT: StoreTy,
22526 MMO: *FirstInChain->getMemOperand(), Fast: &IsFast) &&
22527 IsFast) {
22528 LastIntegerTrunc = false;
22529 LastLegalType = i + 1;
22530 // Or check whether a truncstore is legal.
22531 } else if (TLI.getTypeAction(Context, VT: StoreTy) ==
22532 TargetLowering::TypePromoteInteger) {
22533 EVT LegalizedStoredValTy =
22534 TLI.getTypeToTransformTo(Context, VT: StoredVal.getValueType());
22535 if (TLI.isTruncStoreLegal(ValVT: LegalizedStoredValTy, MemVT: StoreTy) &&
22536 TLI.canMergeStoresTo(AS: FirstStoreAS, MemVT: LegalizedStoredValTy,
22537 MF: DAG.getMachineFunction()) &&
22538 TLI.allowsMemoryAccess(Context, DL, VT: StoreTy,
22539 MMO: *FirstInChain->getMemOperand(), Fast: &IsFast) &&
22540 IsFast) {
22541 LastIntegerTrunc = true;
22542 LastLegalType = i + 1;
22543 }
22544 }
22545
22546 // We only use vectors if the target allows it and the function is not
22547 // marked with the noimplicitfloat attribute.
22548 if (TLI.storeOfVectorConstantIsCheap(IsZero: !NonZero, MemVT, NumElem: i + 1, AddrSpace: FirstStoreAS) &&
22549 AllowVectors) {
22550 // Find a legal type for the vector store.
22551 unsigned Elts = (i + 1) * NumMemElts;
22552 EVT Ty = EVT::getVectorVT(Context, VT: MemVT.getScalarType(), NumElements: Elts);
22553 if (TLI.isTypeLegal(VT: Ty) && TLI.isTypeLegal(VT: MemVT) &&
22554 TLI.canMergeStoresTo(AS: FirstStoreAS, MemVT: Ty, MF: DAG.getMachineFunction()) &&
22555 TLI.allowsMemoryAccess(Context, DL, VT: Ty,
22556 MMO: *FirstInChain->getMemOperand(), Fast: &IsFast) &&
22557 IsFast)
22558 LastLegalVectorType = i + 1;
22559 }
22560 }
22561
22562 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
22563 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
22564 bool UseTrunc = LastIntegerTrunc && !UseVector;
22565
22566 // Check if we found a legal integer type that creates a meaningful
22567 // merge.
22568 if (NumElem < 2) {
22569 // We know that candidate stores are in order and of correct
22570 // shape. While there is no mergeable sequence from the
22571 // beginning one may start later in the sequence. The only
22572 // reason a merge of size N could have failed where another of
22573 // the same size would not have, is if the alignment has
22574 // improved or we've dropped a non-zero value. Drop as many
22575 // candidates as we can here.
22576 unsigned NumSkip = 1;
22577 while ((NumSkip < NumConsecutiveStores) &&
22578 (NumSkip < FirstZeroAfterNonZero) &&
22579 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
22580 NumSkip++;
22581
22582 StoreNodes.erase(CS: StoreNodes.begin(), CE: StoreNodes.begin() + NumSkip);
22583 NumConsecutiveStores -= NumSkip;
22584 continue;
22585 }
22586
22587 // Check that we can merge these candidates without causing a cycle.
22588 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStores: NumElem,
22589 RootNode)) {
22590 StoreNodes.erase(CS: StoreNodes.begin(), CE: StoreNodes.begin() + NumElem);
22591 NumConsecutiveStores -= NumElem;
22592 continue;
22593 }
22594
22595 MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumStores: NumElem,
22596 /*IsConstantSrc*/ true,
22597 UseVector, UseTrunc);
22598
22599 // Remove merged stores for next iteration.
22600 StoreNodes.erase(CS: StoreNodes.begin(), CE: StoreNodes.begin() + NumElem);
22601 NumConsecutiveStores -= NumElem;
22602 }
22603 return MadeChange;
22604}
22605
22606bool DAGCombiner::tryStoreMergeOfExtracts(
22607 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
22608 EVT MemVT, SDNode *RootNode) {
22609 LLVMContext &Context = *DAG.getContext();
22610 const DataLayout &DL = DAG.getDataLayout();
22611 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
22612 bool MadeChange = false;
22613
22614 // Loop on Consecutive Stores on success.
22615 while (NumConsecutiveStores >= 2) {
22616 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
22617 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
22618 Align FirstStoreAlign = FirstInChain->getAlign();
22619 unsigned NumStoresToMerge = 1;
22620 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
22621 // Find a legal type for the vector store.
22622 unsigned Elts = (i + 1) * NumMemElts;
22623 EVT Ty = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MemVT.getScalarType(), NumElements: Elts);
22624 unsigned IsFast = 0;
22625
22626 // Break early when size is too large to be legal.
22627 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
22628 break;
22629
22630 if (TLI.isTypeLegal(VT: Ty) &&
22631 TLI.canMergeStoresTo(AS: FirstStoreAS, MemVT: Ty, MF: DAG.getMachineFunction()) &&
22632 TLI.allowsMemoryAccess(Context, DL, VT: Ty,
22633 MMO: *FirstInChain->getMemOperand(), Fast: &IsFast) &&
22634 IsFast)
22635 NumStoresToMerge = i + 1;
22636 }
22637
22638 // Check if we found a legal integer type creating a meaningful
22639 // merge.
22640 if (NumStoresToMerge < 2) {
22641 // We know that candidate stores are in order and of correct
22642 // shape. While there is no mergeable sequence from the
22643 // beginning one may start later in the sequence. The only
22644 // reason a merge of size N could have failed where another of
22645 // the same size would not have, is if the alignment has
22646 // improved. Drop as many candidates as we can here.
22647 unsigned NumSkip = 1;
22648 while ((NumSkip < NumConsecutiveStores) &&
22649 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
22650 NumSkip++;
22651
22652 StoreNodes.erase(CS: StoreNodes.begin(), CE: StoreNodes.begin() + NumSkip);
22653 NumConsecutiveStores -= NumSkip;
22654 continue;
22655 }
22656
22657 // Check that we can merge these candidates without causing a cycle.
22658 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStores: NumStoresToMerge,
22659 RootNode)) {
22660 StoreNodes.erase(CS: StoreNodes.begin(),
22661 CE: StoreNodes.begin() + NumStoresToMerge);
22662 NumConsecutiveStores -= NumStoresToMerge;
22663 continue;
22664 }
22665
22666 MadeChange |= mergeStoresOfConstantsOrVecElts(
22667 StoreNodes, MemVT, NumStores: NumStoresToMerge, /*IsConstantSrc*/ false,
22668 /*UseVector*/ true, /*UseTrunc*/ false);
22669
22670 StoreNodes.erase(CS: StoreNodes.begin(), CE: StoreNodes.begin() + NumStoresToMerge);
22671 NumConsecutiveStores -= NumStoresToMerge;
22672 }
22673 return MadeChange;
22674}
22675
22676bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
22677 unsigned NumConsecutiveStores, EVT MemVT,
22678 SDNode *RootNode, bool AllowVectors,
22679 bool IsNonTemporalStore,
22680 bool IsNonTemporalLoad) {
22681 LLVMContext &Context = *DAG.getContext();
22682 const DataLayout &DL = DAG.getDataLayout();
22683 int64_t ElementSizeBytes = MemVT.getStoreSize();
22684 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
22685 bool MadeChange = false;
22686
22687 // Look for load nodes which are used by the stored values.
22688 SmallVector<MemOpLink, 8> LoadNodes;
22689
22690 // Find acceptable loads. Loads need to have the same chain (token factor),
22691 // must not be zext, volatile, indexed, and they must be consecutive.
22692 BaseIndexOffset LdBasePtr;
22693
22694 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
22695 StoreSDNode *St = cast<StoreSDNode>(Val: StoreNodes[i].MemNode);
22696 SDValue Val = peekThroughBitcasts(V: St->getValue());
22697 LoadSDNode *Ld = cast<LoadSDNode>(Val);
22698
22699 BaseIndexOffset LdPtr = BaseIndexOffset::match(N: Ld, DAG);
22700 // If this is not the first ptr that we check.
22701 int64_t LdOffset = 0;
22702 if (LdBasePtr.getBase().getNode()) {
22703 // The base ptr must be the same.
22704 if (!LdBasePtr.equalBaseIndex(Other: LdPtr, DAG, Off&: LdOffset))
22705 break;
22706 } else {
22707 // Check that all other base pointers are the same as this one.
22708 LdBasePtr = LdPtr;
22709 }
22710
22711 // We found a potential memory operand to merge.
22712 LoadNodes.push_back(Elt: MemOpLink(Ld, LdOffset));
22713 }
22714
22715 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
22716 Align RequiredAlignment;
22717 bool NeedRotate = false;
22718 if (LoadNodes.size() == 2) {
22719 // If we have load/store pair instructions and we only have two values,
22720 // don't bother merging.
22721 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
22722 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
22723 StoreNodes.erase(CS: StoreNodes.begin(), CE: StoreNodes.begin() + 2);
22724 LoadNodes.erase(CS: LoadNodes.begin(), CE: LoadNodes.begin() + 2);
22725 break;
22726 }
22727 // If the loads are reversed, see if we can rotate the halves into place.
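      // For example, a pair of i32 loads from p and p+4 whose values feed the
      // stores at q+4 and q respectively can be performed as a single i64
      // load from p that is rotated by 32 bits and stored as one i64 to q.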
22728 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
22729 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
22730 EVT PairVT = EVT::getIntegerVT(Context, BitWidth: ElementSizeBytes * 8 * 2);
22731 if (Offset0 - Offset1 == ElementSizeBytes &&
22732 (hasOperation(Opcode: ISD::ROTL, VT: PairVT) ||
22733 hasOperation(Opcode: ISD::ROTR, VT: PairVT))) {
22734 std::swap(a&: LoadNodes[0], b&: LoadNodes[1]);
22735 NeedRotate = true;
22736 }
22737 }
22738 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
22739 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
22740 Align FirstStoreAlign = FirstInChain->getAlign();
22741 LoadSDNode *FirstLoad = cast<LoadSDNode>(Val: LoadNodes[0].MemNode);
22742
22743 // Scan the memory operations on the chain and find the first
22744 // non-consecutive load memory address. These variables hold the index in
22745 // the store node array.
22746
22747 unsigned LastConsecutiveLoad = 1;
22748
    // These variables refer to sizes, not indexes into the array.
22750 unsigned LastLegalVectorType = 1;
22751 unsigned LastLegalIntegerType = 1;
22752 bool isDereferenceable = true;
22753 bool DoIntegerTruncate = false;
22754 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
22755 SDValue LoadChain = FirstLoad->getChain();
22756 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
22757 // All loads must share the same chain.
22758 if (LoadNodes[i].MemNode->getChain() != LoadChain)
22759 break;
22760
22761 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
22762 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
22763 break;
22764 LastConsecutiveLoad = i;
22765
22766 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
22767 isDereferenceable = false;
22768
22769 // Find a legal type for the vector store.
22770 unsigned Elts = (i + 1) * NumMemElts;
22771 EVT StoreTy = EVT::getVectorVT(Context, VT: MemVT.getScalarType(), NumElements: Elts);
22772
22773 // Break early when size is too large to be legal.
22774 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
22775 break;
22776
22777 unsigned IsFastSt = 0;
22778 unsigned IsFastLd = 0;
22779 // Don't try vector types if we need a rotate. We may still fail the
22780 // legality checks for the integer type, but we can't handle the rotate
22781 // case with vectors.
22782 // FIXME: We could use a shuffle in place of the rotate.
22783 if (!NeedRotate && TLI.isTypeLegal(VT: StoreTy) &&
22784 TLI.canMergeStoresTo(AS: FirstStoreAS, MemVT: StoreTy,
22785 MF: DAG.getMachineFunction()) &&
22786 TLI.allowsMemoryAccess(Context, DL, VT: StoreTy,
22787 MMO: *FirstInChain->getMemOperand(), Fast: &IsFastSt) &&
22788 IsFastSt &&
22789 TLI.allowsMemoryAccess(Context, DL, VT: StoreTy,
22790 MMO: *FirstLoad->getMemOperand(), Fast: &IsFastLd) &&
22791 IsFastLd) {
22792 LastLegalVectorType = i + 1;
22793 }
22794
22795 // Find a legal type for the integer store.
22796 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
22797 StoreTy = EVT::getIntegerVT(Context, BitWidth: SizeInBits);
22798 if (TLI.isTypeLegal(VT: StoreTy) &&
22799 TLI.canMergeStoresTo(AS: FirstStoreAS, MemVT: StoreTy,
22800 MF: DAG.getMachineFunction()) &&
22801 TLI.allowsMemoryAccess(Context, DL, VT: StoreTy,
22802 MMO: *FirstInChain->getMemOperand(), Fast: &IsFastSt) &&
22803 IsFastSt &&
22804 TLI.allowsMemoryAccess(Context, DL, VT: StoreTy,
22805 MMO: *FirstLoad->getMemOperand(), Fast: &IsFastLd) &&
22806 IsFastLd) {
22807 LastLegalIntegerType = i + 1;
22808 DoIntegerTruncate = false;
22809 // Or check whether a truncstore and extload is legal.
22810 } else if (TLI.getTypeAction(Context, VT: StoreTy) ==
22811 TargetLowering::TypePromoteInteger) {
22812 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, VT: StoreTy);
22813 if (TLI.isTruncStoreLegal(ValVT: LegalizedStoredValTy, MemVT: StoreTy) &&
22814 TLI.canMergeStoresTo(AS: FirstStoreAS, MemVT: LegalizedStoredValTy,
22815 MF: DAG.getMachineFunction()) &&
22816 TLI.isLoadExtLegal(ExtType: ISD::ZEXTLOAD, ValVT: LegalizedStoredValTy, MemVT: StoreTy) &&
22817 TLI.isLoadExtLegal(ExtType: ISD::SEXTLOAD, ValVT: LegalizedStoredValTy, MemVT: StoreTy) &&
22818 TLI.isLoadExtLegal(ExtType: ISD::EXTLOAD, ValVT: LegalizedStoredValTy, MemVT: StoreTy) &&
22819 TLI.allowsMemoryAccess(Context, DL, VT: StoreTy,
22820 MMO: *FirstInChain->getMemOperand(), Fast: &IsFastSt) &&
22821 IsFastSt &&
22822 TLI.allowsMemoryAccess(Context, DL, VT: StoreTy,
22823 MMO: *FirstLoad->getMemOperand(), Fast: &IsFastLd) &&
22824 IsFastLd) {
22825 LastLegalIntegerType = i + 1;
22826 DoIntegerTruncate = true;
22827 }
22828 }
22829 }
22830
22831 // Only use vector types if the vector type is larger than the integer
22832 // type. If they are the same, use integers.
22833 bool UseVectorTy =
22834 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
22835 unsigned LastLegalType =
22836 std::max(a: LastLegalVectorType, b: LastLegalIntegerType);
22837
22838 // We add +1 here because the LastXXX variables refer to location while
22839 // the NumElem refers to array/index size.
22840 unsigned NumElem = std::min(a: NumConsecutiveStores, b: LastConsecutiveLoad + 1);
22841 NumElem = std::min(a: LastLegalType, b: NumElem);
22842 Align FirstLoadAlign = FirstLoad->getAlign();
22843
22844 if (NumElem < 2) {
22845 // We know that candidate stores are in order and of correct
22846 // shape. While there is no mergeable sequence from the
22847 // beginning one may start later in the sequence. The only
22848 // reason a merge of size N could have failed where another of
22849 // the same size would not have is if the alignment or either
22850 // the load or store has improved. Drop as many candidates as we
22851 // can here.
22852 unsigned NumSkip = 1;
22853 while ((NumSkip < LoadNodes.size()) &&
22854 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
22855 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
22856 NumSkip++;
22857 StoreNodes.erase(CS: StoreNodes.begin(), CE: StoreNodes.begin() + NumSkip);
22858 LoadNodes.erase(CS: LoadNodes.begin(), CE: LoadNodes.begin() + NumSkip);
22859 NumConsecutiveStores -= NumSkip;
22860 continue;
22861 }
22862
22863 // Check that we can merge these candidates without causing a cycle.
22864 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStores: NumElem,
22865 RootNode)) {
22866 StoreNodes.erase(CS: StoreNodes.begin(), CE: StoreNodes.begin() + NumElem);
22867 LoadNodes.erase(CS: LoadNodes.begin(), CE: LoadNodes.begin() + NumElem);
22868 NumConsecutiveStores -= NumElem;
22869 continue;
22870 }
22871
22872 // Find if it is better to use vectors or integers to load and store
22873 // to memory.
22874 EVT JointMemOpVT;
22875 if (UseVectorTy) {
22876 // Find a legal type for the vector store.
22877 unsigned Elts = NumElem * NumMemElts;
22878 JointMemOpVT = EVT::getVectorVT(Context, VT: MemVT.getScalarType(), NumElements: Elts);
22879 } else {
22880 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
22881 JointMemOpVT = EVT::getIntegerVT(Context, BitWidth: SizeInBits);
22882 }
22883
22884 // Check if there is a call in the load/store chain.
22885 if (!TLI.shouldMergeStoreOfLoadsOverCall(MemVT, JointMemOpVT) &&
22886 hasCallInLdStChain(St: cast<StoreSDNode>(Val: StoreNodes[0].MemNode),
22887 Ld: cast<LoadSDNode>(Val: LoadNodes[0].MemNode))) {
22888 StoreNodes.erase(CS: StoreNodes.begin(), CE: StoreNodes.begin() + NumElem);
22889 LoadNodes.erase(CS: LoadNodes.begin(), CE: LoadNodes.begin() + NumElem);
22890 NumConsecutiveStores -= NumElem;
22891 continue;
22892 }
22893
22894 SDLoc LoadDL(LoadNodes[0].MemNode);
22895 SDLoc StoreDL(StoreNodes[0].MemNode);
22896
22897 // The merged loads are required to have the same incoming chain, so
22898 // using the first's chain is acceptable.
22899
22900 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumStores: NumElem);
22901 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
22902 AddToWorklist(N: NewStoreChain.getNode());
22903
22904 MachineMemOperand::Flags LdMMOFlags =
22905 isDereferenceable ? MachineMemOperand::MODereferenceable
22906 : MachineMemOperand::MONone;
22907 if (IsNonTemporalLoad)
22908 LdMMOFlags |= MachineMemOperand::MONonTemporal;
22909
22910 LdMMOFlags |= TLI.getTargetMMOFlags(Node: *FirstLoad);
22911
22912 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
22913 ? MachineMemOperand::MONonTemporal
22914 : MachineMemOperand::MONone;
22915
22916 StMMOFlags |= TLI.getTargetMMOFlags(Node: *StoreNodes[0].MemNode);
22917
22918 SDValue NewLoad, NewStore;
22919 if (UseVectorTy || !DoIntegerTruncate) {
22920 NewLoad = DAG.getLoad(
22921 VT: JointMemOpVT, dl: LoadDL, Chain: FirstLoad->getChain(), Ptr: FirstLoad->getBasePtr(),
22922 PtrInfo: FirstLoad->getPointerInfo(), Alignment: FirstLoadAlign, MMOFlags: LdMMOFlags);
22923 SDValue StoreOp = NewLoad;
22924 if (NeedRotate) {
22925 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
22926 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
22927 "Unexpected type for rotate-able load pair");
22928 SDValue RotAmt =
22929 DAG.getShiftAmountConstant(Val: LoadWidth / 2, VT: JointMemOpVT, DL: LoadDL);
22930 // Target can convert to the identical ROTR if it does not have ROTL.
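// Rotating by half the width swaps the two halves of the loaded value, e.g.
// an i64 loaded as {Lo, Hi} is stored as {Hi, Lo} by the single wide store.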
22931 StoreOp = DAG.getNode(Opcode: ISD::ROTL, DL: LoadDL, VT: JointMemOpVT, N1: NewLoad, N2: RotAmt);
22932 }
22933 NewStore = DAG.getStore(
22934 Chain: NewStoreChain, dl: StoreDL, Val: StoreOp, Ptr: FirstInChain->getBasePtr(),
22935 PtrInfo: CanReusePtrInfo ? FirstInChain->getPointerInfo()
22936 : MachinePointerInfo(FirstStoreAS),
22937 Alignment: FirstStoreAlign, MMOFlags: StMMOFlags);
22938 } else { // This must be the truncstore/extload case
22939 EVT ExtendedTy =
22940 TLI.getTypeToTransformTo(Context&: *DAG.getContext(), VT: JointMemOpVT);
22941 NewLoad = DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl: LoadDL, VT: ExtendedTy,
22942 Chain: FirstLoad->getChain(), Ptr: FirstLoad->getBasePtr(),
22943 PtrInfo: FirstLoad->getPointerInfo(), MemVT: JointMemOpVT,
22944 Alignment: FirstLoadAlign, MMOFlags: LdMMOFlags);
22945 NewStore = DAG.getTruncStore(
22946 Chain: NewStoreChain, dl: StoreDL, Val: NewLoad, Ptr: FirstInChain->getBasePtr(),
22947 PtrInfo: CanReusePtrInfo ? FirstInChain->getPointerInfo()
22948 : MachinePointerInfo(FirstStoreAS),
22949 SVT: JointMemOpVT, Alignment: FirstInChain->getAlign(),
22950 MMOFlags: FirstInChain->getMemOperand()->getFlags());
22951 }
22952
22953 // Transfer chain users from old loads to the new load.
22954 for (unsigned i = 0; i < NumElem; ++i) {
22955 LoadSDNode *Ld = cast<LoadSDNode>(Val: LoadNodes[i].MemNode);
22956 DAG.ReplaceAllUsesOfValueWith(From: SDValue(Ld, 1),
22957 To: SDValue(NewLoad.getNode(), 1));
22958 }
22959
22960 // Replace all stores with the new store. Recursively remove corresponding
22961 // values if they are no longer used.
22962 for (unsigned i = 0; i < NumElem; ++i) {
22963 SDValue Val = StoreNodes[i].MemNode->getOperand(Num: 1);
22964 CombineTo(N: StoreNodes[i].MemNode, Res: NewStore);
22965 if (Val->use_empty())
22966 recursivelyDeleteUnusedNodes(N: Val.getNode());
22967 }
22968
22969 MadeChange = true;
22970 StoreNodes.erase(CS: StoreNodes.begin(), CE: StoreNodes.begin() + NumElem);
22971 LoadNodes.erase(CS: LoadNodes.begin(), CE: LoadNodes.begin() + NumElem);
22972 NumConsecutiveStores -= NumElem;
22973 }
22974 return MadeChange;
22975}
22976
22977bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
22978 if (OptLevel == CodeGenOptLevel::None || !EnableStoreMerging)
22979 return false;
22980
22981 // TODO: Extend this function to merge stores of scalable vectors.
22982 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
22983 // store since we know <vscale x 16 x i8> is exactly twice as large as
22984 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
22985 EVT MemVT = St->getMemoryVT();
22986 if (MemVT.isScalableVT())
22987 return false;
22988 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
22989 return false;
22990
22991 // This function cannot currently deal with non-byte-sized memory sizes.
22992 int64_t ElementSizeBytes = MemVT.getStoreSize();
22993 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
22994 return false;
22995
22996 // Do not bother looking at stored values that are not constants, loads, or
22997 // extracted vector elements.
22998 SDValue StoredVal = peekThroughBitcasts(V: St->getValue());
22999 const StoreSource StoreSrc = getStoreSource(StoreVal: StoredVal);
23000 if (StoreSrc == StoreSource::Unknown)
23001 return false;
23002
23003 SmallVector<MemOpLink, 8> StoreNodes;
23004 // Find potential store merge candidates by searching through chain sub-DAG
23005 SDNode *RootNode = getStoreMergeCandidates(St, StoreNodes);
23006
23007 // Check if there is anything to merge.
23008 if (StoreNodes.size() < 2)
23009 return false;
23010
23011 // Sort the memory operands according to their distance from the
23012 // base pointer.
23013 llvm::sort(C&: StoreNodes, Comp: [](MemOpLink LHS, MemOpLink RHS) {
23014 return LHS.OffsetFromBase < RHS.OffsetFromBase;
23015 });
23016
23017 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
23018 Kind: Attribute::NoImplicitFloat);
23019 bool IsNonTemporalStore = St->isNonTemporal();
23020 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
23021 cast<LoadSDNode>(Val&: StoredVal)->isNonTemporal();
23022
23023 // Store Merge attempts to merge the lowest stores. This generally
23024 // works out well: when the merge succeeds, the remaining stores are
23025 // checked after the first collection of stores is merged. However, in the
23026 // case that a non-mergeable store is found first, e.g., {p[-2],
23027 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
23028 // mergeable cases. To prevent this, we prune such stores from the
23029 // front of StoreNodes here.
23030 bool MadeChange = false;
23031 while (StoreNodes.size() > 1) {
23032 unsigned NumConsecutiveStores =
23033 getConsecutiveStores(StoreNodes, ElementSizeBytes);
23034 // There are no more stores in the list to examine.
23035 if (NumConsecutiveStores == 0)
23036 return MadeChange;
23037
23038 // We have at least 2 consecutive stores. Try to merge them.
23039 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
23040 switch (StoreSrc) {
23041 case StoreSource::Constant:
23042 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
23043 MemVT, RootNode, AllowVectors);
23044 break;
23045
23046 case StoreSource::Extract:
23047 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
23048 MemVT, RootNode);
23049 break;
23050
23051 case StoreSource::Load:
23052 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
23053 MemVT, RootNode, AllowVectors,
23054 IsNonTemporalStore, IsNonTemporalLoad);
23055 break;
23056
23057 default:
23058 llvm_unreachable("Unhandled store source type");
23059 }
23060 }
23061
23062 // Remember if we failed to optimize, to save compile time.
23063 if (!MadeChange)
23064 ChainsWithoutMergeableStores.insert(Ptr: RootNode);
23065
23066 return MadeChange;
23067}
23068
23069SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
23070 SDLoc SL(ST);
23071 SDValue ReplStore;
23072
23073 // Replace the chain to avoid dependency.
23074 if (ST->isTruncatingStore()) {
23075 ReplStore = DAG.getTruncStore(Chain: BetterChain, dl: SL, Val: ST->getValue(),
23076 Ptr: ST->getBasePtr(), SVT: ST->getMemoryVT(),
23077 MMO: ST->getMemOperand());
23078 } else {
23079 ReplStore = DAG.getStore(Chain: BetterChain, dl: SL, Val: ST->getValue(), Ptr: ST->getBasePtr(),
23080 MMO: ST->getMemOperand());
23081 }
23082
23083 // Create token to keep both nodes around.
23084 SDValue Token = DAG.getNode(Opcode: ISD::TokenFactor, DL: SL,
23085 VT: MVT::Other, N1: ST->getChain(), N2: ReplStore);
23086
23087 // Make sure the new and old chains are cleaned up.
23088 AddToWorklist(N: Token.getNode());
23089
23090 // Don't add users to work list.
23091 return CombineTo(N: ST, Res: Token, AddTo: false);
23092}
23093
23094SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
23095 SDValue Value = ST->getValue();
23096 if (Value.getOpcode() == ISD::TargetConstantFP)
23097 return SDValue();
23098
23099 if (!ISD::isNormalStore(N: ST))
23100 return SDValue();
23101
23102 SDLoc DL(ST);
23103
23104 SDValue Chain = ST->getChain();
23105 SDValue Ptr = ST->getBasePtr();
23106
23107 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Val&: Value);
23108
23109 // NOTE: If the original store is volatile, this transform must not increase
23110 // the number of stores. For example, on x86-32 an f64 can be stored in one
23111 // processor operation but an i64 (which is not legal) requires two. So the
23112 // transform should not be done in this case.
23113
23114 SDValue Tmp;
23115 switch (CFP->getSimpleValueType(ResNo: 0).SimpleTy) {
23116 default:
23117 llvm_unreachable("Unknown FP type");
23118 case MVT::f16: // We don't do this for these yet.
23119 case MVT::bf16:
23120 case MVT::f80:
23121 case MVT::f128:
23122 case MVT::ppcf128:
23123 return SDValue();
23124 case MVT::f32:
23125 if ((isTypeLegal(VT: MVT::i32) && !LegalOperations && ST->isSimple()) ||
23126 TLI.isOperationLegalOrCustom(Op: ISD::STORE, VT: MVT::i32)) {
23127 Tmp = DAG.getConstant(Val: (uint32_t)CFP->getValueAPF().
23128 bitcastToAPInt().getZExtValue(), DL: SDLoc(CFP),
23129 VT: MVT::i32);
23130 return DAG.getStore(Chain, dl: DL, Val: Tmp, Ptr, MMO: ST->getMemOperand());
23131 }
23132
23133 return SDValue();
23134 case MVT::f64:
23135 if ((TLI.isTypeLegal(VT: MVT::i64) && !LegalOperations &&
23136 ST->isSimple()) ||
23137 TLI.isOperationLegalOrCustom(Op: ISD::STORE, VT: MVT::i64)) {
23138 Tmp = DAG.getConstant(Val: CFP->getValueAPF().bitcastToAPInt().
23139 getZExtValue(), DL: SDLoc(CFP), VT: MVT::i64);
23140 return DAG.getStore(Chain, dl: DL, Val: Tmp,
23141 Ptr, MMO: ST->getMemOperand());
23142 }
23143
23144 if (ST->isSimple() && TLI.isOperationLegalOrCustom(Op: ISD::STORE, VT: MVT::i32) &&
23145 !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
23146 // Many FP stores are not made apparent until after legalize, e.g. for
23147 // argument passing. Since this is so common, custom legalize the
23148 // 64-bit integer store into two 32-bit stores.
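// E.g. storing the f64 constant 1.0 (bit pattern 0x3FF0000000000000) becomes
// stores of i32 0x00000000 and i32 0x3FF00000, low word first on
// little-endian targets.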
23149 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
23150 SDValue Lo = DAG.getConstant(Val: Val & 0xFFFFFFFF, DL: SDLoc(CFP), VT: MVT::i32);
23151 SDValue Hi = DAG.getConstant(Val: Val >> 32, DL: SDLoc(CFP), VT: MVT::i32);
23152 if (DAG.getDataLayout().isBigEndian())
23153 std::swap(a&: Lo, b&: Hi);
23154
23155 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
23156 AAMDNodes AAInfo = ST->getAAInfo();
23157
23158 SDValue St0 = DAG.getStore(Chain, dl: DL, Val: Lo, Ptr, PtrInfo: ST->getPointerInfo(),
23159 Alignment: ST->getBaseAlign(), MMOFlags, AAInfo);
23160 Ptr = DAG.getMemBasePlusOffset(Base: Ptr, Offset: TypeSize::getFixed(ExactSize: 4), DL);
23161 SDValue St1 = DAG.getStore(Chain, dl: DL, Val: Hi, Ptr,
23162 PtrInfo: ST->getPointerInfo().getWithOffset(O: 4),
23163 Alignment: ST->getBaseAlign(), MMOFlags, AAInfo);
23164 return DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other,
23165 N1: St0, N2: St1);
23166 }
23167
23168 return SDValue();
23169 }
23170}
23171
23172// (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
23173//
23174// If a store of a load with an element inserted into it has no other
23175// uses in between the chain, then we can consider the vector store
23176// dead and replace it with just the single scalar element store.
23177SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
23178 SDLoc DL(ST);
23179 SDValue Value = ST->getValue();
23180 SDValue Ptr = ST->getBasePtr();
23181 SDValue Chain = ST->getChain();
23182 if (Value.getOpcode() != ISD::INSERT_VECTOR_ELT || !Value.hasOneUse())
23183 return SDValue();
23184
23185 SDValue Elt = Value.getOperand(i: 1);
23186 SDValue Idx = Value.getOperand(i: 2);
23187
23188 // If the element isn't byte sized or is implicitly truncated then we can't
23189 // compute an offset.
23190 EVT EltVT = Elt.getValueType();
23191 if (!EltVT.isByteSized() ||
23192 EltVT != Value.getOperand(i: 0).getValueType().getVectorElementType())
23193 return SDValue();
23194
23195 auto *Ld = dyn_cast<LoadSDNode>(Val: Value.getOperand(i: 0));
23196 if (!Ld || Ld->getBasePtr() != Ptr ||
23197 ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
23198 !ISD::isNormalStore(N: ST) ||
23199 Ld->getAddressSpace() != ST->getAddressSpace() ||
23200 !Chain.reachesChainWithoutSideEffects(Dest: SDValue(Ld, 1)))
23201 return SDValue();
23202
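// Only proceed when a scalar access of the element type is allowed (and fast)
// for this address space, alignment and memory-operand flags.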
23203 unsigned IsFast;
23204 if (!TLI.allowsMemoryAccess(Context&: *DAG.getContext(), DL: DAG.getDataLayout(),
23205 VT: Elt.getValueType(), AddrSpace: ST->getAddressSpace(),
23206 Alignment: ST->getAlign(), Flags: ST->getMemOperand()->getFlags(),
23207 Fast: &IsFast) ||
23208 !IsFast)
23209 return SDValue();
23210
23211 MachinePointerInfo PointerInfo(ST->getAddressSpace());
23212
23213 // If the offset is a known constant then try to recover the pointer
23214 // info
23215 SDValue NewPtr;
23216 if (auto *CIdx = dyn_cast<ConstantSDNode>(Val&: Idx)) {
23217 unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
23218 NewPtr = DAG.getMemBasePlusOffset(Base: Ptr, Offset: TypeSize::getFixed(ExactSize: COffset), DL);
23219 PointerInfo = ST->getPointerInfo().getWithOffset(O: COffset);
23220 } else {
23221 // The original DAG loaded the entire vector from memory, so arithmetic
23222 // within it must be inbounds.
23223 NewPtr = TLI.getInboundsVectorElementPointer(DAG, VecPtr: Ptr, VecVT: Value.getValueType(),
23224 Index: Idx);
23225 }
23226
23227 return DAG.getStore(Chain, dl: DL, Val: Elt, Ptr: NewPtr, PtrInfo: PointerInfo, Alignment: ST->getAlign(),
23228 MMOFlags: ST->getMemOperand()->getFlags());
23229}
23230
23231SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) {
23232 AtomicSDNode *ST = cast<AtomicSDNode>(Val: N);
23233 SDValue Val = ST->getVal();
23234 EVT VT = Val.getValueType();
23235 EVT MemVT = ST->getMemoryVT();
23236
23237 if (MemVT.bitsLT(VT)) { // Is truncating store
23238 APInt TruncDemandedBits = APInt::getLowBitsSet(numBits: VT.getScalarSizeInBits(),
23239 loBitsSet: MemVT.getScalarSizeInBits());
23240 // See if we can simplify the operation with SimplifyDemandedBits, which
23241 // only works if the value has a single use.
23242 if (SimplifyDemandedBits(Op: Val, DemandedBits: TruncDemandedBits))
23243 return SDValue(N, 0);
23244 }
23245
23246 return SDValue();
23247}
23248
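// Fold a store of a vselect between a vector value and a load from the same
// address back into a masked store:
//   (store (vselect Mask, X, (load Ptr)), Ptr) -> (masked_store X, Ptr, Mask)
// The commuted form selects the loaded value on true, so the mask is inverted.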
23249static SDValue foldToMaskedStore(StoreSDNode *Store, SelectionDAG &DAG,
23250 const SDLoc &Dl) {
23251 if (!Store->isSimple() || !ISD::isNormalStore(N: Store))
23252 return SDValue();
23253
23254 SDValue StoredVal = Store->getValue();
23255 SDValue StorePtr = Store->getBasePtr();
23256 SDValue StoreOffset = Store->getOffset();
23257 EVT VT = Store->getMemoryVT();
23258
23259 // Skip this combine for non-vector and scalable vector types, and for
23260 // <1 x ty> vectors, as the latter will be scalarized later.
23261 if (!VT.isVector() || VT.isScalableVector() || VT.getVectorNumElements() == 1)
23262 return SDValue();
23263
23264 unsigned AddrSpace = Store->getAddressSpace();
23265 Align Alignment = Store->getAlign();
23266 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23267
23268 if (!TLI.isOperationLegalOrCustom(Op: ISD::MSTORE, VT) ||
23269 !TLI.allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment))
23270 return SDValue();
23271
23272 SDValue Mask, OtherVec, LoadCh;
23273 unsigned LoadPos;
23274 if (sd_match(N: StoredVal,
23275 P: m_VSelect(Cond: m_Value(N&: Mask), T: m_Value(N&: OtherVec),
23276 F: m_Load(Ch: m_Value(N&: LoadCh), Ptr: m_Specific(N: StorePtr),
23277 Offset: m_Specific(N: StoreOffset))))) {
23278 LoadPos = 2;
23279 } else if (sd_match(N: StoredVal,
23280 P: m_VSelect(Cond: m_Value(N&: Mask),
23281 T: m_Load(Ch: m_Value(N&: LoadCh), Ptr: m_Specific(N: StorePtr),
23282 Offset: m_Specific(N: StoreOffset)),
23283 F: m_Value(N&: OtherVec)))) {
23284 LoadPos = 1;
23285 } else {
23286 return SDValue();
23287 }
23288
23289 auto *Load = cast<LoadSDNode>(Val: StoredVal.getOperand(i: LoadPos));
23290 if (!Load->isSimple() || !ISD::isNormalLoad(N: Load) ||
23291 Load->getAddressSpace() != AddrSpace)
23292 return SDValue();
23293
23294 if (!Store->getChain().reachesChainWithoutSideEffects(Dest: LoadCh))
23295 return SDValue();
23296
23297 if (LoadPos == 1)
23298 Mask = DAG.getNOT(DL: Dl, Val: Mask, VT: Mask.getValueType());
23299
23300 return DAG.getMaskedStore(Chain: Store->getChain(), dl: Dl, Val: OtherVec, Base: StorePtr,
23301 Offset: StoreOffset, Mask, MemVT: VT, MMO: Store->getMemOperand(),
23302 AM: Store->getAddressingMode());
23303}
23304
23305SDValue DAGCombiner::visitSTORE(SDNode *N) {
23306 StoreSDNode *ST = cast<StoreSDNode>(Val: N);
23307 SDValue Chain = ST->getChain();
23308 SDValue Value = ST->getValue();
23309 SDValue Ptr = ST->getBasePtr();
23310
23311 // If this is a store of a bit convert, store the input value if the
23312 // resultant store does not need a higher alignment than the original.
23313 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
23314 ST->isUnindexed()) {
23315 EVT SVT = Value.getOperand(i: 0).getValueType();
23316 // If the store is volatile, we only want to change the store type if the
23317 // resulting store is legal. Otherwise we might increase the number of
23318 // memory accesses. We don't care if the original type was legal or not
23319 // as we assume software couldn't rely on the number of accesses of an
23320 // illegal type.
23321 // TODO: May be able to relax for unordered atomics (see D66309)
23322 if (((!LegalOperations && ST->isSimple()) ||
23323 TLI.isOperationLegal(Op: ISD::STORE, VT: SVT)) &&
23324 TLI.isStoreBitCastBeneficial(StoreVT: Value.getValueType(), BitcastVT: SVT,
23325 DAG, MMO: *ST->getMemOperand())) {
23326 return DAG.getStore(Chain, dl: SDLoc(N), Val: Value.getOperand(i: 0), Ptr,
23327 MMO: ST->getMemOperand());
23328 }
23329 }
23330
23331 // Turn 'store undef, Ptr' -> nothing.
23332 if (Value.isUndef() && ST->isUnindexed() && !ST->isVolatile())
23333 return Chain;
23334
23335 // Try to infer better alignment information than the store already has.
23336 if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() &&
23337 !ST->isAtomic()) {
23338 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
23339 if (*Alignment > ST->getAlign() &&
23340 isAligned(Lhs: *Alignment, SizeInBytes: ST->getSrcValueOffset())) {
23341 SDValue NewStore =
23342 DAG.getTruncStore(Chain, dl: SDLoc(N), Val: Value, Ptr, PtrInfo: ST->getPointerInfo(),
23343 SVT: ST->getMemoryVT(), Alignment: *Alignment,
23344 MMOFlags: ST->getMemOperand()->getFlags(), AAInfo: ST->getAAInfo());
23345 // NewStore will always be N as we are only refining the alignment
23346 assert(NewStore.getNode() == N);
23347 (void)NewStore;
23348 }
23349 }
23350 }
23351
23352 // Try transforming a pair floating point load / store ops to integer
23353 // load / store ops.
23354 if (SDValue NewST = TransformFPLoadStorePair(N))
23355 return NewST;
23356
23357 // Try transforming several stores into STORE (BSWAP).
23358 if (SDValue Store = mergeTruncStores(N: ST))
23359 return Store;
23360
23361 if (ST->isUnindexed()) {
23362 // Walk up chain skipping non-aliasing memory nodes, on this store and any
23363 // adjacent stores.
23364 if (findBetterNeighborChains(St: ST)) {
23365 // replaceStoreChain uses CombineTo, which handled all of the worklist
23366 // manipulation. Return the original node to not do anything else.
23367 return SDValue(ST, 0);
23368 }
23369 Chain = ST->getChain();
23370 }
23371
23372 // FIXME: is there such a thing as a truncating indexed store?
23373 if (ST->isTruncatingStore() && ST->isUnindexed() &&
23374 Value.getValueType().isInteger() &&
23375 (!isa<ConstantSDNode>(Val: Value) ||
23376 !cast<ConstantSDNode>(Val&: Value)->isOpaque())) {
23377 // Convert a truncating store of an extension into a standard store.
23378 if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
23379 Value.getOpcode() == ISD::SIGN_EXTEND ||
23380 Value.getOpcode() == ISD::ANY_EXTEND) &&
23381 Value.getOperand(i: 0).getValueType() == ST->getMemoryVT() &&
23382 TLI.isOperationLegalOrCustom(Op: ISD::STORE, VT: ST->getMemoryVT()))
23383 return DAG.getStore(Chain, dl: SDLoc(N), Val: Value.getOperand(i: 0), Ptr,
23384 MMO: ST->getMemOperand());
23385
23386 APInt TruncDemandedBits =
23387 APInt::getLowBitsSet(numBits: Value.getScalarValueSizeInBits(),
23388 loBitsSet: ST->getMemoryVT().getScalarSizeInBits());
23389
23390 // See if we can simplify the operation with SimplifyDemandedBits, which
23391 // only works if the value has a single use.
23392 AddToWorklist(N: Value.getNode());
23393 if (SimplifyDemandedBits(Op: Value, DemandedBits: TruncDemandedBits)) {
23394 // Re-visit the store if anything changed and the store hasn't been merged
23395 // with another node (N is deleted). SimplifyDemandedBits will add Value's
23396 // node back to the worklist if necessary, but we also need to re-visit
23397 // the Store node itself.
23398 if (N->getOpcode() != ISD::DELETED_NODE)
23399 AddToWorklist(N);
23400 return SDValue(N, 0);
23401 }
23402
23403 // Otherwise, see if we can simplify the input to this truncstore with
23404 // knowledge that only the low bits are being used. For example:
23405 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
23406 if (SDValue Shorter =
23407 TLI.SimplifyMultipleUseDemandedBits(Op: Value, DemandedBits: TruncDemandedBits, DAG))
23408 return DAG.getTruncStore(Chain, dl: SDLoc(N), Val: Shorter, Ptr, SVT: ST->getMemoryVT(),
23409 MMO: ST->getMemOperand());
23410
23411 // If we're storing a truncated constant, see if we can simplify it.
23412 // TODO: Move this to targetShrinkDemandedConstant?
23413 if (auto *Cst = dyn_cast<ConstantSDNode>(Val&: Value))
23414 if (!Cst->isOpaque()) {
23415 const APInt &CValue = Cst->getAPIntValue();
23416 APInt NewVal = CValue & TruncDemandedBits;
23417 if (NewVal != CValue) {
23418 SDValue Shorter =
23419 DAG.getConstant(Val: NewVal, DL: SDLoc(N), VT: Value.getValueType());
23420 return DAG.getTruncStore(Chain, dl: SDLoc(N), Val: Shorter, Ptr,
23421 SVT: ST->getMemoryVT(), MMO: ST->getMemOperand());
23422 }
23423 }
23424 }
23425
23426 // If this is a load followed by a store to the same location, then the store
23427 // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
23428 // TODO: Add big-endian truncate support with test coverage.
23429 // TODO: Can relax for unordered atomics (see D66309)
23430 SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
23431 ? peekThroughTruncates(V: Value)
23432 : Value;
23433 if (auto *Ld = dyn_cast<LoadSDNode>(Val&: TruncVal)) {
23434 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
23435 ST->isUnindexed() && ST->isSimple() &&
23436 Ld->getAddressSpace() == ST->getAddressSpace() &&
23437 // There can't be any side effects between the load and store, such as
23438 // a call or store.
23439 Chain.reachesChainWithoutSideEffects(Dest: SDValue(Ld, 1))) {
23440 // The store is dead, remove it.
23441 return Chain;
23442 }
23443 }
23444
23445 // Try scalarizing vector stores of loads where we only change one element
23446 if (SDValue NewST = replaceStoreOfInsertLoad(ST))
23447 return NewST;
23448
23449 // TODO: Can relax for unordered atomics (see D66309)
23450 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Val&: Chain)) {
23451 if (ST->isUnindexed() && ST->isSimple() &&
23452 ST1->isUnindexed() && ST1->isSimple()) {
23453 if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
23454 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
23455 ST->getAddressSpace() == ST1->getAddressSpace()) {
23456 // If this is a store followed by a store with the same value to the
23457 // same location, then the store is dead/noop.
23458 return Chain;
23459 }
23460
23461 if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() &&
23462 !ST1->getBasePtr().isUndef() &&
23463 ST->getAddressSpace() == ST1->getAddressSpace()) {
23464 // If we consider two stores and one smaller in size is a scalable
23465 // vector type and another one a bigger size store with a fixed type,
23466 // then we could not allow the scalable store removal because we don't
23467 // know its final size in the end.
23468 if (ST->getMemoryVT().isScalableVector() ||
23469 ST1->getMemoryVT().isScalableVector()) {
23470 if (ST1->getBasePtr() == Ptr &&
23471 TypeSize::isKnownLE(LHS: ST1->getMemoryVT().getStoreSize(),
23472 RHS: ST->getMemoryVT().getStoreSize())) {
23473 CombineTo(N: ST1, Res: ST1->getChain());
23474 return SDValue(N, 0);
23475 }
23476 } else {
23477 const BaseIndexOffset STBase = BaseIndexOffset::match(N: ST, DAG);
23478 const BaseIndexOffset ChainBase = BaseIndexOffset::match(N: ST1, DAG);
23479 // If the preceding store writes to a subset of the current store's
23480 // location and no other node is chained to that store, the preceding
23481 // store is dead and can be dropped. Do not remove stores to undef as they
23482 // may be used as data sinks.
23483 if (STBase.contains(DAG, BitSize: ST->getMemoryVT().getFixedSizeInBits(),
23484 Other: ChainBase,
23485 OtherBitSize: ST1->getMemoryVT().getFixedSizeInBits())) {
23486 CombineTo(N: ST1, Res: ST1->getChain());
23487 return SDValue(N, 0);
23488 }
23489 }
23490 }
23491 }
23492 }
23493
23494 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
23495 // truncating store. We can do this even if this is already a truncstore.
23496 if ((Value.getOpcode() == ISD::FP_ROUND ||
23497 Value.getOpcode() == ISD::TRUNCATE) &&
23498 Value->hasOneUse() && ST->isUnindexed() &&
23499 TLI.canCombineTruncStore(ValVT: Value.getOperand(i: 0).getValueType(),
23500 MemVT: ST->getMemoryVT(), LegalOnly: LegalOperations)) {
23501 return DAG.getTruncStore(Chain, dl: SDLoc(N), Val: Value.getOperand(i: 0),
23502 Ptr, SVT: ST->getMemoryVT(), MMO: ST->getMemOperand());
23503 }
23504
23505 // Always perform this optimization before types are legal. If the target
23506 // prefers, also try this after legalization to catch stores that were created
23507 // by intrinsics or other nodes.
23508 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(MemVT: ST->getMemoryVT()))) {
23509 while (true) {
23510 // There can be multiple store sequences on the same chain.
23511 // Keep trying to merge store sequences until we are unable to do so
23512 // or until we merge the last store on the chain.
23513 bool Changed = mergeConsecutiveStores(St: ST);
23514 if (!Changed) break;
23515 // Return N, as the merge only uses CombineTo and no worklist
23516 // cleanup is necessary.
23517 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(Val: N))
23518 return SDValue(N, 0);
23519 }
23520 }
23521
23522 // Try transforming N to an indexed store.
23523 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
23524 return SDValue(N, 0);
23525
23526 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
23527 //
23528 // Make sure to do this only after attempting to merge stores in order to
23529 // avoid changing the types of some subset of stores due to visit order,
23530 // preventing their merging.
23531 if (isa<ConstantFPSDNode>(Val: ST->getValue())) {
23532 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
23533 return NewSt;
23534 }
23535
23536 if (SDValue NewSt = splitMergedValStore(ST))
23537 return NewSt;
23538
23539 if (SDValue MaskedStore = foldToMaskedStore(Store: ST, DAG, Dl: SDLoc(N)))
23540 return MaskedStore;
23541
23542 return ReduceLoadOpStoreWidth(N);
23543}
23544
23545SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
23546 const auto *LifetimeEnd = cast<LifetimeSDNode>(Val: N);
23547 const BaseIndexOffset LifetimeEndBase(N->getOperand(Num: 1), SDValue(), 0, false);
23548
23549 // We walk up the chains to find stores.
23550 SmallVector<SDValue, 8> Chains = {N->getOperand(Num: 0)};
23551 while (!Chains.empty()) {
23552 SDValue Chain = Chains.pop_back_val();
23553 if (!Chain.hasOneUse())
23554 continue;
23555 switch (Chain.getOpcode()) {
23556 case ISD::TokenFactor:
23557 for (unsigned Nops = Chain.getNumOperands(); Nops;)
23558 Chains.push_back(Elt: Chain.getOperand(i: --Nops));
23559 break;
23560 case ISD::LIFETIME_START:
23561 case ISD::LIFETIME_END:
23562 // We can forward past any lifetime start/end that can be proven not to
23563 // alias the node.
23564 if (!mayAlias(Op0: Chain.getNode(), Op1: N))
23565 Chains.push_back(Elt: Chain.getOperand(i: 0));
23566 break;
23567 case ISD::STORE: {
23568 StoreSDNode *ST = dyn_cast<StoreSDNode>(Val&: Chain);
23569 // TODO: Can relax for unordered atomics (see D66309)
23570 if (!ST->isSimple() || ST->isIndexed())
23571 continue;
23572 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
23573 // The bounds of a scalable store are not known until runtime, so this
23574 // store cannot be elided.
23575 if (StoreSize.isScalable())
23576 continue;
23577 const BaseIndexOffset StoreBase = BaseIndexOffset::match(N: ST, DAG);
23578 // If we store purely within object bounds just before its lifetime ends,
23579 // we can remove the store.
23580 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
23581 if (LifetimeEndBase.contains(
23582 DAG, BitSize: MFI.getObjectSize(ObjectIdx: LifetimeEnd->getFrameIndex()) * 8,
23583 Other: StoreBase, OtherBitSize: StoreSize.getFixedValue() * 8)) {
23584 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
23585 dbgs() << "\nwithin LIFETIME_END of : ";
23586 LifetimeEndBase.dump(); dbgs() << "\n");
23587 CombineTo(N: ST, Res: ST->getChain());
23588 return SDValue(N, 0);
23589 }
23590 }
23591 }
23592 }
23593 return SDValue();
23594}
23595
23596/// In the store sequence below, the F and I values
23597/// are bundled together as an i64 value before being stored into memory.
23598/// Sometimes it is more efficient to generate separate stores for F and I,
23599/// which can remove the bitwise instructions or sink them to colder places.
23600///
23601/// (store (or (zext (bitcast F to i32) to i64),
23602/// (shl (zext I to i64), 32)), addr) -->
23603/// (store F, addr) and (store I, addr+4)
23604///
23605/// Similarly, splitting for other merged store can also be beneficial, like:
23606/// For pair of {i32, i32}, i64 store --> two i32 stores.
23607/// For pair of {i32, i16}, i64 store --> two i32 stores.
23608/// For pair of {i16, i16}, i32 store --> two i16 stores.
23609/// For pair of {i16, i8}, i32 store --> two i16 stores.
23610/// For pair of {i8, i8}, i16 store --> two i8 stores.
23611///
23612/// We allow each target to determine specifically which kind of splitting is
23613/// supported.
23614///
23615/// The store patterns are commonly seen from the simple code snippet below
23616/// if only std::make_pair(...) is SROA-transformed before being inlined into hoo().
23617/// void goo(const std::pair<int, float> &);
23618/// hoo() {
23619/// ...
23620/// goo(std::make_pair(tmp, ftmp));
23621/// ...
23622/// }
23623///
23624SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
23625 if (OptLevel == CodeGenOptLevel::None)
23626 return SDValue();
23627
23628 // Can't change the number of memory accesses for a volatile store or break
23629 // atomicity for an atomic one.
23630 if (!ST->isSimple())
23631 return SDValue();
23632
23633 SDValue Val = ST->getValue();
23634 SDLoc DL(ST);
23635
23636 // Match OR operand.
23637 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
23638 return SDValue();
23639
23640 // Match SHL operand and get Lower and Higher parts of Val.
23641 SDValue Op1 = Val.getOperand(i: 0);
23642 SDValue Op2 = Val.getOperand(i: 1);
23643 SDValue Lo, Hi;
23644 if (Op1.getOpcode() != ISD::SHL) {
23645 std::swap(a&: Op1, b&: Op2);
23646 if (Op1.getOpcode() != ISD::SHL)
23647 return SDValue();
23648 }
23649 Lo = Op2;
23650 Hi = Op1.getOperand(i: 0);
23651 if (!Op1.hasOneUse())
23652 return SDValue();
23653
23654 // Match shift amount to HalfValBitSize.
23655 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
23656 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Val: Op1.getOperand(i: 1));
23657 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
23658 return SDValue();
23659
23660 // Lo and Hi must be zero-extended from scalar integers no wider than
23661 // HalfValBitSize (e.g. at most i32 when splitting an i64 store).
23662 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
23663 !Lo.getOperand(i: 0).getValueType().isScalarInteger() ||
23664 Lo.getOperand(i: 0).getValueSizeInBits() > HalfValBitSize ||
23665 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
23666 !Hi.getOperand(i: 0).getValueType().isScalarInteger() ||
23667 Hi.getOperand(i: 0).getValueSizeInBits() > HalfValBitSize)
23668 return SDValue();
23669
23670 // Use the EVT of low and high parts before bitcast as the input
23671 // of target query.
23672 EVT LowTy = (Lo.getOperand(i: 0).getOpcode() == ISD::BITCAST)
23673 ? Lo.getOperand(i: 0).getValueType()
23674 : Lo.getValueType();
23675 EVT HighTy = (Hi.getOperand(i: 0).getOpcode() == ISD::BITCAST)
23676 ? Hi.getOperand(i: 0).getValueType()
23677 : Hi.getValueType();
23678 if (!TLI.isMultiStoresCheaperThanBitsMerge(LTy: LowTy, HTy: HighTy))
23679 return SDValue();
23680
23681 // Start to split store.
23682 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
23683 AAMDNodes AAInfo = ST->getAAInfo();
23684
23685 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
23686 EVT VT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: HalfValBitSize);
23687 Lo = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT, Operand: Lo.getOperand(i: 0));
23688 Hi = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT, Operand: Hi.getOperand(i: 0));
23689
23690 SDValue Chain = ST->getChain();
23691 SDValue Ptr = ST->getBasePtr();
23692 // Lower value store.
23693 SDValue St0 = DAG.getStore(Chain, dl: DL, Val: Lo, Ptr, PtrInfo: ST->getPointerInfo(),
23694 Alignment: ST->getBaseAlign(), MMOFlags, AAInfo);
23695 Ptr =
23696 DAG.getMemBasePlusOffset(Base: Ptr, Offset: TypeSize::getFixed(ExactSize: HalfValBitSize / 8), DL);
23697 // Higher value store.
23698 SDValue St1 = DAG.getStore(
23699 Chain: St0, dl: DL, Val: Hi, Ptr, PtrInfo: ST->getPointerInfo().getWithOffset(O: HalfValBitSize / 8),
23700 Alignment: ST->getBaseAlign(), MMOFlags, AAInfo);
23701 return St1;
23702}
23703
23704// Merge an insertion into an existing shuffle:
23705// (insert_vector_elt (vector_shuffle X, Y, Mask),
23706//                     (extract_vector_elt X, N), InsIndex)
23707// --> (vector_shuffle X, Y, NewMask)
23708// and variations where shuffle operands may be CONCAT_VECTORS.
23709static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask,
23710 SmallVectorImpl<int> &NewMask, SDValue Elt,
23711 unsigned InsIndex) {
23712 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
23713 !isa<ConstantSDNode>(Val: Elt.getOperand(i: 1)))
23714 return false;
23715
23716 // Vec's operand 0 is using indices from 0 to N-1 and
23717 // operand 1 from N to 2N - 1, where N is the number of
23718 // elements in the vectors.
23719 SDValue InsertVal0 = Elt.getOperand(i: 0);
23720 int ElementOffset = -1;
23721
23722 // We explore the inputs of the shuffle in order to see if we find the
23723 // source of the extract_vector_elt. If so, we can use it to modify the
23724 // shuffle rather than perform an insert_vector_elt.
23725 SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
23726 ArgWorkList.emplace_back(Args: Mask.size(), Args&: Y);
23727 ArgWorkList.emplace_back(Args: 0, Args&: X);
23728
23729 while (!ArgWorkList.empty()) {
23730 int ArgOffset;
23731 SDValue ArgVal;
23732 std::tie(args&: ArgOffset, args&: ArgVal) = ArgWorkList.pop_back_val();
23733
23734 if (ArgVal == InsertVal0) {
23735 ElementOffset = ArgOffset;
23736 break;
23737 }
23738
23739 // Peek through concat_vector.
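// E.g. if an operand at offset 0 is (concat_vectors A, B) with v4i32 parts,
// then A is explored with offset 0 and B with offset 4.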
23740 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
23741 int CurrentArgOffset =
23742 ArgOffset + ArgVal.getValueType().getVectorNumElements();
23743 int Step = ArgVal.getOperand(i: 0).getValueType().getVectorNumElements();
23744 for (SDValue Op : reverse(C: ArgVal->ops())) {
23745 CurrentArgOffset -= Step;
23746 ArgWorkList.emplace_back(Args&: CurrentArgOffset, Args&: Op);
23747 }
23748
23749 // Make sure we went through all the elements and did not screw up index
23750 // computation.
23751 assert(CurrentArgOffset == ArgOffset);
23752 }
23753 }
23754
23755 // If we failed to find a match, see if we can replace an UNDEF shuffle
23756 // operand.
23757 if (ElementOffset == -1) {
23758 if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType())
23759 return false;
23760 ElementOffset = Mask.size();
23761 Y = InsertVal0;
23762 }
23763
23764 NewMask.assign(in_start: Mask.begin(), in_end: Mask.end());
23765 NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(i: 1);
23766 assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 &&
23767 "NewMask[InsIndex] is out of bound");
23768 return true;
23769}
23770
23771// Merge an insertion into an existing shuffle:
23772// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
23773// InsIndex)
23774// --> (vector_shuffle X, Y) and variations where shuffle operands may be
23775// CONCAT_VECTORS.
23776SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) {
23777 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
23778 "Expected insert_vector_elt");
23779 SDValue InsertVal = N->getOperand(Num: 1);
23780 SDValue Vec = N->getOperand(Num: 0);
23781
23782 auto *SVN = dyn_cast<ShuffleVectorSDNode>(Val&: Vec);
23783 if (!SVN || !Vec.hasOneUse())
23784 return SDValue();
23785
23786 ArrayRef<int> Mask = SVN->getMask();
23787 SDValue X = Vec.getOperand(i: 0);
23788 SDValue Y = Vec.getOperand(i: 1);
23789
23790 SmallVector<int, 16> NewMask(Mask);
23791 if (mergeEltWithShuffle(X, Y, Mask, NewMask, Elt: InsertVal, InsIndex)) {
23792 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
23793 VT: Vec.getValueType(), DL: SDLoc(N), N0: X, N1: Y, Mask: NewMask, DAG);
23794 if (LegalShuffle)
23795 return LegalShuffle;
23796 }
23797
23798 return SDValue();
23799}
23800
23801// Convert a disguised subvector insertion into a shuffle:
23802// insert_vector_elt V, (bitcast X from vector type), IdxC -->
23803// bitcast(shuffle (bitcast V), (extended X), Mask)
23804// Note: We do not use an insert_subvector node because that requires a
23805// legal subvector type.
23806SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
23807 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
23808 "Expected insert_vector_elt");
23809 SDValue InsertVal = N->getOperand(Num: 1);
23810
23811 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
23812 !InsertVal.getOperand(i: 0).getValueType().isVector())
23813 return SDValue();
23814
23815 SDValue SubVec = InsertVal.getOperand(i: 0);
23816 SDValue DestVec = N->getOperand(Num: 0);
23817 EVT SubVecVT = SubVec.getValueType();
23818 EVT VT = DestVec.getValueType();
23819 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
23820 // Bail out if the inserted value is larger than the vector element, as
23821 // insert_vector_elt performs an implicit truncation in this case.
23822 if (InsertVal.getValueType() != VT.getVectorElementType())
23823 return SDValue();
23824 // If the source only has a single vector element, the cost of creating and
23825 // adding it to a vector is likely to exceed the cost of an insert_vector_elt.
23826 if (NumSrcElts == 1)
23827 return SDValue();
23828 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
23829 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
23830
23831 // Step 1: Create a shuffle mask that implements this insert operation. The
23832 // vector that we are inserting into will be operand 0 of the shuffle, so
23833 // those elements are just 'i'. The inserted subvector is in the first
23834 // positions of operand 1 of the shuffle. Example:
23835 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
23836 SmallVector<int, 16> Mask(NumMaskVals);
23837 for (unsigned i = 0; i != NumMaskVals; ++i) {
23838 if (i / NumSrcElts == InsIndex)
23839 Mask[i] = (i % NumSrcElts) + NumMaskVals;
23840 else
23841 Mask[i] = i;
23842 }
23843
23844 // Bail out if the target can not handle the shuffle we want to create.
23845 EVT SubVecEltVT = SubVecVT.getVectorElementType();
23846 EVT ShufVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: SubVecEltVT, NumElements: NumMaskVals);
23847 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
23848 return SDValue();
23849
23850 // Step 2: Create a wide vector from the inserted source vector by appending
23851 // poison elements. This is the same size as our destination vector.
23852 SDLoc DL(N);
23853 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getPOISON(VT: SubVecVT));
23854 ConcatOps[0] = SubVec;
23855 SDValue PaddedSubV = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: ShufVT, Ops: ConcatOps);
23856
23857 // Step 3: Shuffle in the padded subvector.
23858 SDValue DestVecBC = DAG.getBitcast(VT: ShufVT, V: DestVec);
23859 SDValue Shuf = DAG.getVectorShuffle(VT: ShufVT, dl: DL, N1: DestVecBC, N2: PaddedSubV, Mask);
23860 AddToWorklist(N: PaddedSubV.getNode());
23861 AddToWorklist(N: DestVecBC.getNode());
23862 AddToWorklist(N: Shuf.getNode());
23863 return DAG.getBitcast(VT, V: Shuf);
23864}
23865
23866// Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load if
23867// possible and the new load will be quick. We use more loads but fewer shuffles
23868// and inserts.
23869SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) {
23870 EVT VT = N->getValueType(ResNo: 0);
23871
23872 // InsIndex is expected to be the first or last lane.
23873 if (!VT.isFixedLengthVector() ||
23874 (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
23875 return SDValue();
23876
23877 // Look for a shuffle with the mask u,0,1,2,3,4,5,6 or 1,2,3,4,5,6,7,u
23878 // depending on the InsIndex.
23879 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Val: N->getOperand(Num: 0));
23880 SDValue Scalar = N->getOperand(Num: 1);
23881 if (!Shuffle || !all_of(Range: enumerate(First: Shuffle->getMask()), P: [&](auto P) {
23882 return InsIndex == P.index() || P.value() < 0 ||
23883 (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
23884 (InsIndex == VT.getVectorNumElements() - 1 &&
23885 P.value() == (int)P.index() + 1);
23886 }))
23887 return SDValue();
23888
23889 // We optionally skip over an extend so long as both loads are extended in the
23890 // same way from the same type.
23891 unsigned Extend = 0;
23892 if (Scalar.getOpcode() == ISD::ZERO_EXTEND ||
23893 Scalar.getOpcode() == ISD::SIGN_EXTEND ||
23894 Scalar.getOpcode() == ISD::ANY_EXTEND) {
23895 Extend = Scalar.getOpcode();
23896 Scalar = Scalar.getOperand(i: 0);
23897 }
23898
23899 auto *ScalarLoad = dyn_cast<LoadSDNode>(Val&: Scalar);
23900 if (!ScalarLoad)
23901 return SDValue();
23902
23903 SDValue Vec = Shuffle->getOperand(Num: 0);
23904 if (Extend) {
23905 if (Vec.getOpcode() != Extend)
23906 return SDValue();
23907 Vec = Vec.getOperand(i: 0);
23908 }
23909 auto *VecLoad = dyn_cast<LoadSDNode>(Val&: Vec);
23910 if (!VecLoad || Vec.getValueType().getScalarType() != Scalar.getValueType())
23911 return SDValue();
23912
23913 int EltSize = ScalarLoad->getValueType(ResNo: 0).getScalarSizeInBits();
23914 if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
23915 !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
23916 ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
23917 ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
23918 return SDValue();
23919
23920 // Check that the offset between the pointers is such that the two loads form
23921 // a single contiguous load.
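// E.g. with InsIndex == 0 the scalar must sit one element before the vector
// load in memory; with InsIndex at the last lane it must sit immediately
// after it.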
23922 if (InsIndex == 0) {
23923 if (!DAG.areNonVolatileConsecutiveLoads(LD: ScalarLoad, Base: VecLoad, Bytes: EltSize / 8,
23924 Dist: -1))
23925 return SDValue();
23926 } else {
23927 if (!DAG.areNonVolatileConsecutiveLoads(
23928 LD: VecLoad, Base: ScalarLoad, Bytes: VT.getVectorNumElements() * EltSize / 8, Dist: -1))
23929 return SDValue();
23930 }
23931
23932 // And that the new unaligned load will be fast.
23933 unsigned IsFast = 0;
23934 Align NewAlign = commonAlignment(A: VecLoad->getAlign(), Offset: EltSize / 8);
23935 if (!TLI.allowsMemoryAccess(Context&: *DAG.getContext(), DL: DAG.getDataLayout(),
23936 VT: Vec.getValueType(), AddrSpace: VecLoad->getAddressSpace(),
23937 Alignment: NewAlign, Flags: VecLoad->getMemOperand()->getFlags(),
23938 Fast: &IsFast) ||
23939 !IsFast)
23940 return SDValue();
23941
23942 // Calculate the new Ptr and create the new load.
23943 SDLoc DL(N);
23944 SDValue Ptr = ScalarLoad->getBasePtr();
23945 if (InsIndex != 0)
23946 Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT: Ptr.getValueType(), N1: VecLoad->getBasePtr(),
23947 N2: DAG.getConstant(Val: EltSize / 8, DL, VT: Ptr.getValueType()));
23948 MachinePointerInfo PtrInfo =
23949 InsIndex == 0 ? ScalarLoad->getPointerInfo()
23950 : VecLoad->getPointerInfo().getWithOffset(O: EltSize / 8);
23951
23952 SDValue Load = DAG.getLoad(VT: VecLoad->getValueType(ResNo: 0), dl: DL,
23953 Chain: ScalarLoad->getChain(), Ptr, PtrInfo, Alignment: NewAlign);
23954 DAG.makeEquivalentMemoryOrdering(OldLoad: ScalarLoad, NewMemOp: Load.getValue(R: 1));
23955 DAG.makeEquivalentMemoryOrdering(OldLoad: VecLoad, NewMemOp: Load.getValue(R: 1));
23956 return Extend ? DAG.getNode(Opcode: Extend, DL, VT, Operand: Load) : Load;
23957}
23958
23959SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
23960 SDValue InVec = N->getOperand(Num: 0);
23961 SDValue InVal = N->getOperand(Num: 1);
23962 SDValue EltNo = N->getOperand(Num: 2);
23963 SDLoc DL(N);
23964
23965 EVT VT = InVec.getValueType();
23966 auto *IndexC = dyn_cast<ConstantSDNode>(Val&: EltNo);
23967
23968 // Insert into out-of-bounds element is poison.
23969 if (IndexC && VT.isFixedLengthVector() &&
23970 IndexC->getZExtValue() >= VT.getVectorNumElements())
23971 return DAG.getPOISON(VT);
23972
23973 // Remove redundant insertions:
23974 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
23975 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23976 InVec == InVal.getOperand(i: 0) && EltNo == InVal.getOperand(i: 1))
23977 return InVec;
23978
23979 // Remove insert of UNDEF/POISON elements.
23980 if (InVal.isUndef()) {
23981 if (InVal.getOpcode() == ISD::POISON || InVec.getOpcode() == ISD::UNDEF)
23982 return InVec;
23983 return DAG.getFreeze(V: InVec);
23984 }
23985
23986 if (!IndexC) {
23987 // If this is variable insert to undef vector, it might be better to splat:
23988 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
23989 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
23990 return DAG.getSplat(VT, DL, Op: InVal);
23991
23992 // Extend this type to be byte-addressable
23993 EVT OldVT = VT;
23994 EVT EltVT = VT.getVectorElementType();
23995 bool IsByteSized = EltVT.isByteSized();
23996 if (!IsByteSized) {
23997 EltVT =
23998 EltVT.changeTypeToInteger().getRoundIntegerType(Context&: *DAG.getContext());
23999 VT = VT.changeElementType(Context&: *DAG.getContext(), EltVT);
24000 }
24001
24002 // Check if this operation will be handled the default way for its type.
24003 auto IsTypeDefaultHandled = [this](EVT VT) {
24004 return TLI.getTypeAction(Context&: *DAG.getContext(), VT) ==
24005 TargetLowering::TypeSplitVector ||
24006 TLI.isOperationExpand(Op: ISD::INSERT_VECTOR_ELT, VT);
24007 };
24008
24009 // Check if this operation is illegal and will be handled the default way,
24010 // even after extending the type to be byte-addressable.
24011 if (IsTypeDefaultHandled(OldVT) && IsTypeDefaultHandled(VT)) {
24012 // For each dynamic insertelt, the default way will save the vector to
24013 // the stack, store at an offset, and load the modified vector. This can
24014 // dramatically increase code size if we have a chain of insertelts on a
24015 // large vector: requiring O(V*C) stores/loads where V = length of
24016 // vector and C is length of chain. If each insertelt is only fed into the
24017 // next, the vector is write-only across this chain, and we can just
24018 // save once before the chain and load after in O(V + C) operations.
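// E.g. a chain of three variable-index inserts into a large illegal vector
// becomes one spill of the vector, three element stores, and one reload,
// instead of three full store/load round trips of the whole vector.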
24019 SmallVector<SDNode *> Seq{N};
24020 unsigned NumDynamic = 1;
24021 while (true) {
24022 SDValue InVec = Seq.back()->getOperand(Num: 0);
24023 if (InVec.getOpcode() != ISD::INSERT_VECTOR_ELT)
24024 break;
24025 Seq.push_back(Elt: InVec.getNode());
24026 NumDynamic += !isa<ConstantSDNode>(Val: InVec.getOperand(i: 2));
24027 }
24028
24029 // It makes sense to lower this sequence only when we have more
24030 // than one dynamic insertelt, since we will not have more than V constant
24031 // insertelts, so we will be reducing the total number of stores+loads.
24032 if (NumDynamic > 1) {
24033 // In cases where the vector is illegal it will be broken down into
24034 // parts and stored in parts - we should use the alignment for the
24035 // smallest part.
24036 Align SmallestAlign = DAG.getReducedAlign(VT, /*UseABI=*/false);
24037 SDValue StackPtr =
24038 DAG.CreateStackTemporary(Bytes: VT.getStoreSize(), Alignment: SmallestAlign);
24039 auto &MF = DAG.getMachineFunction();
24040 int FrameIndex = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
24041 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FI: FrameIndex);
24042
24043 // Save the vector to the stack
24044 SDValue InVec = Seq.back()->getOperand(Num: 0);
24045 if (!IsByteSized)
24046 InVec = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT, Operand: InVec);
24047 SDValue Store = DAG.getStore(Chain: DAG.getEntryNode(), dl: DL, Val: InVec, Ptr: StackPtr,
24048 PtrInfo, Alignment: SmallestAlign);
24049
24050 // Lower each dynamic insertelt to a store
24051 for (SDNode *N : reverse(C&: Seq)) {
24052 SDValue Elmnt = N->getOperand(Num: 1);
24053 SDValue Index = N->getOperand(Num: 2);
24054
24055 // Check if we have to extend the element type
24056 if (!IsByteSized && Elmnt.getValueType().bitsLT(VT: EltVT))
24057 Elmnt = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: EltVT, Operand: Elmnt);
24058
24059 // Store the new element. This may be larger than the vector element
24060 // type, so use a truncating store.
24061 SDValue EltPtr =
24062 TLI.getVectorElementPointer(DAG, VecPtr: StackPtr, VecVT: VT, Index);
24063 EVT EltVT = Elmnt.getValueType();
24064 Store = DAG.getTruncStore(
24065 Chain: Store, dl: DL, Val: Elmnt, Ptr: EltPtr, PtrInfo: MachinePointerInfo::getUnknownStack(MF),
24066 SVT: EltVT,
24067 Alignment: commonAlignment(A: SmallestAlign, Offset: EltVT.getFixedSizeInBits() / 8));
24068 }
24069
24070 // Load the saved vector from the stack
24071 SDValue Load =
24072 DAG.getLoad(VT, dl: DL, Chain: Store, Ptr: StackPtr, PtrInfo, Alignment: SmallestAlign);
24073 SDValue LoadV = Load.getValue(R: 0);
24074 return IsByteSized ? LoadV : DAG.getAnyExtOrTrunc(Op: LoadV, DL, VT: OldVT);
24075 }
24076 }
24077
24078 return SDValue();
24079 }
24080
24081 if (VT.isScalableVector())
24082 return SDValue();
24083
24084 unsigned NumElts = VT.getVectorNumElements();
24085
24086 // We must know which element is being inserted for folds below here.
24087 unsigned Elt = IndexC->getZExtValue();
24088
24089 // Handle <1 x ???> vector insertion special cases.
24090 if (NumElts == 1) {
24091 // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
24092 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
24093 InVal.getOperand(i: 0).getValueType() == VT &&
24094 isNullConstant(V: InVal.getOperand(i: 1)))
24095 return InVal.getOperand(i: 0);
24096 }
24097
24098 // Canonicalize insert_vector_elt dag nodes.
24099 // Example:
24100 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
24101 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
24102 //
24103 // Do this only if the child insert_vector_elt node has one use; also
24104 // do this only if indices are both constants and Idx1 < Idx0.
24105 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
24106 && isa<ConstantSDNode>(Val: InVec.getOperand(i: 2))) {
24107 unsigned OtherElt = InVec.getConstantOperandVal(i: 2);
24108 if (Elt < OtherElt) {
24109 // Swap nodes.
24110 SDValue NewOp = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT,
24111 N1: InVec.getOperand(i: 0), N2: InVal, N3: EltNo);
24112 AddToWorklist(N: NewOp.getNode());
24113 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(InVec.getNode()),
24114 VT, N1: NewOp, N2: InVec.getOperand(i: 1), N3: InVec.getOperand(i: 2));
24115 }
24116 }
24117
24118 if (SDValue Shuf = mergeInsertEltWithShuffle(N, InsIndex: Elt))
24119 return Shuf;
24120
24121 if (SDValue Shuf = combineInsertEltToShuffle(N, InsIndex: Elt))
24122 return Shuf;
24123
24124 if (SDValue Shuf = combineInsertEltToLoad(N, InsIndex: Elt))
24125 return Shuf;
24126
24127 // Attempt to convert an insert_vector_elt chain into a legal build_vector.
24128 if (!LegalOperations || TLI.isOperationLegal(Op: ISD::BUILD_VECTOR, VT)) {
24129 // vXi1 vector - we don't need to recurse.
24130 if (NumElts == 1)
24131 return DAG.getBuildVector(VT, DL, Ops: {InVal});
24132
24133 // If we haven't already collected the element, insert into the op list.
24134 EVT MaxEltVT = InVal.getValueType();
24135 auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt,
24136 unsigned Idx) {
24137 if (!Ops[Idx]) {
24138 Ops[Idx] = Elt;
24139 if (VT.isInteger()) {
24140 EVT EltVT = Elt.getValueType();
24141 MaxEltVT = MaxEltVT.bitsGE(VT: EltVT) ? MaxEltVT : EltVT;
24142 }
24143 }
24144 };
24145
24146 // Ensure all the operands are the same value type, fill any missing
24147 // operands with UNDEF and create the BUILD_VECTOR.
24148 auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops,
24149 bool FreezeUndef = false) {
24150 assert(Ops.size() == NumElts && "Unexpected vector size");
24151 SDValue UndefOp = FreezeUndef ? DAG.getFreeze(V: DAG.getUNDEF(VT: MaxEltVT))
24152 : DAG.getUNDEF(VT: MaxEltVT);
24153 for (SDValue &Op : Ops) {
24154 if (Op)
24155 Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, VT: MaxEltVT) : Op;
24156 else
24157 Op = UndefOp;
24158 }
24159 return DAG.getBuildVector(VT, DL, Ops);
24160 };
24161
24162 SmallVector<SDValue, 8> Ops(NumElts, SDValue());
24163 Ops[Elt] = InVal;
24164
24165 // Recurse up a INSERT_VECTOR_ELT chain to build a BUILD_VECTOR.
24166 for (SDValue CurVec = InVec; CurVec;) {
24167 // UNDEF - build new BUILD_VECTOR from already inserted operands.
24168 if (CurVec.isUndef())
24169 return CanonicalizeBuildVector(Ops);
24170
24171 // FREEZE(UNDEF) - build new BUILD_VECTOR from already inserted operands.
24172 if (ISD::isFreezeUndef(N: CurVec.getNode()) && CurVec.hasOneUse())
24173 return CanonicalizeBuildVector(Ops, /*FreezeUndef=*/true);
24174
24175 // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
24176 if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) {
24177 for (unsigned I = 0; I != NumElts; ++I)
24178 AddBuildVectorOp(Ops, CurVec.getOperand(i: I), I);
24179 return CanonicalizeBuildVector(Ops);
24180 }
24181
24182 // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
24183 if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) {
24184 AddBuildVectorOp(Ops, CurVec.getOperand(i: 0), 0);
24185 return CanonicalizeBuildVector(Ops);
24186 }
24187
24188 // INSERT_VECTOR_ELT - insert operand and continue up the chain.
24189 if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse())
24190 if (auto *CurIdx = dyn_cast<ConstantSDNode>(Val: CurVec.getOperand(i: 2)))
24191 if (CurIdx->getAPIntValue().ult(RHS: NumElts)) {
24192 unsigned Idx = CurIdx->getZExtValue();
24193 AddBuildVectorOp(Ops, CurVec.getOperand(i: 1), Idx);
24194
24195 // Found entire BUILD_VECTOR.
24196 if (all_of(Range&: Ops, P: [](SDValue Op) { return !!Op; }))
24197 return CanonicalizeBuildVector(Ops);
24198
24199 CurVec = CurVec->getOperand(Num: 0);
24200 continue;
24201 }
24202
24203 // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
24204 // update the shuffle mask (and second operand if we started with unary
24205 // shuffle) and create a new legal shuffle.
24206 if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) {
24207 auto *SVN = cast<ShuffleVectorSDNode>(Val&: CurVec);
24208 SDValue LHS = SVN->getOperand(Num: 0);
24209 SDValue RHS = SVN->getOperand(Num: 1);
24210 SmallVector<int, 16> Mask(SVN->getMask());
24211 bool Merged = true;
24212 for (auto I : enumerate(First&: Ops)) {
24213 SDValue &Op = I.value();
24214 if (Op) {
24215 SmallVector<int, 16> NewMask;
24216 if (!mergeEltWithShuffle(X&: LHS, Y&: RHS, Mask, NewMask, Elt: Op, InsIndex: I.index())) {
24217 Merged = false;
24218 break;
24219 }
24220 Mask = std::move(NewMask);
24221 }
24222 }
24223 if (Merged)
24224 if (SDValue NewShuffle =
24225 TLI.buildLegalVectorShuffle(VT, DL, N0: LHS, N1: RHS, Mask, DAG))
24226 return NewShuffle;
24227 }
24228
24229 if (!LegalOperations) {
24230 bool IsNull = llvm::isNullConstant(V: InVal);
24231 // We can convert to AND/OR mask if all insertions are zero or -1
24232 // respectively.
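// For example (a sketch of the zero case):
//   insert_vector_elt (insert_vector_elt V, 0, 0), 0, 2
// becomes, for a v4iN vector,
//   and V, <0, -1, 0, -1>
// clearing lanes 0 and 2 while keeping the other lanes of V.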
24233 if ((IsNull || llvm::isAllOnesConstant(V: InVal)) &&
24234 all_of(Range&: Ops, P: [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
24235 count_if(Range&: Ops, P: [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
24236 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: MaxEltVT);
24237 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT: MaxEltVT);
24238 SmallVector<SDValue, 8> Mask(NumElts);
24239
24240 // Build the mask and return the corresponding DAG node.
24241 auto BuildMaskAndNode = [&](SDValue TrueVal, SDValue FalseVal,
24242 unsigned MaskOpcode) {
24243 for (unsigned I = 0; I != NumElts; ++I)
24244 Mask[I] = Ops[I] ? TrueVal : FalseVal;
24245 return DAG.getNode(Opcode: MaskOpcode, DL, VT, N1: CurVec,
24246 N2: DAG.getBuildVector(VT, DL, Ops: Mask));
24247 };
24248
24249 // If the inserted elements are zero, AND with a mask that clears the inserted lanes and keeps the rest.
24250 if (IsNull)
24251 return BuildMaskAndNode(Zero, AllOnes, ISD::AND);
24252
24253 // If the inserted elements are -1, OR with a mask that sets the inserted lanes and keeps the rest.
24254 return BuildMaskAndNode(AllOnes, Zero, ISD::OR);
24255 }
24256 }
24257
24258 // Failed to find a match in the chain - bail.
24259 break;
24260 }
24261
24262 // See if we can fill in the missing constant elements as zeros.
24263 // TODO: Should we do this for any constant?
24264 APInt DemandedZeroElts = APInt::getZero(numBits: NumElts);
24265 for (unsigned I = 0; I != NumElts; ++I)
24266 if (!Ops[I])
24267 DemandedZeroElts.setBit(I);
24268
24269 if (DAG.MaskedVectorIsZero(Op: InVec, DemandedElts: DemandedZeroElts)) {
24270 SDValue Zero = VT.isInteger() ? DAG.getConstant(Val: 0, DL, VT: MaxEltVT)
24271 : DAG.getConstantFP(Val: 0, DL, VT: MaxEltVT);
24272 for (unsigned I = 0; I != NumElts; ++I)
24273 if (!Ops[I])
24274 Ops[I] = Zero;
24275
24276 return CanonicalizeBuildVector(Ops);
24277 }
24278 }
24279
24280 return SDValue();
24281}
24282
24283/// Transform a vector binary operation into a scalar binary operation by moving
24284/// the math/logic after an extract element of a vector.
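/// For example (a sketch, with the constant operand on the RHS):
///   extractelt (add X, <1, 2, 3, 4>), 1 --> add (extractelt X, 1), 2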
24285static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG,
24286 const SDLoc &DL, bool LegalTypes) {
24287 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24288 SDValue Vec = ExtElt->getOperand(Num: 0);
24289 SDValue Index = ExtElt->getOperand(Num: 1);
24290 auto *IndexC = dyn_cast<ConstantSDNode>(Val&: Index);
24291 unsigned Opc = Vec.getOpcode();
24292 if (!IndexC || !Vec.hasOneUse() || (!TLI.isBinOp(Opcode: Opc) && Opc != ISD::SETCC) ||
24293 Vec->getNumValues() != 1)
24294 return SDValue();
24295
24296 // Targets may want to avoid this to prevent an expensive register transfer.
24297 if (!TLI.shouldScalarizeBinop(VecOp: Vec))
24298 return SDValue();
24299
24300 EVT ResVT = ExtElt->getValueType(ResNo: 0);
24301 if (Opc == ISD::SETCC &&
24302 (ResVT != Vec.getValueType().getVectorElementType() || LegalTypes))
24303 return SDValue();
24304
24305 // Extracting an element of a vector constant is constant-folded, so this
24306 // transform is just replacing a vector op with a scalar op while moving the
24307 // extract.
24308 SDValue Op0 = Vec.getOperand(i: 0);
24309 SDValue Op1 = Vec.getOperand(i: 1);
24310 APInt SplatVal;
24311 if (!isAnyConstantBuildVector(V: Op0, NoOpaques: true) &&
24312 !ISD::isConstantSplatVector(N: Op0.getNode(), SplatValue&: SplatVal) &&
24313 !isAnyConstantBuildVector(V: Op1, NoOpaques: true) &&
24314 !ISD::isConstantSplatVector(N: Op1.getNode(), SplatValue&: SplatVal))
24315 return SDValue();
24316
24317 // extractelt (op X, C), IndexC --> op (extractelt X, IndexC), C'
24318 // extractelt (op C, X), IndexC --> op C', (extractelt X, IndexC)
24319 if (Opc == ISD::SETCC) {
24320 EVT OpVT = Op0.getValueType().getVectorElementType();
24321 Op0 = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: OpVT, N1: Op0, N2: Index);
24322 Op1 = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: OpVT, N1: Op1, N2: Index);
24323 SDValue NewVal = DAG.getSetCC(
24324 DL, VT: ResVT, LHS: Op0, RHS: Op1, Cond: cast<CondCodeSDNode>(Val: Vec->getOperand(Num: 2))->get());
24325 // We may need to sign- or zero-extend the result to match the same
24326 // behaviour as the vector version of SETCC.
24327 unsigned VecBoolContents = TLI.getBooleanContents(Type: Vec.getValueType());
24328 if (ResVT != MVT::i1 &&
24329 VecBoolContents != TargetLowering::UndefinedBooleanContent &&
24330 VecBoolContents != TLI.getBooleanContents(Type: ResVT)) {
24331 if (VecBoolContents == TargetLowering::ZeroOrNegativeOneBooleanContent)
24332 NewVal = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: ResVT, N1: NewVal,
24333 N2: DAG.getValueType(MVT::i1));
24334 else
24335 NewVal = DAG.getZeroExtendInReg(Op: NewVal, DL, VT: MVT::i1);
24336 }
24337 return NewVal;
24338 }
24339 Op0 = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: ResVT, N1: Op0, N2: Index);
24340 Op1 = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: ResVT, N1: Op1, N2: Index);
24341 return DAG.getNode(Opcode: Opc, DL, VT: ResVT, N1: Op0, N2: Op1);
24342}
24343
24344 // Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
24345 // recursively analyse all of its users and try to model them as
24346 // bit sequence extractions. If all of them agree on the new, narrower element
24347 // type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that
24348 // new element type, do so now.
24349 // This is mainly useful to recover from legalization that scalarized
24350 // the vector as wide elements: we try to rebuild it with narrower elements.
24351//
24352// Some more nodes could be modelled if that helps cover interesting patterns.
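// For example (a little-endian sketch): if t = extract_vector_elt v2i64 V, 1
// is only used as (trunc t to i32) and (trunc (srl t, 32) to i32), with both
// truncates feeding BUILD_VECTORs, we can instead bitcast V to v4i32 and
// rewrite those users as
//   extract_vector_elt v4i32 (bitcast V), 2  and
//   extract_vector_elt v4i32 (bitcast V), 3.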
24353bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
24354 SDNode *N) {
24355 // We perform this optimization post type-legalization because
24356 // the type-legalizer often scalarizes integer-promoted vectors.
24357 // Performing this optimization earlier may cause legalization cycles.
24358 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
24359 return false;
24360
24361 // TODO: Add support for big-endian.
24362 if (DAG.getDataLayout().isBigEndian())
24363 return false;
24364
24365 SDValue VecOp = N->getOperand(Num: 0);
24366 EVT VecVT = VecOp.getValueType();
24367 assert(!VecVT.isScalableVector() && "Only for fixed vectors.");
24368
24369 // We must start with a constant extraction index.
24370 auto *IndexC = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
24371 if (!IndexC)
24372 return false;
24373
24374 assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
24375 "Original ISD::EXTRACT_VECTOR_ELT is undefined?");
24376
24377 // TODO: deal with the case of implicit anyext of the extraction.
24378 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
24379 EVT ScalarVT = N->getValueType(ResNo: 0);
24380 if (VecVT.getScalarType() != ScalarVT)
24381 return false;
24382
24383 // TODO: deal with the cases other than everything being integer-typed.
24384 if (!ScalarVT.isScalarInteger())
24385 return false;
24386
24387 struct Entry {
24388 SDNode *Producer;
24389
24390 // Which bits of VecOp does it contain?
24391 unsigned BitPos;
24392 int NumBits;
24393 // NOTE: the actual width of \p Producer may be wider than NumBits!
24394
24395 Entry(Entry &&) = default;
24396 Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_)
24397 : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {}
24398
24399 Entry() = delete;
24400 Entry(const Entry &) = delete;
24401 Entry &operator=(const Entry &) = delete;
24402 Entry &operator=(Entry &&) = delete;
24403 };
24404 SmallVector<Entry, 32> Worklist;
24405 SmallVector<Entry, 32> Leafs;
24406
24407 // We start at the "root" ISD::EXTRACT_VECTOR_ELT.
24408 Worklist.emplace_back(Args&: N, /*BitPos=*/Args: VecEltBitWidth * IndexC->getZExtValue(),
24409 /*NumBits=*/Args&: VecEltBitWidth);
24410
24411 while (!Worklist.empty()) {
24412 Entry E = Worklist.pop_back_val();
24413 // Does the node not even use any of the VecOp bits?
24414 if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() &&
24415 E.BitPos + E.NumBits <= VecVT.getSizeInBits()))
24416 return false; // Let's allow the other combines clean this up first.
24417 // Did we fail to model any of the users of the Producer?
24418 bool ProducerIsLeaf = false;
24419 // Look at each user of this Producer.
24420 for (SDNode *User : E.Producer->users()) {
24421 switch (User->getOpcode()) {
24422 // TODO: support ISD::BITCAST
24423 // TODO: support ISD::ANY_EXTEND
24424 // TODO: support ISD::ZERO_EXTEND
24425 // TODO: support ISD::SIGN_EXTEND
24426 case ISD::TRUNCATE:
24427 // Truncation simply means we keep the position, but extract fewer bits.
24428 Worklist.emplace_back(Args&: User, Args&: E.BitPos,
24429 /*NumBits=*/Args: User->getValueSizeInBits(ResNo: 0));
24430 break;
24431 // TODO: support ISD::SRA
24432 // TODO: support ISD::SHL
24433 case ISD::SRL:
24434 // We should be shifting the Producer by a constant amount.
24435 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(Val: User->getOperand(Num: 1));
24436 User->getOperand(Num: 0).getNode() == E.Producer && ShAmtC) {
24437 // Logical right-shift means that we start extraction later,
24438 // but stop it at the same position we did previously.
24439 unsigned ShAmt = ShAmtC->getZExtValue();
24440 Worklist.emplace_back(Args&: User, Args: E.BitPos + ShAmt, Args: E.NumBits - ShAmt);
24441 break;
24442 }
24443 [[fallthrough]];
24444 default:
24445 // We cannot model this user of the Producer,
24446 // which means the current Producer will be an ISD::EXTRACT_VECTOR_ELT.
24447 ProducerIsLeaf = true;
24448 // Profitability check: all users that we cannot model
24449 // must be ISD::BUILD_VECTOR's.
24450 if (User->getOpcode() != ISD::BUILD_VECTOR)
24451 return false;
24452 break;
24453 }
24454 }
24455 if (ProducerIsLeaf)
24456 Leafs.emplace_back(Args: std::move(E));
24457 }
24458
24459 unsigned NewVecEltBitWidth = Leafs.front().NumBits;
24460
24461 // If we are still at the same element granularity, give up.
24462 if (NewVecEltBitWidth == VecEltBitWidth)
24463 return false;
24464
24465 // The vector width must be a multiple of the new element width.
24466 if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0)
24467 return false;
24468
24469 // All leafs must agree on the new element width.
24470 // No leaf may expect any "padding" bits on top of that width.
24471 // All leafs must start extraction from a multiple of that width.
24472 if (!all_of(Range&: Leafs, P: [NewVecEltBitWidth](const Entry &E) {
24473 return (unsigned)E.NumBits == NewVecEltBitWidth &&
24474 E.Producer->getValueSizeInBits(ResNo: 0) == NewVecEltBitWidth &&
24475 E.BitPos % NewVecEltBitWidth == 0;
24476 }))
24477 return false;
24478
24479 EVT NewScalarVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NewVecEltBitWidth);
24480 EVT NewVecVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: NewScalarVT,
24481 NumElements: VecVT.getSizeInBits() / NewVecEltBitWidth);
24482
24483 if (LegalTypes &&
24484 !(TLI.isTypeLegal(VT: NewScalarVT) && TLI.isTypeLegal(VT: NewVecVT)))
24485 return false;
24486
24487 if (LegalOperations &&
24488 !(TLI.isOperationLegalOrCustom(Op: ISD::BITCAST, VT: NewVecVT) &&
24489 TLI.isOperationLegalOrCustom(Op: ISD::EXTRACT_VECTOR_ELT, VT: NewVecVT)))
24490 return false;
24491
24492 SDValue NewVecOp = DAG.getBitcast(VT: NewVecVT, V: VecOp);
24493 for (const Entry &E : Leafs) {
24494 SDLoc DL(E.Producer);
24495 unsigned NewIndex = E.BitPos / NewVecEltBitWidth;
24496 assert(NewIndex < NewVecVT.getVectorNumElements() &&
24497 "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
24498 SDValue V = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: NewScalarVT, N1: NewVecOp,
24499 N2: DAG.getVectorIdxConstant(Val: NewIndex, DL));
24500 CombineTo(N: E.Producer, Res: V);
24501 }
24502
24503 return true;
24504}
24505
24506SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
24507 SDValue VecOp = N->getOperand(Num: 0);
24508 SDValue Index = N->getOperand(Num: 1);
24509 EVT ScalarVT = N->getValueType(ResNo: 0);
24510 EVT VecVT = VecOp.getValueType();
24511 if (VecOp.isUndef())
24512 return DAG.getUNDEF(VT: ScalarVT);
24513
24514 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
24515 //
24516 // This only really matters if the index is non-constant since other combines
24517 // on the constant elements already work.
24518 SDLoc DL(N);
24519 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
24520 Index == VecOp.getOperand(i: 2)) {
24521 SDValue Elt = VecOp.getOperand(i: 1);
24522 AddUsersToWorklist(N: VecOp.getNode());
24523 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Op: Elt, DL, VT: ScalarVT) : Elt;
24524 }
24525
24526 // (vextract (scalar_to_vector val), 0) -> val
24527 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
24528 // Only the 0'th element of SCALAR_TO_VECTOR is defined.
24529 if (DAG.isKnownNeverZero(Op: Index))
24530 return DAG.getPOISON(VT: ScalarVT);
24531
24532 // Check if the result type doesn't match the inserted element type.
24533 // The inserted element and extracted element may have mismatched bitwidths.
24534 // As a result, EXTRACT_VECTOR_ELT may extend or truncate the extracted element.
24535 SDValue InOp = VecOp.getOperand(i: 0);
24536 if (InOp.getValueType() != ScalarVT) {
24537 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
24538 if (InOp.getValueType().bitsGT(VT: ScalarVT))
24539 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: ScalarVT, Operand: InOp);
24540 return DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: ScalarVT, Operand: InOp);
24541 }
24542 return InOp;
24543 }
24544
24545 // extract_vector_elt of out-of-bounds element -> UNDEF
24546 auto *IndexC = dyn_cast<ConstantSDNode>(Val&: Index);
24547 if (IndexC && VecVT.isFixedLengthVector() &&
24548 IndexC->getAPIntValue().uge(RHS: VecVT.getVectorNumElements()))
24549 return DAG.getUNDEF(VT: ScalarVT);
24550
24551 // extract_vector_elt (build_vector x, y), 1 -> y
24552 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
24553 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
24554 TLI.isTypeLegal(VT: VecVT)) {
24555 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
24556 VecVT.isFixedLengthVector()) &&
24557 "BUILD_VECTOR used for scalable vectors");
24558 unsigned IndexVal =
24559 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
24560 SDValue Elt = VecOp.getOperand(i: IndexVal);
24561 EVT InEltVT = Elt.getValueType();
24562
24563 if (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT) ||
24564 isNullConstant(V: Elt)) {
24565 // Sometimes build_vector's scalar input types do not match result type.
24566 if (ScalarVT == InEltVT)
24567 return Elt;
24568
24569 // TODO: It may be useful to truncate if the truncation is free and the
24570 // build_vector implicitly converts.
24571 }
24572 }
24573
24574 if (SDValue BO = scalarizeExtractedBinOp(ExtElt: N, DAG, DL, LegalTypes))
24575 return BO;
24576
24577 if (VecVT.isScalableVector())
24578 return SDValue();
24579
24580 // All the code from this point onwards assumes fixed width vectors, but it's
24581 // possible that some of the combinations could be made to work for scalable
24582 // vectors too.
24583 unsigned NumElts = VecVT.getVectorNumElements();
24584 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
24585
24586 // See if the extracted element is constant, in which case fold it if it is
24587 // a legal fp immediate.
24588 if (IndexC && ScalarVT.isFloatingPoint()) {
24589 APInt EltMask = APInt::getOneBitSet(numBits: NumElts, BitNo: IndexC->getZExtValue());
24590 KnownBits KnownElt = DAG.computeKnownBits(Op: VecOp, DemandedElts: EltMask);
24591 if (KnownElt.isConstant()) {
24592 APFloat CstFP =
24593 APFloat(ScalarVT.getFltSemantics(), KnownElt.getConstant());
24594 if (TLI.isFPImmLegal(CstFP, ScalarVT))
24595 return DAG.getConstantFP(Val: CstFP, DL, VT: ScalarVT);
24596 }
24597 }
24598
24599 // TODO: These transforms should not require the 'hasOneUse' restriction, but
24600 // there are regressions on multiple targets without it. We can end up with a
24601 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
24602 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
24603 VecOp.hasOneUse()) {
24604 // The vector index of the LSBs of the source depends on the endianness.
24605 bool IsLE = DAG.getDataLayout().isLittleEndian();
24606 unsigned ExtractIndex = IndexC->getZExtValue();
24607 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
24608 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
24609 SDValue BCSrc = VecOp.getOperand(i: 0);
24610 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
24611 return DAG.getAnyExtOrTrunc(Op: BCSrc, DL, VT: ScalarVT);
24612
24613 // TODO: Add support for SCALAR_TO_VECTOR implicit truncation.
24614 if (LegalTypes && BCSrc.getValueType().isInteger() &&
24615 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR &&
24616 BCSrc.getScalarValueSizeInBits() ==
24617 BCSrc.getOperand(i: 0).getScalarValueSizeInBits()) {
24618 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
24619 // trunc i64 X to i32
24620 SDValue X = BCSrc.getOperand(i: 0);
24621 EVT XVT = X.getValueType();
24622 assert(XVT.isScalarInteger() && ScalarVT.isScalarInteger() &&
24623 "Extract element and scalar to vector can't change element type "
24624 "from FP to integer.");
24625 unsigned XBitWidth = X.getValueSizeInBits();
24626 unsigned Scale = XBitWidth / VecEltBitWidth;
24627 BCTruncElt = IsLE ? 0 : Scale - 1;
24628
24629 // An extract element return value type can be wider than its vector
24630 // operand element type. In that case, the high bits are undefined, so
24631 // it's possible that we may need to extend rather than truncate.
24632 if (ExtractIndex < Scale && XBitWidth > VecEltBitWidth) {
24633 assert(XBitWidth % VecEltBitWidth == 0 &&
24634 "Scalar bitwidth must be a multiple of vector element bitwidth");
24635
24636 if (ExtractIndex != BCTruncElt) {
24637 unsigned ShiftIndex =
24638 IsLE ? ExtractIndex : (Scale - 1) - ExtractIndex;
24639 X = DAG.getNode(
24640 Opcode: ISD::SRL, DL, VT: XVT, N1: X,
24641 N2: DAG.getShiftAmountConstant(Val: ShiftIndex * VecEltBitWidth, VT: XVT, DL));
24642 }
24643
24644 return DAG.getAnyExtOrTrunc(Op: X, DL, VT: ScalarVT);
24645 }
24646 }
24647 }
24648
24649 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
24650 // We only perform this optimization before the op legalization phase because
24651 // we may introduce new vector instructions which are not backed by TD
24652 // patterns. For example, on AVX we cannot extract elements from a wide
24653 // vector without using extract_subvector. However, if we can find an
24654 // underlying scalar value, then we can always use that.
24655 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
24656 auto *Shuf = cast<ShuffleVectorSDNode>(Val&: VecOp);
24657 // Find the new index to extract from.
24658 int OrigElt = Shuf->getMaskElt(Idx: IndexC->getZExtValue());
24659
24660 // Extracting an undef index is undef.
24661 if (OrigElt == -1)
24662 return DAG.getUNDEF(VT: ScalarVT);
24663
24664 // Select the right vector half to extract from.
24665 SDValue SVInVec;
24666 if (OrigElt < (int)NumElts) {
24667 SVInVec = VecOp.getOperand(i: 0);
24668 } else {
24669 SVInVec = VecOp.getOperand(i: 1);
24670 OrigElt -= NumElts;
24671 }
24672
24673 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
24674 // TODO: Check if shuffle mask is legal?
24675 if (LegalOperations && TLI.isOperationLegal(Op: ISD::VECTOR_SHUFFLE, VT: VecVT) &&
24676 !VecOp.hasOneUse())
24677 return SDValue();
24678
24679 SDValue InOp = SVInVec.getOperand(i: OrigElt);
24680 if (InOp.getValueType() != ScalarVT) {
24681 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
24682 InOp = DAG.getSExtOrTrunc(Op: InOp, DL, VT: ScalarVT);
24683 }
24684
24685 return InOp;
24686 }
24687
24688 // FIXME: We should handle recursing on other vector shuffles and
24689 // scalar_to_vector here as well.
24690
24691 if (!LegalOperations ||
24692 // FIXME: Should really be just isOperationLegalOrCustom.
24693 TLI.isOperationLegal(Op: ISD::EXTRACT_VECTOR_ELT, VT: VecVT) ||
24694 TLI.isOperationExpand(Op: ISD::VECTOR_SHUFFLE, VT: VecVT)) {
24695 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: ScalarVT, N1: SVInVec,
24696 N2: DAG.getVectorIdxConstant(Val: OrigElt, DL));
24697 }
24698 }
24699
24700 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
24701 // simplify it based on the (valid) extraction indices.
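// For example, if only lanes 0 and 2 of a v4i32 source are ever extracted,
// the remaining lanes are not demanded, and SimplifyDemandedVectorElts /
// SimplifyDemandedBits may be able to simplify the node producing the source.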
24702 if (llvm::all_of(Range: VecOp->users(), P: [&](SDNode *Use) {
24703 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
24704 Use->getOperand(Num: 0) == VecOp &&
24705 isa<ConstantSDNode>(Val: Use->getOperand(Num: 1));
24706 })) {
24707 APInt DemandedElts = APInt::getZero(numBits: NumElts);
24708 for (SDNode *User : VecOp->users()) {
24709 auto *CstElt = cast<ConstantSDNode>(Val: User->getOperand(Num: 1));
24710 if (CstElt->getAPIntValue().ult(RHS: NumElts))
24711 DemandedElts.setBit(CstElt->getZExtValue());
24712 }
24713 if (SimplifyDemandedVectorElts(Op: VecOp, DemandedElts, AssumeSingleUse: true)) {
24714 // We simplified the vector operand of this extract element. If this
24715 // extract is not dead, visit it again so it is folded properly.
24716 if (N->getOpcode() != ISD::DELETED_NODE)
24717 AddToWorklist(N);
24718 return SDValue(N, 0);
24719 }
24720 APInt DemandedBits = APInt::getAllOnes(numBits: VecEltBitWidth);
24721 if (SimplifyDemandedBits(Op: VecOp, DemandedBits, DemandedElts, AssumeSingleUse: true)) {
24722 // We simplified the vector operand of this extract element. If this
24723 // extract is not dead, visit it again so it is folded properly.
24724 if (N->getOpcode() != ISD::DELETED_NODE)
24725 AddToWorklist(N);
24726 return SDValue(N, 0);
24727 }
24728 }
24729
24730 if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N))
24731 return SDValue(N, 0);
24732
24733 // Everything under here is trying to match an extract of a loaded value.
24734 // If the result of the load has to be truncated, then it's not necessarily
24735 // profitable.
24736 bool BCNumEltsChanged = false;
24737 EVT ExtVT = VecVT.getVectorElementType();
24738 EVT LVT = ExtVT;
24739 if (ScalarVT.bitsLT(VT: LVT) && !TLI.isTruncateFree(FromVT: LVT, ToVT: ScalarVT))
24740 return SDValue();
24741
24742 if (VecOp.getOpcode() == ISD::BITCAST) {
24743 // Don't duplicate a load with other uses.
24744 if (!VecOp.hasOneUse())
24745 return SDValue();
24746
24747 EVT BCVT = VecOp.getOperand(i: 0).getValueType();
24748 if (!BCVT.isVector() || ExtVT.bitsGT(VT: BCVT.getVectorElementType()))
24749 return SDValue();
24750 if (NumElts != BCVT.getVectorNumElements())
24751 BCNumEltsChanged = true;
24752 VecOp = VecOp.getOperand(i: 0);
24753 ExtVT = BCVT.getVectorElementType();
24754 }
24755
24756 // extract (vector load $addr), i --> load $addr + i * size
24757 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
24758 ISD::isNormalLoad(N: VecOp.getNode()) &&
24759 !Index->hasPredecessor(N: VecOp.getNode())) {
24760 auto *VecLoad = dyn_cast<LoadSDNode>(Val&: VecOp);
24761 if (VecLoad && VecLoad->isSimple()) {
24762 if (SDValue Scalarized = TLI.scalarizeExtractedVectorLoad(
24763 ResultVT: ScalarVT, DL: SDLoc(N), InVecVT: VecVT, EltNo: Index, OriginalLoad: VecLoad, DAG)) {
24764 ++OpsNarrowed;
24765 return Scalarized;
24766 }
24767 }
24768 }
24769
24770 // Perform only after legalization to ensure build_vector / vector_shuffle
24771 // optimizations have already been done.
24772 if (!LegalOperations || !IndexC)
24773 return SDValue();
24774
24775 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
24776 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
24777 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
24778 int Elt = IndexC->getZExtValue();
24779 LoadSDNode *LN0 = nullptr;
24780 if (ISD::isNormalLoad(N: VecOp.getNode())) {
24781 LN0 = cast<LoadSDNode>(Val&: VecOp);
24782 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
24783 VecOp.getOperand(i: 0).getValueType() == ExtVT &&
24784 ISD::isNormalLoad(N: VecOp.getOperand(i: 0).getNode())) {
24785 // Don't duplicate a load with other uses.
24786 if (!VecOp.hasOneUse())
24787 return SDValue();
24788
24789 LN0 = cast<LoadSDNode>(Val: VecOp.getOperand(i: 0));
24790 }
24791 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(Val&: VecOp)) {
24792 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
24793 // =>
24794 // (load $addr+1*size)
24795
24796 // Don't duplicate a load with other uses.
24797 if (!VecOp.hasOneUse())
24798 return SDValue();
24799
24800 // If the bit convert changed the number of elements, it is unsafe
24801 // to examine the mask.
24802 if (BCNumEltsChanged)
24803 return SDValue();
24804
24805 // Select the input vector, guarding against an out-of-range extract index.
24806 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Idx: Elt);
24807 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(i: 0) : VecOp.getOperand(i: 1);
24808
24809 if (VecOp.getOpcode() == ISD::BITCAST) {
24810 // Don't duplicate a load with other uses.
24811 if (!VecOp.hasOneUse())
24812 return SDValue();
24813
24814 VecOp = VecOp.getOperand(i: 0);
24815 }
24816 if (ISD::isNormalLoad(N: VecOp.getNode())) {
24817 LN0 = cast<LoadSDNode>(Val&: VecOp);
24818 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
24819 Index = DAG.getConstant(Val: Elt, DL, VT: Index.getValueType());
24820 }
24821 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
24822 VecVT.getVectorElementType() == ScalarVT &&
24823 (!LegalTypes ||
24824 TLI.isTypeLegal(
24825 VT: VecOp.getOperand(i: 0).getValueType().getVectorElementType()))) {
24826 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
24827 // -> extract_vector_elt a, 0
24828 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
24829 // -> extract_vector_elt a, 1
24830 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
24831 // -> extract_vector_elt b, 0
24832 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
24833 // -> extract_vector_elt b, 1
24834 EVT ConcatVT = VecOp.getOperand(i: 0).getValueType();
24835 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
24836 SDValue NewIdx = DAG.getConstant(Val: Elt % ConcatNumElts, DL,
24837 VT: Index.getValueType());
24838
24839 SDValue ConcatOp = VecOp.getOperand(i: Elt / ConcatNumElts);
24840 SDValue Elt = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL,
24841 VT: ConcatVT.getVectorElementType(),
24842 N1: ConcatOp, N2: NewIdx);
24843 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ScalarVT, Operand: Elt);
24844 }
24845
24846 // Make sure we found a simple (non-volatile, non-atomic) load and the
24847 // extractelement is the only use of the loaded value.
24848 if (!LN0 || !LN0->hasNUsesOfValue(NUses: 1, Value: 0) || !LN0->isSimple())
24849 return SDValue();
24850
24851 // If Idx was -1 above, Elt is going to be -1, so just return undef.
24852 if (Elt == -1)
24853 return DAG.getUNDEF(VT: LVT);
24854
24855 if (SDValue Scalarized =
24856 TLI.scalarizeExtractedVectorLoad(ResultVT: LVT, DL, InVecVT: VecVT, EltNo: Index, OriginalLoad: LN0, DAG)) {
24857 ++OpsNarrowed;
24858 return Scalarized;
24859 }
24860
24861 return SDValue();
24862}
24863
24864// Simplify (build_vec (ext )) to (bitcast (build_vec ))
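// For example (a little-endian sketch):
//   v2i64 build_vector (zext i32 a), (zext i32 b)
// --> bitcast (v4i32 build_vector a, 0, b, 0) to v2i64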
24865SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
24866 // We perform this optimization post type-legalization because
24867 // the type-legalizer often scalarizes integer-promoted vectors.
24868 // Performing this optimization before may create bit-casts which
24869 // will be type-legalized to complex code sequences.
24870 // We perform this optimization only before the operation legalizer because we
24871 // may introduce illegal operations.
24872 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
24873 return SDValue();
24874
24875 unsigned NumInScalars = N->getNumOperands();
24876 SDLoc DL(N);
24877 EVT VT = N->getValueType(ResNo: 0);
24878
24879 // Check to see if this is a BUILD_VECTOR of a bunch of values
24880 // which come from any_extend or zero_extend nodes. If so, we can create
24881 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
24882 // optimizations. We do not handle sign-extend because we can't fill the sign
24883 // using shuffles.
24884 EVT SourceType = MVT::Other;
24885 bool AllAnyExt = true;
24886
24887 for (unsigned i = 0; i != NumInScalars; ++i) {
24888 SDValue In = N->getOperand(Num: i);
24889 // Ignore undef inputs.
24890 if (In.isUndef()) continue;
24891
24892 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
24893 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
24894
24895 // Abort if the element is not an extension.
24896 if (!ZeroExt && !AnyExt) {
24897 SourceType = MVT::Other;
24898 break;
24899 }
24900
24901 // The input is a ZeroExt or AnyExt. Check the original type.
24902 EVT InTy = In.getOperand(i: 0).getValueType();
24903
24904 // Check that all of the widened source types are the same.
24905 if (SourceType == MVT::Other)
24906 // First time.
24907 SourceType = InTy;
24908 else if (InTy != SourceType) {
24909 // Multiple incoming types. Abort.
24910 SourceType = MVT::Other;
24911 break;
24912 }
24913
24914 // Check if all of the extends are ANY_EXTENDs.
24915 AllAnyExt &= AnyExt;
24916 }
24917
24918 // In order to have valid types, all of the inputs must be extended from the
24919 // same source type and all of the inputs must be any or zero extend.
24920 // Scalar sizes must be a power of two.
24921 EVT OutScalarTy = VT.getScalarType();
24922 bool ValidTypes =
24923 SourceType != MVT::Other &&
24924 llvm::has_single_bit<uint32_t>(Value: OutScalarTy.getSizeInBits()) &&
24925 llvm::has_single_bit<uint32_t>(Value: SourceType.getSizeInBits());
24926
24927 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
24928 // turn into a single shuffle instruction.
24929 if (!ValidTypes)
24930 return SDValue();
24931
24932 // If we already have a splat buildvector, then don't fold it if it means
24933 // introducing zeros.
24934 if (!AllAnyExt && DAG.isSplatValue(V: SDValue(N, 0), /*AllowUndefs*/ true))
24935 return SDValue();
24936
24937 bool isLE = DAG.getDataLayout().isLittleEndian();
24938 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
24939 assert(ElemRatio > 1 && "Invalid element size ratio");
24940 SDValue Filler = AllAnyExt ? DAG.getPOISON(VT: SourceType)
24941 : DAG.getConstant(Val: 0, DL, VT: SourceType);
24942
24943 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
24944 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
24945
24946 // Populate the new build_vector
24947 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
24948 SDValue Cast = N->getOperand(Num: i);
24949 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
24950 Cast.getOpcode() == ISD::ZERO_EXTEND ||
24951 Cast.isUndef()) && "Invalid cast opcode");
24952 SDValue In;
24953 if (Cast.isUndef())
24954 In = DAG.getUNDEF(VT: SourceType);
24955 else
24956 In = Cast->getOperand(Num: 0);
24957 unsigned Index = isLE ? (i * ElemRatio) :
24958 (i * ElemRatio + (ElemRatio - 1));
24959
24960 assert(Index < Ops.size() && "Invalid index");
24961 Ops[Index] = In;
24962 }
24963
24964 // The type of the new BUILD_VECTOR node.
24965 EVT VecVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: SourceType, NumElements: NewBVElems);
24966 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
24967 "Invalid vector size");
24968 // Check if the new vector type is legal.
24969 if (!isTypeLegal(VT: VecVT) ||
24970 (!TLI.isOperationLegal(Op: ISD::BUILD_VECTOR, VT: VecVT) &&
24971 TLI.isOperationLegal(Op: ISD::BUILD_VECTOR, VT)))
24972 return SDValue();
24973
24974 // Make the new BUILD_VECTOR.
24975 SDValue BV = DAG.getBuildVector(VT: VecVT, DL, Ops);
24976
24977 // The new BUILD_VECTOR node has the potential to be further optimized.
24978 AddToWorklist(N: BV.getNode());
24979 // Bitcast to the desired type.
24980 return DAG.getBitcast(VT, V: BV);
24981}
24982
24983// Simplify (build_vec (trunc $1)
24984// (trunc (srl $1 half-width))
24985// (trunc (srl $1 (2 * half-width))))
24986// to (bitcast $1)
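// For example (a little-endian sketch):
//   v4i16 build_vector (trunc X), (trunc (srl X, 16)),
//                      (trunc (srl X, 32)), (trunc (srl X, 48))
// --> bitcast i64 X to v4i16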
24987SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
24988 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
24989
24990 EVT VT = N->getValueType(ResNo: 0);
24991
24992 // Don't run this before LegalizeTypes if VT is legal.
24993 // Targets may have other preferences.
24994 if (Level < AfterLegalizeTypes && TLI.isTypeLegal(VT))
24995 return SDValue();
24996
24997 // Only for little endian
24998 if (!DAG.getDataLayout().isLittleEndian())
24999 return SDValue();
25000
25001 EVT OutScalarTy = VT.getScalarType();
25002 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
25003
25004 // Only for power-of-two types, to be sure that the bitcast works well.
25005 if (!isPowerOf2_64(Value: ScalarTypeBitsize))
25006 return SDValue();
25007
25008 unsigned NumInScalars = N->getNumOperands();
25009
25010 // Look through bitcasts
25011 auto PeekThroughBitcast = [](SDValue Op) {
25012 if (Op.getOpcode() == ISD::BITCAST)
25013 return Op.getOperand(i: 0);
25014 return Op;
25015 };
25016
25017 // The source value where all the parts are extracted.
25018 SDValue Src;
25019 for (unsigned i = 0; i != NumInScalars; ++i) {
25020 SDValue In = PeekThroughBitcast(N->getOperand(Num: i));
25021 // Ignore undef inputs.
25022 if (In.isUndef()) continue;
25023
25024 if (In.getOpcode() != ISD::TRUNCATE)
25025 return SDValue();
25026
25027 In = PeekThroughBitcast(In.getOperand(i: 0));
25028
25029 if (In.getOpcode() != ISD::SRL) {
25030 // For now, only handle a build_vec without shuffling; handle shifts here
25031 // in the future.
25032 if (i != 0)
25033 return SDValue();
25034
25035 Src = In;
25036 } else {
25037 // In is SRL
25038 SDValue part = PeekThroughBitcast(In.getOperand(i: 0));
25039
25040 if (!Src) {
25041 Src = part;
25042 } else if (Src != part) {
25043 // Vector parts do not stem from the same variable
25044 return SDValue();
25045 }
25046
25047 SDValue ShiftAmtVal = In.getOperand(i: 1);
25048 if (!isa<ConstantSDNode>(Val: ShiftAmtVal))
25049 return SDValue();
25050
25051 uint64_t ShiftAmt = In.getConstantOperandVal(i: 1);
25052
25053 // The extracted value is not extracted at the right position
25054 if (ShiftAmt != i * ScalarTypeBitsize)
25055 return SDValue();
25056 }
25057 }
25058
25059 // Only cast if the size is the same
25060 if (!Src || Src.getValueType().getSizeInBits() != VT.getSizeInBits())
25061 return SDValue();
25062
25063 return DAG.getBitcast(VT, V: Src);
25064}
25065
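// Given the input vectors VecIn1/VecIn2 (sorted into non-increasing size
// order) and the per-element source map VectorMask, try to build a single
// VECTOR_SHUFFLE that produces the elements BUILD_VECTOR node N takes from
// these two inputs. The inputs may first be concatenated, split or padded so
// the shuffle operates on a single common type; an extract_subvector recovers
// VT if the shuffle had to be done at a wider width. Returns an empty SDValue
// on failure.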
25066SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
25067 ArrayRef<int> VectorMask,
25068 SDValue VecIn1, SDValue VecIn2,
25069 unsigned LeftIdx, bool DidSplitVec) {
25070 EVT VT = N->getValueType(ResNo: 0);
25071 EVT InVT1 = VecIn1.getValueType();
25072 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
25073
25074 unsigned NumElems = VT.getVectorNumElements();
25075 unsigned ShuffleNumElems = NumElems;
25076
25077 // If we artificially split a vector in two already, then the offsets in the
25078 // operands will all be based off of VecIn1, even those in VecIn2.
25079 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
25080
25081 uint64_t VTSize = VT.getFixedSizeInBits();
25082 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
25083 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
25084
25085 assert(InVT2Size <= InVT1Size &&
25086 "Inputs must be sorted to be in non-increasing vector size order.");
25087
25088 // We can't generate a shuffle node with mismatched input and output types.
25089 // Try to make the types match the type of the output.
25090 if (InVT1 != VT || InVT2 != VT) {
25091 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
25092 // If the output vector length is a multiple of both input lengths,
25093 // we can concatenate them and pad the rest with poison.
25094 unsigned NumConcats = VTSize / InVT1Size;
25095 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
25096 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getPOISON(VT: InVT1));
25097 ConcatOps[0] = VecIn1;
25098 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getPOISON(VT: InVT1);
25099 VecIn1 = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, Ops: ConcatOps);
25100 VecIn2 = SDValue();
25101 } else if (InVT1Size == VTSize * 2) {
25102 if (!TLI.isExtractSubvectorCheap(ResVT: VT, SrcVT: InVT1, Index: NumElems))
25103 return SDValue();
25104
25105 if (!VecIn2.getNode()) {
25106 // If we only have one input vector, and it's twice the size of the
25107 // output, split it in two.
25108 VecIn2 = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, N1: VecIn1,
25109 N2: DAG.getVectorIdxConstant(Val: NumElems, DL));
25110 VecIn1 = DAG.getExtractSubvector(DL, VT, Vec: VecIn1, Idx: 0);
25111 // Since we now have shorter input vectors, adjust the offset of the
25112 // second vector's start.
25113 Vec2Offset = NumElems;
25114 } else {
25115 assert(InVT2Size <= InVT1Size &&
25116 "Second input is not going to be larger than the first one.");
25117
25118 // VecIn1 is wider than the output, and we have another, possibly
25119 // smaller input. Pad the smaller input with undefs, shuffle at the
25120 // input vector width, and extract the output.
25121 // The shuffle type is different than VT, so check legality again.
25122 if (LegalOperations &&
25123 !TLI.isOperationLegal(Op: ISD::VECTOR_SHUFFLE, VT: InVT1))
25124 return SDValue();
25125
25126 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
25127 // lower it back into a BUILD_VECTOR. So if the inserted type is
25128 // illegal, don't even try.
25129 if (InVT1 != InVT2) {
25130 if (!TLI.isTypeLegal(VT: InVT2))
25131 return SDValue();
25132 VecIn2 = DAG.getInsertSubvector(DL, Vec: DAG.getPOISON(VT: InVT1), SubVec: VecIn2, Idx: 0);
25133 }
25134 ShuffleNumElems = NumElems * 2;
25135 }
25136 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
25137 SmallVector<SDValue, 2> ConcatOps(2, DAG.getPOISON(VT: InVT2));
25138 ConcatOps[0] = VecIn2;
25139 VecIn2 = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, Ops: ConcatOps);
25140 } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) {
25141 if (!TLI.isExtractSubvectorCheap(ResVT: VT, SrcVT: InVT1, Index: NumElems) ||
25142 !TLI.isTypeLegal(VT: InVT1) || !TLI.isTypeLegal(VT: InVT2))
25143 return SDValue();
25144 // If the dest vector has fewer than two elements, then using a shuffle and
25145 // extracting from larger regs will cost even more.
25146 if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode())
25147 return SDValue();
25148 assert(InVT2Size <= InVT1Size &&
25149 "Second input is not going to be larger than the first one.");
25150
25151 // VecIn1 is wider than the output, and we have another, possibly
25152 // smaller input. Pad the smaller input with undefs, shuffle at the
25153 // input vector width, and extract the output.
25154 // The shuffle type is different than VT, so check legality again.
25155 if (LegalOperations && !TLI.isOperationLegal(Op: ISD::VECTOR_SHUFFLE, VT: InVT1))
25156 return SDValue();
25157
25158 if (InVT1 != InVT2) {
25159 VecIn2 = DAG.getInsertSubvector(DL, Vec: DAG.getPOISON(VT: InVT1), SubVec: VecIn2, Idx: 0);
25160 }
25161 ShuffleNumElems = InVT1Size / VTSize * NumElems;
25162 } else {
25163 // TODO: Support cases where the length mismatch isn't exactly by a
25164 // factor of 2.
25165 // TODO: Move this check upwards, so that if we have bad type
25166 // mismatches, we don't create any DAG nodes.
25167 return SDValue();
25168 }
25169 }
25170
25171 // Initialize mask to undef.
25172 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
25173
25174 // Only need to run up to the number of elements actually used, not the
25175 // total number of elements in the shuffle - if we are shuffling a wider
25176 // vector, the high lanes should be set to undef.
25177 for (unsigned i = 0; i != NumElems; ++i) {
25178 if (VectorMask[i] <= 0)
25179 continue;
25180
25181 unsigned ExtIndex = N->getOperand(Num: i).getConstantOperandVal(i: 1);
25182 if (VectorMask[i] == (int)LeftIdx) {
25183 Mask[i] = ExtIndex;
25184 } else if (VectorMask[i] == (int)LeftIdx + 1) {
25185 Mask[i] = Vec2Offset + ExtIndex;
25186 }
25187 }
25188
25189 // The type of the input vectors may have changed above.
25190 InVT1 = VecIn1.getValueType();
25191
25192 // If we already have a VecIn2, it should have the same type as VecIn1.
25193 // If we don't, get a poison vector of the appropriate type.
25194 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getPOISON(VT: InVT1);
25195 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
25196
25197 SDValue Shuffle = DAG.getVectorShuffle(VT: InVT1, dl: DL, N1: VecIn1, N2: VecIn2, Mask);
25198 if (ShuffleNumElems > NumElems)
25199 Shuffle = DAG.getExtractSubvector(DL, VT, Vec: Shuffle, Idx: 0);
25200
25201 return Shuffle;
25202}
25203
25204static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
25205 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
25206
25207 // First, determine where the build vector is not undef.
25208 // TODO: We could extend this to handle zero elements as well as undefs.
25209 int NumBVOps = BV->getNumOperands();
25210 int ZextElt = -1;
25211 for (int i = 0; i != NumBVOps; ++i) {
25212 SDValue Op = BV->getOperand(Num: i);
25213 if (Op.isUndef())
25214 continue;
25215 if (ZextElt == -1)
25216 ZextElt = i;
25217 else
25218 return SDValue();
25219 }
25220 // Bail out if there's no non-undef element.
25221 if (ZextElt == -1)
25222 return SDValue();
25223
25224 // The build vector contains some number of undef elements and exactly
25225 // one other element. That other element must be a zero-extended scalar
25226 // extracted from a vector at a constant index to turn this into a shuffle.
25227 // Also, require that the build vector does not implicitly truncate/extend
25228 // its elements.
25229 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
25230 EVT VT = BV->getValueType(ResNo: 0);
25231 SDValue Zext = BV->getOperand(Num: ZextElt);
25232 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
25233 Zext.getOperand(i: 0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
25234 !isa<ConstantSDNode>(Val: Zext.getOperand(i: 0).getOperand(i: 1)) ||
25235 Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
25236 return SDValue();
25237
25238 // The zero-extend width must be a multiple of the source size, and we must
25239 // be building a vector of the same size as the source of the extract element.
25240 SDValue Extract = Zext.getOperand(i: 0);
25241 unsigned DestSize = Zext.getValueSizeInBits();
25242 unsigned SrcSize = Extract.getValueSizeInBits();
25243 if (DestSize % SrcSize != 0 ||
25244 Extract.getOperand(i: 0).getValueSizeInBits() != VT.getSizeInBits())
25245 return SDValue();
25246
25247 // Create a shuffle mask that will combine the extracted element with zeros
25248 // and undefs.
25249 int ZextRatio = DestSize / SrcSize;
25250 int NumMaskElts = NumBVOps * ZextRatio;
25251 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
25252 for (int i = 0; i != NumMaskElts; ++i) {
25253 if (i / ZextRatio == ZextElt) {
25254 // The low bits of the (potentially translated) extracted element map to
25255 // the source vector. The high bits map to zero. We will use a zero vector
25256 // as the 2nd source operand of the shuffle, so use the 1st element of
25257 // that vector (mask value is number-of-elements) for the high bits.
25258 int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
25259 ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(i: 1)
25260 : NumMaskElts;
25261 }
25262
25263 // Undef elements of the build vector remain undef because we initialize
25264 // the shuffle mask with -1.
25265 }
25266
25267 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
25268 // bitcast (shuffle V, ZeroVec, VectorMask)
25269 SDLoc DL(BV);
25270 EVT VecVT = Extract.getOperand(i: 0).getValueType();
25271 SDValue ZeroVec = DAG.getConstant(Val: 0, DL, VT: VecVT);
25272 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25273 SDValue Shuf = TLI.buildLegalVectorShuffle(VT: VecVT, DL, N0: Extract.getOperand(i: 0),
25274 N1: ZeroVec, Mask: ShufMask, DAG);
25275 if (!Shuf)
25276 return SDValue();
25277 return DAG.getBitcast(VT, V: Shuf);
25278}
25279
25280// FIXME: promote to STLExtras.
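// Returns the index of the first occurrence of Val in Range, or -1 if Val is
// not present.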
25281template <typename R, typename T>
25282static auto getFirstIndexOf(R &&Range, const T &Val) {
25283 auto I = find(Range, Val);
25284 if (I == Range.end())
25285 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
25286 return std::distance(Range.begin(), I);
25287}
25288
25289// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
25290// operations. If the types of the vectors we're extracting from allow it,
25291// turn this into a vector_shuffle node.
25292SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
25293 SDLoc DL(N);
25294 EVT VT = N->getValueType(ResNo: 0);
25295
25296 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
25297 if (!isTypeLegal(VT))
25298 return SDValue();
25299
25300 if (SDValue V = reduceBuildVecToShuffleWithZero(BV: N, DAG))
25301 return V;
25302
25303 // May only combine to a shuffle after legalization if the shuffle is legal.
25304 if (LegalOperations && !TLI.isOperationLegal(Op: ISD::VECTOR_SHUFFLE, VT))
25305 return SDValue();
25306
25307 bool UsesZeroVector = false;
25308 unsigned NumElems = N->getNumOperands();
25309
25310 // Record, for each element of the newly built vector, which input vector
25311 // that element comes from. -1 stands for undef, 0 for the zero vector,
25312 // and positive values for the input vectors.
25313 // VectorMask maps each element to its vector number, and VecIn maps vector
25314 // numbers to their initial SDValues.
25315
25316 SmallVector<int, 8> VectorMask(NumElems, -1);
25317 SmallVector<SDValue, 8> VecIn;
25318 VecIn.push_back(Elt: SDValue());
25319
25320 // If we have a single extract_element with a constant index, track the index
25321 // value.
25322 unsigned OneConstExtractIndex = ~0u;
25323
25324 // Count the extract_vector_elt sources (i.e. operands that are neither constant zero nor undef).
25325 unsigned NumExtracts = 0;
25326
25327 for (unsigned i = 0; i != NumElems; ++i) {
25328 SDValue Op = N->getOperand(Num: i);
25329
25330 if (Op.isUndef())
25331 continue;
25332
25333 // See if we can use a blend with a zero vector.
25334 // TODO: Should we generalize this to a blend with an arbitrary constant
25335 // vector?
25336 if (isNullConstant(V: Op) || isNullFPConstant(V: Op)) {
25337 UsesZeroVector = true;
25338 VectorMask[i] = 0;
25339 continue;
25340 }
25341
25342 // Not an undef or zero. If the input is something other than an
25343 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
25344 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
25345 return SDValue();
25346
25347 SDValue ExtractedFromVec = Op.getOperand(i: 0);
25348 if (ExtractedFromVec.getValueType().isScalableVector())
25349 return SDValue();
25350 auto *ExtractIdx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1));
25351 if (!ExtractIdx)
25352 return SDValue();
25353
25354 if (ExtractIdx->getAsAPIntVal().uge(
25355 RHS: ExtractedFromVec.getValueType().getVectorNumElements()))
25356 return SDValue();
25357
25358 // All inputs must have the same element type as the output.
25359 if (VT.getVectorElementType() !=
25360 ExtractedFromVec.getValueType().getVectorElementType())
25361 return SDValue();
25362
25363 OneConstExtractIndex = ExtractIdx->getZExtValue();
25364 ++NumExtracts;
25365
25366 // Have we seen this input vector before?
25367 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
25368 // a map back from SDValues to numbers isn't worth it.
25369 int Idx = getFirstIndexOf(Range&: VecIn, Val: ExtractedFromVec);
25370 if (Idx == -1) { // A new source vector?
25371 Idx = VecIn.size();
25372 VecIn.push_back(Elt: ExtractedFromVec);
25373 }
25374
25375 VectorMask[i] = Idx;
25376 }
25377
25378 // If we didn't find at least one input vector, bail out.
25379 if (VecIn.size() < 2)
25380 return SDValue();
25381
25382 // If all the operands of the BUILD_VECTOR extract from the same
25383 // vector, then split the vector efficiently based on the maximum
25384 // vector access index and adjust the VectorMask and
25385 // VecIn accordingly.
25386 bool DidSplitVec = false;
25387 if (VecIn.size() == 2) {
25388 // If we only found a single constant indexed extract_vector_elt feeding the
25389 // build_vector, do not produce a more complicated shuffle if the extract is
25390 // cheap with other constant/undef elements. Skip broadcast patterns with
25391 // multiple uses in the build_vector.
25392
25393 // TODO: This should be more aggressive about skipping the shuffle
25394 // formation, particularly if VecIn[1].hasOneUse(), and regardless of the
25395 // index.
25396 if (NumExtracts == 1 &&
25397 TLI.isOperationLegalOrCustom(Op: ISD::EXTRACT_VECTOR_ELT, VT) &&
25398 TLI.isTypeLegal(VT: VT.getVectorElementType()) &&
25399 TLI.isExtractVecEltCheap(VT, Index: OneConstExtractIndex))
25400 return SDValue();
25401
25402 unsigned MaxIndex = 0;
25403 unsigned NearestPow2 = 0;
25404 SDValue Vec = VecIn.back();
25405 EVT InVT = Vec.getValueType();
25406 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
25407
25408 for (unsigned i = 0; i < NumElems; i++) {
25409 if (VectorMask[i] <= 0)
25410 continue;
25411 unsigned Index = N->getOperand(Num: i).getConstantOperandVal(i: 1);
25412 IndexVec[i] = Index;
25413 MaxIndex = std::max(a: MaxIndex, b: Index);
25414 }
25415
25416 NearestPow2 = PowerOf2Ceil(A: MaxIndex);
25417 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
25418 NumElems * 2 < NearestPow2) {
25419 unsigned SplitSize = NearestPow2 / 2;
25420 EVT SplitVT = EVT::getVectorVT(Context&: *DAG.getContext(),
25421 VT: InVT.getVectorElementType(), NumElements: SplitSize);
25422 if (TLI.isTypeLegal(VT: SplitVT) &&
25423 SplitSize + SplitVT.getVectorNumElements() <=
25424 InVT.getVectorNumElements()) {
25425 SDValue VecIn2 = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: SplitVT, N1: Vec,
25426 N2: DAG.getVectorIdxConstant(Val: SplitSize, DL));
25427 SDValue VecIn1 = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: SplitVT, N1: Vec,
25428 N2: DAG.getVectorIdxConstant(Val: 0, DL));
25429 VecIn.pop_back();
25430 VecIn.push_back(Elt: VecIn1);
25431 VecIn.push_back(Elt: VecIn2);
25432 DidSplitVec = true;
25433
25434 for (unsigned i = 0; i < NumElems; i++) {
25435 if (VectorMask[i] <= 0)
25436 continue;
25437 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
25438 }
25439 }
25440 }
25441 }
25442
25443 // Sort input vectors by decreasing vector element count,
25444 // while preserving the relative order of equally-sized vectors.
25445 // Note that we keep the first "implicit" zero vector as-is.
25446 SmallVector<SDValue, 8> SortedVecIn(VecIn);
25447 llvm::stable_sort(Range: MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
25448 C: [](const SDValue &a, const SDValue &b) {
25449 return a.getValueType().getVectorNumElements() >
25450 b.getValueType().getVectorNumElements();
25451 });
25452
25453 // We now also need to rebuild the VectorMask, because it referenced element
25454 // order in VecIn, and we just sorted them.
25455 for (int &SourceVectorIndex : VectorMask) {
25456 if (SourceVectorIndex <= 0)
25457 continue;
25458 unsigned Idx = getFirstIndexOf(Range&: SortedVecIn, Val: VecIn[SourceVectorIndex]);
25459 assert(Idx > 0 && Idx < SortedVecIn.size() &&
25460 VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
25461 SourceVectorIndex = Idx;
25462 }
25463
25464 VecIn = std::move(SortedVecIn);
25465
25466 // TODO: Should this fire if some of the input vectors have illegal types
25467 // (like it does now), or should we let legalization run its course first?
25468
25469 // Shuffle phase:
25470 // Take pairs of vectors, and shuffle them so that the result has elements
25471 // from these vectors in the correct places.
25472 // For example, given:
25473 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
25474 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
25475 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
25476 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
25477 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
25478 // We will generate:
25479 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
25480 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
25481 SmallVector<SDValue, 4> Shuffles;
25482 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
25483 unsigned LeftIdx = 2 * In + 1;
25484 SDValue VecLeft = VecIn[LeftIdx];
25485 SDValue VecRight =
25486 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
25487
25488 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecIn1: VecLeft,
25489 VecIn2: VecRight, LeftIdx, DidSplitVec))
25490 Shuffles.push_back(Elt: Shuffle);
25491 else
25492 return SDValue();
25493 }
25494
25495 // If we need the zero vector as an "ingredient" in the blend tree, add it
25496 // to the list of shuffles.
25497 if (UsesZeroVector)
25498 Shuffles.push_back(Elt: VT.isInteger() ? DAG.getConstant(Val: 0, DL, VT)
25499 : DAG.getConstantFP(Val: 0.0, DL, VT));
25500
25501 // If we only have one shuffle, we're done.
25502 if (Shuffles.size() == 1)
25503 return Shuffles[0];
25504
25505 // Update the vector mask to point to the post-shuffle vectors.
25506 for (int &Vec : VectorMask)
25507 if (Vec == 0)
25508 Vec = Shuffles.size() - 1;
25509 else
25510 Vec = (Vec - 1) / 2;
25511
25512 // More than one shuffle. Generate a binary tree of blends, e.g. if from
25513 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
25514 // generate:
25515 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
25516 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
25517 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
25518 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
25519 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
25520 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
25521 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
25522
25523 // Make sure the initial size of the shuffle list is even.
25524 if (Shuffles.size() % 2)
25525 Shuffles.push_back(Elt: DAG.getPOISON(VT));
25526
25527 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
25528 if (CurSize % 2) {
25529 Shuffles[CurSize] = DAG.getPOISON(VT);
25530 CurSize++;
25531 }
25532 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
25533 int Left = 2 * In;
25534 int Right = 2 * In + 1;
25535 SmallVector<int, 8> Mask(NumElems, -1);
25536 SDValue L = Shuffles[Left];
25537 ArrayRef<int> LMask;
25538 bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
25539 L.use_empty() && L.getOperand(i: 1).isUndef() &&
25540 L.getOperand(i: 0).getValueType() == L.getValueType();
25541 if (IsLeftShuffle) {
25542 LMask = cast<ShuffleVectorSDNode>(Val: L.getNode())->getMask();
25543 L = L.getOperand(i: 0);
25544 }
25545 SDValue R = Shuffles[Right];
25546 ArrayRef<int> RMask;
25547 bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
25548 R.use_empty() && R.getOperand(i: 1).isUndef() &&
25549 R.getOperand(i: 0).getValueType() == R.getValueType();
25550 if (IsRightShuffle) {
25551 RMask = cast<ShuffleVectorSDNode>(Val: R.getNode())->getMask();
25552 R = R.getOperand(i: 0);
25553 }
25554 for (unsigned I = 0; I != NumElems; ++I) {
25555 if (VectorMask[I] == Left) {
25556 Mask[I] = I;
25557 if (IsLeftShuffle)
25558 Mask[I] = LMask[I];
25559 VectorMask[I] = In;
25560 } else if (VectorMask[I] == Right) {
25561 Mask[I] = I + NumElems;
25562 if (IsRightShuffle)
25563 Mask[I] = RMask[I] + NumElems;
25564 VectorMask[I] = In;
25565 }
25566 }
25567
25568 Shuffles[In] = DAG.getVectorShuffle(VT, dl: DL, N1: L, N2: R, Mask);
25569 }
25570 }
25571 return Shuffles[0];
25572}
25573
25574 // Try to turn a build vector of zero extends of extract vector elts into a
25575 // vector zero extend and possibly an extract subvector.
25576// TODO: Support sign extend?
25577// TODO: Allow undef elements?
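// For example (a sketch):
//   v2i32 build_vector (zext (extract_elt v4i16 X, 2)),
//                      (zext (extract_elt v4i16 X, 3))
// --> zext (v2i16 extract_subvector X, 2) to v2i32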
25578SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
25579 if (LegalOperations)
25580 return SDValue();
25581
25582 EVT VT = N->getValueType(ResNo: 0);
25583
25584 bool FoundZeroExtend = false;
25585 SDValue Op0 = N->getOperand(Num: 0);
25586 auto checkElem = [&](SDValue Op) -> int64_t {
25587 unsigned Opc = Op.getOpcode();
25588 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
25589 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
25590 Op.getOperand(i: 0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
25591 Op0.getOperand(i: 0).getOperand(i: 0) == Op.getOperand(i: 0).getOperand(i: 0))
25592 if (auto *C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 0).getOperand(i: 1)))
25593 return C->getZExtValue();
25594 return -1;
25595 };
25596
25597 // Make sure the first element matches
25598 // (zext (extract_vector_elt X, C))
25599 // Offset must be a constant multiple of the
25600 // known-minimum vector length of the result type.
25601 int64_t Offset = checkElem(Op0);
25602 if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
25603 return SDValue();
25604
25605 unsigned NumElems = N->getNumOperands();
25606 SDValue In = Op0.getOperand(i: 0).getOperand(i: 0);
25607 EVT InSVT = In.getValueType().getScalarType();
25608 EVT InVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: InSVT, NumElements: NumElems);
25609
25610 // Don't create an illegal input type after type legalization.
25611 if (LegalTypes && !TLI.isTypeLegal(VT: InVT))
25612 return SDValue();
25613
25614 // Ensure all the elements come from the same vector and are adjacent.
25615 for (unsigned i = 1; i != NumElems; ++i) {
25616 if ((Offset + i) != checkElem(N->getOperand(Num: i)))
25617 return SDValue();
25618 }
25619
25620 SDLoc DL(N);
25621 In = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: InVT, N1: In,
25622 N2: Op0.getOperand(i: 0).getOperand(i: 1));
25623 return DAG.getNode(Opcode: FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
25624 VT, Operand: In);
25625}
25626
25627 // If this is a very simple BUILD_VECTOR whose first element is a ZERO_EXTEND,
25628 // and all other elements are constant zeros, granularize the BUILD_VECTOR's
25629 // element width, absorbing the ZERO_EXTEND and turning it into a constant zero op.
25630 // This pattern can appear during legalization.
25631//
25632 // NOTE: This could be generalized to allow more than a single
25633 // non-constant-zero op and UNDEFs, and to be KnownBits-based.
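// Illustrative example (little-endian, types chosen arbitrarily):
//   (v2i64 build_vector (i64 zero_extend (i32 X)), (i64 0))
//     --> (v2i64 bitcast (v4i32 build_vector X', 0, 0, 0))
// where X' is the zero-extended operand truncated back to i32 (i.e. X).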
25634SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) {
25635 // Don't run this after legalization. Targets may have other preferences.
25636 if (Level >= AfterLegalizeDAG)
25637 return SDValue();
25638
25639 // FIXME: support big-endian.
25640 if (DAG.getDataLayout().isBigEndian())
25641 return SDValue();
25642
25643 EVT VT = N->getValueType(ResNo: 0);
25644 EVT OpVT = N->getOperand(Num: 0).getValueType();
25645 assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?");
25646
25647 EVT OpIntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: OpVT.getSizeInBits());
25648
25649 if (!TLI.isTypeLegal(VT: OpIntVT) ||
25650 (LegalOperations && !TLI.isOperationLegalOrCustom(Op: ISD::BITCAST, VT: OpIntVT)))
25651 return SDValue();
25652
25653 unsigned EltBitwidth = VT.getScalarSizeInBits();
25654 // NOTE: the actual width of operands may be wider than that!
25655
25656 // Analyze all operands of this BUILD_VECTOR. What is the largest number of
25657 // active bits they all have? We'll want to truncate them all to that width.
25658 unsigned ActiveBits = 0;
25659 APInt KnownZeroOps(VT.getVectorNumElements(), 0);
25660 for (auto I : enumerate(First: N->ops())) {
25661 SDValue Op = I.value();
25662 // FIXME: support UNDEF elements?
25663 if (auto *Cst = dyn_cast<ConstantSDNode>(Val&: Op)) {
25664 unsigned OpActiveBits =
25665 Cst->getAPIntValue().trunc(width: EltBitwidth).getActiveBits();
25666 if (OpActiveBits == 0) {
25667 KnownZeroOps.setBit(I.index());
25668 continue;
25669 }
25670 // Profitability check: don't allow non-zero constant operands.
25671 return SDValue();
25672 }
25673 // Profitability check: there must only be a single non-zero operand,
25674 // and it must be the first operand of the BUILD_VECTOR.
25675 if (I.index() != 0)
25676 return SDValue();
25677 // The operand must be a zero-extension itself.
25678 // FIXME: this could be generalized to known leading zeros check.
25679 if (Op.getOpcode() != ISD::ZERO_EXTEND)
25680 return SDValue();
25681 unsigned CurrActiveBits =
25682 Op.getOperand(i: 0).getValueSizeInBits().getFixedValue();
25683 assert(!ActiveBits && "Already encountered non-constant-zero operand?");
25684 ActiveBits = CurrActiveBits;
25685 // We want to at least halve the element size.
25686 if (2 * ActiveBits > EltBitwidth)
25687 return SDValue();
25688 }
25689
25690 // This BUILD_VECTOR must have at least one non-constant-zero operand.
25691 if (ActiveBits == 0)
25692 return SDValue();
25693
25694 // We have EltBitwidth bits and the *minimal* chunk size is ActiveBits;
25695 // into how many chunks can we split our element width?
25696 EVT NewScalarIntVT, NewIntVT;
25697 std::optional<unsigned> Factor;
25698 // We can split the element into at least two chunks, but not into more
25699 // than |_ EltBitwidth / ActiveBits _| chunks. Find the largest split factor
25700 // that divides the element width evenly and for which the resulting
25701 // types/operations on that chunk width are legal.
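// For instance (illustrative numbers): with EltBitwidth = 64 and
// ActiveBits = 8, the candidate factors are 8, 4 and 2; we take the
// largest one whose chunk type (i8, i16 or i32) and widened vector type
// are legal on the target.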
25702 assert(2 * ActiveBits <= EltBitwidth &&
25703 "We know that half or less bits of the element are active.");
25704 for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
25705 if (EltBitwidth % Scale != 0)
25706 continue;
25707 unsigned ChunkBitwidth = EltBitwidth / Scale;
25708 assert(ChunkBitwidth >= ActiveBits && "As per starting point.");
25709 NewScalarIntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ChunkBitwidth);
25710 NewIntVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: NewScalarIntVT,
25711 NumElements: Scale * N->getNumOperands());
25712 if (!TLI.isTypeLegal(VT: NewScalarIntVT) || !TLI.isTypeLegal(VT: NewIntVT) ||
25713 (LegalOperations &&
25714 !(TLI.isOperationLegalOrCustom(Op: ISD::TRUNCATE, VT: NewScalarIntVT) &&
25715 TLI.isOperationLegalOrCustom(Op: ISD::BUILD_VECTOR, VT: NewIntVT))))
25716 continue;
25717 Factor = Scale;
25718 break;
25719 }
25720 if (!Factor)
25721 return SDValue();
25722
25723 SDLoc DL(N);
25724 SDValue ZeroOp = DAG.getConstant(Val: 0, DL, VT: NewScalarIntVT);
25725
25726 // Recreate the BUILD_VECTOR, with elements now being Factor times smaller.
25727 SmallVector<SDValue, 16> NewOps;
25728 NewOps.reserve(N: NewIntVT.getVectorNumElements());
25729 for (auto I : enumerate(First: N->ops())) {
25730 SDValue Op = I.value();
25731 assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here.");
25732 unsigned SrcOpIdx = I.index();
25733 if (KnownZeroOps[SrcOpIdx]) {
25734 NewOps.append(NumInputs: *Factor, Elt: ZeroOp);
25735 continue;
25736 }
25737 Op = DAG.getBitcast(VT: OpIntVT, V: Op);
25738 Op = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: NewScalarIntVT, Operand: Op);
25739 NewOps.emplace_back(Args&: Op);
25740 NewOps.append(NumInputs: *Factor - 1, Elt: ZeroOp);
25741 }
25742 assert(NewOps.size() == NewIntVT.getVectorNumElements());
25743 SDValue NewBV = DAG.getBuildVector(VT: NewIntVT, DL, Ops: NewOps);
25744 NewBV = DAG.getBitcast(VT, V: NewBV);
25745 return NewBV;
25746}
25747
25748SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
25749 EVT VT = N->getValueType(ResNo: 0);
25750
25751 // A vector built entirely of undefs is undef.
25752 if (ISD::allOperandsUndef(N))
25753 return DAG.getUNDEF(VT);
25754
25755 // If this is a splat of a bitcast from another vector, change to a
25756 // concat_vector.
25757 // For example:
25758 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
25759 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
25760 //
25761 // If X is a build_vector itself, the concat can become a larger build_vector.
25762 // TODO: Maybe this is useful for non-splat too?
25763 if (!LegalOperations) {
25764 SDValue Splat = cast<BuildVectorSDNode>(Val: N)->getSplatValue();
25765 // Only change build_vector to a concat_vector if the splat value type is
25766 // the same as the vector element type.
25767 if (Splat && Splat.getValueType() == VT.getVectorElementType()) {
25768 Splat = peekThroughBitcasts(V: Splat);
25769 EVT SrcVT = Splat.getValueType();
25770 if (SrcVT.isVector()) {
25771 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
25772 EVT NewVT = EVT::getVectorVT(Context&: *DAG.getContext(),
25773 VT: SrcVT.getVectorElementType(), NumElements: NumElts);
25774 if (!LegalTypes || TLI.isTypeLegal(VT: NewVT)) {
25775 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
25776 SDValue Concat =
25777 DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: SDLoc(N), VT: NewVT, Ops);
25778 return DAG.getBitcast(VT, V: Concat);
25779 }
25780 }
25781 }
25782 }
25783
25784 // Check if we can express the BUILD_VECTOR via a subvector extract.
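// Illustrative example (arbitrary types):
//   (v4i32 build_vector (extract_vector_elt v8i32:X, 4), ...,
//                       (extract_vector_elt v8i32:X, 7))
//     --> (v4i32 extract_subvector v8i32:X, 4)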
25785 if (!LegalTypes && (N->getNumOperands() > 1)) {
25786 SDValue Op0 = N->getOperand(Num: 0);
25787 auto checkElem = [&](SDValue Op) -> uint64_t {
25788 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
25789 (Op0.getOperand(i: 0) == Op.getOperand(i: 0)))
25790 if (auto CNode = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1)))
25791 return CNode->getZExtValue();
25792 return -1;
25793 };
25794
25795 int Offset = checkElem(Op0);
25796 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
25797 if (Offset + i != checkElem(N->getOperand(Num: i))) {
25798 Offset = -1;
25799 break;
25800 }
25801 }
25802
25803 if ((Offset == 0) &&
25804 (Op0.getOperand(i: 0).getValueType() == N->getValueType(ResNo: 0)))
25805 return Op0.getOperand(i: 0);
25806 if ((Offset != -1) &&
25807 ((Offset % N->getValueType(ResNo: 0).getVectorNumElements()) ==
25808 0)) // IDX must be multiple of output size.
25809 return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
25810 N1: Op0.getOperand(i: 0), N2: Op0.getOperand(i: 1));
25811 }
25812
25813 if (SDValue V = convertBuildVecZextToZext(N))
25814 return V;
25815
25816 if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N))
25817 return V;
25818
25819 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
25820 return V;
25821
25822 if (SDValue V = reduceBuildVecTruncToBitCast(N))
25823 return V;
25824
25825 if (SDValue V = reduceBuildVecToShuffle(N))
25826 return V;
25827
25828 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
25829 // Do this late as some of the above may replace the splat.
25830 if (TLI.getOperationAction(Op: ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
25831 if (SDValue V = cast<BuildVectorSDNode>(Val: N)->getSplatValue()) {
25832 assert(!V.isUndef() && "Splat of undef should have been handled earlier");
25833 return DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: SDLoc(N), VT, Operand: V);
25834 }
25835
25836 return SDValue();
25837}
25838
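// Fold a CONCAT_VECTORS of scalars that were bitcast to a (not yet legal)
// vector type, or undef, into one BUILD_VECTOR of the scalars.
// Illustrative example (assuming v2i32 is not a legal type on the target):
//   concat_vectors (v2i32 bitcast (i64 A)), (v2i32 bitcast (i64 B))
//     --> (v4i32 bitcast (v2i64 build_vector A, B))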
25839static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
25840 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25841 EVT OpVT = N->getOperand(Num: 0).getValueType();
25842
25843 // If the operands are legal vectors, leave them alone.
25844 if (TLI.isTypeLegal(VT: OpVT) || OpVT.isScalableVector())
25845 return SDValue();
25846
25847 SDLoc DL(N);
25848 EVT VT = N->getValueType(ResNo: 0);
25849 SmallVector<SDValue, 8> Ops;
25850 EVT SVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: OpVT.getSizeInBits());
25851
25852 // Keep track of what we encounter.
25853 EVT AnyFPVT;
25854
25855 for (const SDValue &Op : N->ops()) {
25856 if (ISD::BITCAST == Op.getOpcode() &&
25857 !Op.getOperand(i: 0).getValueType().isVector())
25858 Ops.push_back(Elt: Op.getOperand(i: 0));
25859 else if (Op.isUndef())
25860 Ops.push_back(Elt: DAG.getNode(Opcode: Op.getOpcode(), DL, VT: SVT));
25861 else
25862 return SDValue();
25863
25864 // Note whether we encounter an integer or floating point scalar.
25865 // If it's neither, bail out, it could be something weird like x86mmx.
25866 EVT LastOpVT = Ops.back().getValueType();
25867 if (LastOpVT.isFloatingPoint())
25868 AnyFPVT = LastOpVT;
25869 else if (!LastOpVT.isInteger())
25870 return SDValue();
25871 }
25872
25873 // If any of the operands is a floating point scalar bitcast to a vector,
25874 // use floating point types throughout, and bitcast everything.
25875 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
25876 if (AnyFPVT != EVT()) {
25877 SVT = AnyFPVT;
25878 for (SDValue &Op : Ops) {
25879 if (Op.getValueType() == SVT)
25880 continue;
25881 if (Op.isUndef())
25882 Op = DAG.getNode(Opcode: Op.getOpcode(), DL, VT: SVT);
25883 else
25884 Op = DAG.getBitcast(VT: SVT, V: Op);
25885 }
25886 }
25887
25888 EVT VecVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: SVT,
25889 NumElements: VT.getSizeInBits() / SVT.getSizeInBits());
25890 return DAG.getBitcast(VT, V: DAG.getBuildVector(VT: VecVT, DL, Ops));
25891}
25892
25893// Attempt to merge nested concat_vectors/undefs.
25894// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
25895// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
25896static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
25897 SelectionDAG &DAG) {
25898 EVT VT = N->getValueType(ResNo: 0);
25899
25900 // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
25901 EVT SubVT;
25902 SDValue FirstConcat;
25903 for (const SDValue &Op : N->ops()) {
25904 if (Op.isUndef())
25905 continue;
25906 if (Op.getOpcode() != ISD::CONCAT_VECTORS)
25907 return SDValue();
25908 if (!FirstConcat) {
25909 SubVT = Op.getOperand(i: 0).getValueType();
25910 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT: SubVT))
25911 return SDValue();
25912 FirstConcat = Op;
25913 continue;
25914 }
25915 if (SubVT != Op.getOperand(i: 0).getValueType())
25916 return SDValue();
25917 }
25918 assert(FirstConcat && "Concat of all-undefs found");
25919
25920 SmallVector<SDValue> ConcatOps;
25921 for (const SDValue &Op : N->ops()) {
25922 if (Op.isUndef()) {
25923 ConcatOps.append(NumInputs: FirstConcat->getNumOperands(), Elt: DAG.getPOISON(VT: SubVT));
25924 continue;
25925 }
25926 ConcatOps.append(in_start: Op->op_begin(), in_end: Op->op_end());
25927 }
25928 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: SDLoc(N), VT, Ops: ConcatOps);
25929}
25930
25931// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
25932// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
25933// most two distinct vectors the same size as the result, attempt to turn this
25934// into a legal shuffle.
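// Illustrative example (arbitrary types):
//   concat_vectors (v4i32 extract_subvector v8i32:X, 0),
//                  (v4i32 extract_subvector v8i32:Y, 4)
//     --> vector_shuffle<0,1,2,3,12,13,14,15> X, Y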
25935static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
25936 EVT VT = N->getValueType(ResNo: 0);
25937 EVT OpVT = N->getOperand(Num: 0).getValueType();
25938
25939 // We currently can't generate an appropriate shuffle for a scalable vector.
25940 if (VT.isScalableVector())
25941 return SDValue();
25942
25943 int NumElts = VT.getVectorNumElements();
25944 int NumOpElts = OpVT.getVectorNumElements();
25945
25946 SDValue SV0 = DAG.getPOISON(VT), SV1 = DAG.getPOISON(VT);
25947 SmallVector<int, 8> Mask;
25948
25949 for (SDValue Op : N->ops()) {
25950 Op = peekThroughBitcasts(V: Op);
25951
25952 // UNDEF nodes convert to UNDEF shuffle mask values.
25953 if (Op.isUndef()) {
25954 Mask.append(NumInputs: (unsigned)NumOpElts, Elt: -1);
25955 continue;
25956 }
25957
25958 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
25959 return SDValue();
25960
25961 // What vector are we extracting the subvector from and at what index?
25962 SDValue ExtVec = Op.getOperand(i: 0);
25963 int ExtIdx = Op.getConstantOperandVal(i: 1);
25964
25965 // We want the EVT of the original extraction to correctly scale the
25966 // extraction index.
25967 EVT ExtVT = ExtVec.getValueType();
25968 ExtVec = peekThroughBitcasts(V: ExtVec);
25969
25970 // UNDEF nodes convert to UNDEF shuffle mask values.
25971 if (ExtVec.isUndef()) {
25972 Mask.append(NumInputs: (unsigned)NumOpElts, Elt: -1);
25973 continue;
25974 }
25975
25976 // Ensure that we are extracting a subvector from a vector the same
25977 // size as the result.
25978 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
25979 return SDValue();
25980
25981 // Scale the subvector index to account for any bitcast.
25982 int NumExtElts = ExtVT.getVectorNumElements();
25983 if (0 == (NumExtElts % NumElts))
25984 ExtIdx /= (NumExtElts / NumElts);
25985 else if (0 == (NumElts % NumExtElts))
25986 ExtIdx *= (NumElts / NumExtElts);
25987 else
25988 return SDValue();
25989
25990 // At most we can reference 2 inputs in the final shuffle.
25991 if (SV0.isUndef() || SV0 == ExtVec) {
25992 SV0 = ExtVec;
25993 for (int i = 0; i != NumOpElts; ++i)
25994 Mask.push_back(Elt: i + ExtIdx);
25995 } else if (SV1.isUndef() || SV1 == ExtVec) {
25996 SV1 = ExtVec;
25997 for (int i = 0; i != NumOpElts; ++i)
25998 Mask.push_back(Elt: i + ExtIdx + NumElts);
25999 } else {
26000 return SDValue();
26001 }
26002 }
26003
26004 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26005 return TLI.buildLegalVectorShuffle(VT, DL: SDLoc(N), N0: DAG.getBitcast(VT, V: SV0),
26006 N1: DAG.getBitcast(VT, V: SV1), Mask, DAG);
26007}
26008
26009static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
26010 unsigned CastOpcode = N->getOperand(Num: 0).getOpcode();
26011 switch (CastOpcode) {
26012 case ISD::SINT_TO_FP:
26013 case ISD::UINT_TO_FP:
26014 case ISD::FP_TO_SINT:
26015 case ISD::FP_TO_UINT:
26016 // TODO: Allow more opcodes?
26017 // case ISD::BITCAST:
26018 // case ISD::TRUNCATE:
26019 // case ISD::ZERO_EXTEND:
26020 // case ISD::SIGN_EXTEND:
26021 // case ISD::FP_EXTEND:
26022 break;
26023 default:
26024 return SDValue();
26025 }
26026
26027 EVT SrcVT = N->getOperand(Num: 0).getOperand(i: 0).getValueType();
26028 if (!SrcVT.isVector())
26029 return SDValue();
26030
26031 // All operands of the concat must be the same kind of cast from the same
26032 // source type.
26033 SmallVector<SDValue, 4> SrcOps;
26034 for (SDValue Op : N->ops()) {
26035 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
26036 Op.getOperand(i: 0).getValueType() != SrcVT)
26037 return SDValue();
26038 SrcOps.push_back(Elt: Op.getOperand(i: 0));
26039 }
26040
26041 // The wider cast must be supported by the target. This is unusual because
26042 // the operation support type parameter depends on the opcode. In addition,
26043 // check the other type in the cast to make sure this is really legal.
26044 EVT VT = N->getValueType(ResNo: 0);
26045 EVT SrcEltVT = SrcVT.getVectorElementType();
26046 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
26047 EVT ConcatSrcVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: SrcEltVT, EC: NumElts);
26048 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26049 switch (CastOpcode) {
26050 case ISD::SINT_TO_FP:
26051 case ISD::UINT_TO_FP:
26052 if (!TLI.isOperationLegalOrCustom(Op: CastOpcode, VT: ConcatSrcVT) ||
26053 !TLI.isTypeLegal(VT))
26054 return SDValue();
26055 break;
26056 case ISD::FP_TO_SINT:
26057 case ISD::FP_TO_UINT:
26058 if (!TLI.isOperationLegalOrCustom(Op: CastOpcode, VT) ||
26059 !TLI.isTypeLegal(VT: ConcatSrcVT))
26060 return SDValue();
26061 break;
26062 default:
26063 llvm_unreachable("Unexpected cast opcode");
26064 }
26065
26066 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
26067 SDLoc DL(N);
26068 SDValue NewConcat = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: ConcatSrcVT, Ops: SrcOps);
26069 return DAG.getNode(Opcode: CastOpcode, DL, VT, Operand: NewConcat);
26070}
26071
26072 // See if this is a simple CONCAT_VECTORS with no UNDEF operands, where one of
26073 // the operands is a SHUFFLE_VECTOR and all other operands are also operands
26074 // of that SHUFFLE_VECTOR; if so, create a wider SHUFFLE_VECTOR.
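// Illustrative example (arbitrary types):
//   concat_vectors (vector_shuffle<1,0,3,2> v4i32:X, undef), v4i32:X
//     --> vector_shuffle<1,0,3,2,0,1,2,3> (concat_vectors X, poison), poison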
26075static SDValue combineConcatVectorOfShuffleAndItsOperands(
26076 SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
26077 bool LegalOperations) {
26078 EVT VT = N->getValueType(ResNo: 0);
26079 EVT OpVT = N->getOperand(Num: 0).getValueType();
26080 if (VT.isScalableVector())
26081 return SDValue();
26082
26083 // For now, only allow simple 2-operand concatenations.
26084 if (N->getNumOperands() != 2)
26085 return SDValue();
26086
26087 // Don't create illegal types/shuffles when not allowed to.
26088 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
26089 (LegalOperations &&
26090 !TLI.isOperationLegalOrCustom(Op: ISD::VECTOR_SHUFFLE, VT)))
26091 return SDValue();
26092
26093 // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them,
26094 // we want to find one that is: (1) a SHUFFLE_VECTOR (2) only used by us,
26095 // and (3) all operands of CONCAT_VECTORS must be either that SHUFFLE_VECTOR,
26096 // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!).
26097 // (4) and for now, the SHUFFLE_VECTOR must be unary.
26098 ShuffleVectorSDNode *SVN = nullptr;
26099 for (SDValue Op : N->ops()) {
26100 if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Val&: Op);
26101 CurSVN && CurSVN->getOperand(Num: 1).isUndef() && N->isOnlyUserOf(N: CurSVN) &&
26102 all_of(Range: N->ops(), P: [CurSVN](SDValue Op) {
26103 // FIXME: can we allow UNDEF operands?
26104 return !Op.isUndef() &&
26105 (Op.getNode() == CurSVN || is_contained(Range: CurSVN->ops(), Element: Op));
26106 })) {
26107 SVN = CurSVN;
26108 break;
26109 }
26110 }
26111 if (!SVN)
26112 return SDValue();
26113
26114 // We are going to pad the shuffle operands, so any index that was picking
26115 // from the second operand must be adjusted.
26116 SmallVector<int, 16> AdjustedMask(SVN->getMask());
26117 assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
26118
26119 // Identity masks for the operands of the (padded) shuffle.
26120 SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements());
26121 MutableArrayRef<int> FirstShufOpIdentityMask =
26122 MutableArrayRef<int>(IdentityMask)
26123 .take_front(N: OpVT.getVectorNumElements());
26124 MutableArrayRef<int> SecondShufOpIdentityMask =
26125 MutableArrayRef<int>(IdentityMask).take_back(N: OpVT.getVectorNumElements());
26126 std::iota(first: FirstShufOpIdentityMask.begin(), last: FirstShufOpIdentityMask.end(), value: 0);
26127 std::iota(first: SecondShufOpIdentityMask.begin(), last: SecondShufOpIdentityMask.end(),
26128 value: VT.getVectorNumElements());
26129
26130 // New combined shuffle mask.
26131 SmallVector<int, 32> Mask;
26132 Mask.reserve(N: VT.getVectorNumElements());
26133 for (SDValue Op : N->ops()) {
26134 assert(!Op.isUndef() && "Not expecting to concatenate UNDEF.");
26135 if (Op.getNode() == SVN) {
26136 append_range(C&: Mask, R&: AdjustedMask);
26137 continue;
26138 }
26139 if (Op == SVN->getOperand(Num: 0)) {
26140 append_range(C&: Mask, R&: FirstShufOpIdentityMask);
26141 continue;
26142 }
26143 if (Op == SVN->getOperand(Num: 1)) {
26144 append_range(C&: Mask, R&: SecondShufOpIdentityMask);
26145 continue;
26146 }
26147 llvm_unreachable("Unexpected operand!");
26148 }
26149
26150 // Don't create illegal shuffle masks.
26151 if (!TLI.isShuffleMaskLegal(Mask, VT))
26152 return SDValue();
26153
26154 // Pad the shuffle operands with poison.
26155 SDLoc dl(N);
26156 std::array<SDValue, 2> ShufOps;
26157 for (auto I : zip(t: SVN->ops(), u&: ShufOps)) {
26158 SDValue ShufOp = std::get<0>(t&: I);
26159 SDValue &NewShufOp = std::get<1>(t&: I);
26160 if (ShufOp.isUndef())
26161 NewShufOp = DAG.getPOISON(VT);
26162 else {
26163 SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
26164 DAG.getPOISON(VT: OpVT));
26165 ShufOpParts[0] = ShufOp;
26166 NewShufOp = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT, Ops: ShufOpParts);
26167 }
26168 }
26169 // Finally, create the new wide shuffle.
26170 return DAG.getVectorShuffle(VT, dl, N1: ShufOps[0], N2: ShufOps[1], Mask);
26171}
26172
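// Fold a CONCAT_VECTORS of identical SPLAT_VECTOR operands into one wider
// SPLAT_VECTOR, e.g. (illustrative):
//   concat_vectors (nxv2i32 splat_vector X), (nxv2i32 splat_vector X)
//     --> (nxv4i32 splat_vector X)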
26173static SDValue combineConcatVectorOfSplats(SDNode *N, SelectionDAG &DAG,
26174 const TargetLowering &TLI,
26175 bool LegalTypes,
26176 bool LegalOperations) {
26177 EVT VT = N->getValueType(ResNo: 0);
26178
26179 // Post-legalization we can only create wider SPLAT_VECTOR operations if both
26180 // the type and operation is legal. The Hexagon target has custom
26181 // legalization for SPLAT_VECTOR that splits the operation into two parts and
26182 // concatenates them. Therefore, custom lowering must also be rejected in
26183 // order to avoid an infinite loop.
26184 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
26185 (LegalOperations && !TLI.isOperationLegal(Op: ISD::SPLAT_VECTOR, VT)))
26186 return SDValue();
26187
26188 SDValue Op0 = N->getOperand(Num: 0);
26189 if (!llvm::all_equal(Range: N->op_values()) || Op0.getOpcode() != ISD::SPLAT_VECTOR)
26190 return SDValue();
26191
26192 return DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: SDLoc(N), VT, Operand: Op0.getOperand(i: 0));
26193}
26194
26195SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
26196 // If we only have one input vector, we don't need to do any concatenation.
26197 if (N->getNumOperands() == 1)
26198 return N->getOperand(Num: 0);
26199
26200 // Check if all of the operands are undefs.
26201 EVT VT = N->getValueType(ResNo: 0);
26202 if (ISD::allOperandsUndef(N))
26203 return DAG.getUNDEF(VT);
26204
26205 // Optimize concat_vectors where all but the first of the vectors are undef.
26206 if (all_of(Range: drop_begin(RangeOrContainer: N->ops()),
26207 P: [](const SDValue &Op) { return Op.isUndef(); })) {
26208 SDValue In = N->getOperand(Num: 0);
26209 assert(In.getValueType().isVector() && "Must concat vectors");
26210
26211 // If the input is a concat_vectors, just make a larger concat by padding
26212 // with smaller undefs.
26213 //
26214 // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining
26215 // here could cause an infinite loop. That legalization happens when LegalDAG
26216 // is true and the input of AArch64TargetLowering::LowerCONCAT_VECTORS() is
26217 // scalable.
26218 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() &&
26219 !(LegalDAG && In.getValueType().isScalableVector())) {
26220 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
26221 SmallVector<SDValue, 4> Ops(In->ops());
26222 Ops.resize(N: NumOps, NV: DAG.getPOISON(VT: Ops[0].getValueType()));
26223 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: SDLoc(N), VT, Ops);
26224 }
26225
26226 SDValue Scalar = peekThroughOneUseBitcasts(V: In);
26227
26228 // concat_vectors(scalar_to_vector(scalar), undef) ->
26229 // scalar_to_vector(scalar)
26230 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
26231 Scalar.hasOneUse()) {
26232 EVT SVT = Scalar.getValueType().getVectorElementType();
26233 if (SVT == Scalar.getOperand(i: 0).getValueType())
26234 Scalar = Scalar.getOperand(i: 0);
26235 }
26236
26237 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
26238 if (!Scalar.getValueType().isVector() && In.hasOneUse()) {
26239 // If the bitcast type isn't legal, it might be a trunc of a legal type;
26240 // look through the trunc so we can still do the transform:
26241 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
26242 if (Scalar->getOpcode() == ISD::TRUNCATE &&
26243 !TLI.isTypeLegal(VT: Scalar.getValueType()) &&
26244 TLI.isTypeLegal(VT: Scalar->getOperand(Num: 0).getValueType()))
26245 Scalar = Scalar->getOperand(Num: 0);
26246
26247 EVT SclTy = Scalar.getValueType();
26248
26249 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
26250 return SDValue();
26251
26252 // Bail out if the vector size is not a multiple of the scalar size.
26253 if (VT.getSizeInBits() % SclTy.getSizeInBits())
26254 return SDValue();
26255
26256 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
26257 if (VNTNumElms < 2)
26258 return SDValue();
26259
26260 EVT NVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: SclTy, NumElements: VNTNumElms);
26261 if (!TLI.isTypeLegal(VT: NVT) || !TLI.isTypeLegal(VT: Scalar.getValueType()))
26262 return SDValue();
26263
26264 SDValue Res = DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL: SDLoc(N), VT: NVT, Operand: Scalar);
26265 return DAG.getBitcast(VT, V: Res);
26266 }
26267 }
26268
26269 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
26270 // We have already tested above for an UNDEF only concatenation.
26271 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
26272 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
26273 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
26274 return Op.isUndef() || ISD::BUILD_VECTOR == Op.getOpcode();
26275 };
26276 if (llvm::all_of(Range: N->ops(), P: IsBuildVectorOrUndef)) {
26277 SmallVector<SDValue, 8> Opnds;
26278 EVT SVT = VT.getScalarType();
26279
26280 EVT MinVT = SVT;
26281 if (!SVT.isFloatingPoint()) {
26282 // If the BUILD_VECTORs are built from integers, they may have different
26283 // operand types. Get the smallest type and truncate all operands to it.
26284 bool FoundMinVT = false;
26285 for (const SDValue &Op : N->ops())
26286 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
26287 EVT OpSVT = Op.getOperand(i: 0).getValueType();
26288 MinVT = (!FoundMinVT || OpSVT.bitsLE(VT: MinVT)) ? OpSVT : MinVT;
26289 FoundMinVT = true;
26290 }
26291 assert(FoundMinVT && "Concat vector type mismatch");
26292 }
26293
26294 for (const SDValue &Op : N->ops()) {
26295 EVT OpVT = Op.getValueType();
26296 unsigned NumElts = OpVT.getVectorNumElements();
26297
26298 if (Op.isUndef())
26299 Opnds.append(NumInputs: NumElts, Elt: DAG.getPOISON(VT: MinVT));
26300
26301 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
26302 if (SVT.isFloatingPoint()) {
26303 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
26304 Opnds.append(in_start: Op->op_begin(), in_end: Op->op_begin() + NumElts);
26305 } else {
26306 for (unsigned i = 0; i != NumElts; ++i)
26307 Opnds.push_back(
26308 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(N), VT: MinVT, Operand: Op.getOperand(i)));
26309 }
26310 }
26311 }
26312
26313 assert(VT.getVectorNumElements() == Opnds.size() &&
26314 "Concat vector type mismatch");
26315 return DAG.getBuildVector(VT, DL: SDLoc(N), Ops: Opnds);
26316 }
26317
26318 if (SDValue V =
26319 combineConcatVectorOfSplats(N, DAG, TLI, LegalTypes, LegalOperations))
26320 return V;
26321
26322 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
26323 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
26324 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
26325 return V;
26326
26327 if (Level <= AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
26328 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
26329 if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
26330 return V;
26331
26332 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
26333 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
26334 return V;
26335 }
26336
26337 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
26338 return V;
26339
26340 if (SDValue V = combineConcatVectorOfShuffleAndItsOperands(
26341 N, DAG, TLI, LegalTypes, LegalOperations))
26342 return V;
26343
26344 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
26345 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
26346 // operands and look for CONCAT operations that place the incoming vectors
26347 // at the exact same location.
26348 //
26349 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
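// Illustrative example (arbitrary types):
//   concat_vectors (v4i32 extract_subvector v8i32:X, 0),
//                  (v4i32 extract_subvector v8i32:X, 4)
//     --> v8i32:X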
26350 SDValue SingleSource = SDValue();
26351 unsigned PartNumElem =
26352 N->getOperand(Num: 0).getValueType().getVectorMinNumElements();
26353
26354 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
26355 SDValue Op = N->getOperand(Num: i);
26356
26357 if (Op.isUndef())
26358 continue;
26359
26360 // Check if this is the identity extract:
26361 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
26362 return SDValue();
26363
26364 // Find the single incoming vector for the extract_subvector.
26365 if (SingleSource.getNode()) {
26366 if (Op.getOperand(i: 0) != SingleSource)
26367 return SDValue();
26368 } else {
26369 SingleSource = Op.getOperand(i: 0);
26370
26371 // Check that the source type is the same as the type of the result.
26372 // If not, this concat may extend the vector, so we cannot
26373 // optimize it away.
26374 if (SingleSource.getValueType() != N->getValueType(ResNo: 0))
26375 return SDValue();
26376 }
26377
26378 // Check that we are reading from the identity index.
26379 unsigned IdentityIndex = i * PartNumElem;
26380 if (Op.getConstantOperandAPInt(i: 1) != IdentityIndex)
26381 return SDValue();
26382 }
26383
26384 if (SingleSource.getNode())
26385 return SingleSource;
26386
26387 return SDValue();
26388}
26389
26390SDValue DAGCombiner::visitVECTOR_INTERLEAVE(SDNode *N) {
26391 // Check to see if all operands are identical.
26392 if (!llvm::all_equal(Range: N->op_values()))
26393 return SDValue();
26394
26395 // Check to see if the identical operand is a splat.
26396 if (!DAG.isSplatValue(V: N->getOperand(Num: 0)))
26397 return SDValue();
26398
26399 // interleave splat(X), splat(X).... --> splat(X), splat(X)....
26400 SmallVector<SDValue, 4> Ops;
26401 Ops.append(in_start: N->op_values().begin(), in_end: N->op_values().end());
26402 return CombineTo(N, To: &Ops);
26403}
26404
26405// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
26406// if the subvector can be sourced for free.
26407static SDValue getSubVectorSrc(SDValue V, unsigned Index, EVT SubVT) {
26408 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
26409 V.getOperand(i: 1).getValueType() == SubVT &&
26410 V.getConstantOperandAPInt(i: 2) == Index) {
26411 return V.getOperand(i: 1);
26412 }
26413 if (V.getOpcode() == ISD::CONCAT_VECTORS &&
26414 V.getOperand(i: 0).getValueType() == SubVT &&
26415 (Index % SubVT.getVectorMinNumElements()) == 0) {
26416 uint64_t SubIdx = Index / SubVT.getVectorMinNumElements();
26417 return V.getOperand(i: SubIdx);
26418 }
26419 return SDValue();
26420}
26421
26422static SDValue narrowInsertExtractVectorBinOp(EVT SubVT, SDValue BinOp,
26423 unsigned Index, const SDLoc &DL,
26424 SelectionDAG &DAG,
26425 bool LegalOperations) {
26426 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26427 unsigned BinOpcode = BinOp.getOpcode();
26428 if (!TLI.isBinOp(Opcode: BinOpcode) || BinOp->getNumValues() != 1)
26429 return SDValue();
26430
26431 EVT VecVT = BinOp.getValueType();
26432 SDValue Bop0 = BinOp.getOperand(i: 0), Bop1 = BinOp.getOperand(i: 1);
26433 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
26434 return SDValue();
26435 if (!TLI.isOperationLegalOrCustom(Op: BinOpcode, VT: SubVT, LegalOnly: LegalOperations))
26436 return SDValue();
26437
26438 SDValue Sub0 = getSubVectorSrc(V: Bop0, Index, SubVT);
26439 SDValue Sub1 = getSubVectorSrc(V: Bop1, Index, SubVT);
26440
26441 // TODO: We could handle the case where only 1 operand is being inserted by
26442 // creating an extract of the other operand, but that requires checking
26443 // number of uses and/or costs.
26444 if (!Sub0 || !Sub1)
26445 return SDValue();
26446
26447 // We are inserting both operands of the wide binop only to extract back
26448 // to the narrow vector size. Eliminate all of the insert/extract:
26449 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
26450 return DAG.getNode(Opcode: BinOpcode, DL, VT: SubVT, N1: Sub0, N2: Sub1, Flags: BinOp->getFlags());
26451}
26452
26453/// If we are extracting a subvector produced by a wide binary operator try
26454/// to use a narrow binary operator and/or avoid concatenation and extraction.
26455static SDValue narrowExtractedVectorBinOp(EVT VT, SDValue Src, unsigned Index,
26456 const SDLoc &DL, SelectionDAG &DAG,
26457 bool LegalOperations) {
26458 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
26459 // some of these bailouts with other transforms.
26460
26461 if (SDValue V = narrowInsertExtractVectorBinOp(SubVT: VT, BinOp: Src, Index, DL, DAG,
26462 LegalOperations))
26463 return V;
26464
26465 // We are looking for an optionally bitcasted wide vector binary operator
26466 // feeding an extract subvector.
26467 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26468 SDValue BinOp = peekThroughBitcasts(V: Src);
26469 unsigned BOpcode = BinOp.getOpcode();
26470 if (!TLI.isBinOp(Opcode: BOpcode) || BinOp->getNumValues() != 1)
26471 return SDValue();
26472
26473 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
26474 // reduced to the unary fneg when it is visited, and we probably want to deal
26475 // with fneg in a target-specific way.
26476 if (BOpcode == ISD::FSUB) {
26477 auto *C = isConstOrConstSplatFP(N: BinOp.getOperand(i: 0), /*AllowUndefs*/ true);
26478 if (C && C->getValueAPF().isNegZero())
26479 return SDValue();
26480 }
26481
26482 // The binop must be a vector type, so we can extract some fraction of it.
26483 EVT WideBVT = BinOp.getValueType();
26484 // The optimisations below currently assume we are dealing with fixed length
26485 // vectors. It is possible to add support for scalable vectors, but at the
26486 // moment we've done no analysis to prove whether they are profitable or not.
26487 if (!WideBVT.isFixedLengthVector())
26488 return SDValue();
26489
26490 assert((Index % VT.getVectorNumElements()) == 0 &&
26491 "Extract index is not a multiple of the vector length.");
26492
26493 // Bail out if this is not a proper multiple width extraction.
26494 unsigned WideWidth = WideBVT.getSizeInBits();
26495 unsigned NarrowWidth = VT.getSizeInBits();
26496 if (WideWidth % NarrowWidth != 0)
26497 return SDValue();
26498
26499 // Bail out if we are extracting a fraction of a single operation. This can
26500 // occur because we potentially looked through a bitcast of the binop.
26501 unsigned NarrowingRatio = WideWidth / NarrowWidth;
26502 unsigned WideNumElts = WideBVT.getVectorNumElements();
26503 if (WideNumElts % NarrowingRatio != 0)
26504 return SDValue();
26505
26506 // Bail out if the target does not support a narrower version of the binop.
26507 EVT NarrowBVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideBVT.getScalarType(),
26508 NumElements: WideNumElts / NarrowingRatio);
26509 if (!TLI.isOperationLegalOrCustomOrPromote(Op: BOpcode, VT: NarrowBVT,
26510 LegalOnly: LegalOperations))
26511 return SDValue();
26512
26513 // If extraction is cheap, we don't need to look at the binop operands
26514 // for concat ops. The narrow binop alone makes this transform profitable.
26515 // We can't just reuse the original extract index operand because we may have
26516 // bitcasted.
26517 unsigned ConcatOpNum = Index / VT.getVectorNumElements();
26518 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
26519 if (TLI.isExtractSubvectorCheap(ResVT: NarrowBVT, SrcVT: WideBVT, Index: ExtBOIdx) &&
26520 BinOp.hasOneUse() && Src->hasOneUse()) {
26521 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
26522 SDValue NewExtIndex = DAG.getVectorIdxConstant(Val: ExtBOIdx, DL);
26523 SDValue X = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: NarrowBVT,
26524 N1: BinOp.getOperand(i: 0), N2: NewExtIndex);
26525 SDValue Y = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: NarrowBVT,
26526 N1: BinOp.getOperand(i: 1), N2: NewExtIndex);
26527 SDValue NarrowBinOp =
26528 DAG.getNode(Opcode: BOpcode, DL, VT: NarrowBVT, N1: X, N2: Y, Flags: BinOp->getFlags());
26529 return DAG.getBitcast(VT, V: NarrowBinOp);
26530 }
26531
26532 // Only handle the case where we are doubling and then halving. A larger ratio
26533 // may require more than two narrow binops to replace the wide binop.
26534 if (NarrowingRatio != 2)
26535 return SDValue();
26536
26537 // TODO: The motivating case for this transform is an x86 AVX1 target. That
26538 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
26539 // flavors, but no other 256-bit integer support. This could be extended to
26540 // handle any binop, but that may require fixing/adding other folds to avoid
26541 // codegen regressions.
26542 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
26543 return SDValue();
26544
26545 // We need at least one concatenation operation of a binop operand to make
26546 // this transform worthwhile. The concat must double the input vector sizes.
26547 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
26548 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
26549 return V.getOperand(i: ConcatOpNum);
26550 return SDValue();
26551 };
26552 SDValue SubVecL = GetSubVector(peekThroughBitcasts(V: BinOp.getOperand(i: 0)));
26553 SDValue SubVecR = GetSubVector(peekThroughBitcasts(V: BinOp.getOperand(i: 1)));
26554
26555 if (SubVecL || SubVecR) {
26556 // If a binop operand was not the result of a concat, we must extract a
26557 // half-sized operand for our new narrow binop:
26558 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
26559 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
26560 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
26561 SDValue IndexC = DAG.getVectorIdxConstant(Val: ExtBOIdx, DL);
26562 SDValue X = SubVecL ? DAG.getBitcast(VT: NarrowBVT, V: SubVecL)
26563 : DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: NarrowBVT,
26564 N1: BinOp.getOperand(i: 0), N2: IndexC);
26565
26566 SDValue Y = SubVecR ? DAG.getBitcast(VT: NarrowBVT, V: SubVecR)
26567 : DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: NarrowBVT,
26568 N1: BinOp.getOperand(i: 1), N2: IndexC);
26569
26570 SDValue NarrowBinOp = DAG.getNode(Opcode: BOpcode, DL, VT: NarrowBVT, N1: X, N2: Y);
26571 return DAG.getBitcast(VT, V: NarrowBinOp);
26572 }
26573
26574 return SDValue();
26575}
26576
26577/// If we are extracting a subvector from a wide vector load, convert to a
26578/// narrow load to eliminate the extraction:
26579/// (extract_subvector (load wide vector)) --> (load narrow vector)
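/// Illustrative example (arbitrary types, little-endian):
///   (v4i32 extract_subvector (v8i32 load %p), 4)
///     --> (v4i32 load %p + 16 bytes)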
26580static SDValue narrowExtractedVectorLoad(EVT VT, SDValue Src, unsigned Index,
26581 const SDLoc &DL, SelectionDAG &DAG) {
26582 // TODO: Add support for big-endian. The offset calculation must be adjusted.
26583 if (DAG.getDataLayout().isBigEndian())
26584 return SDValue();
26585
26586 auto *Ld = dyn_cast<LoadSDNode>(Val&: Src);
26587 if (!Ld || !ISD::isNormalLoad(N: Ld) || !Ld->isSimple())
26588 return SDValue();
26589
26590 // We can only create byte sized loads.
26591 if (!VT.isByteSized())
26592 return SDValue();
26593
26594 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26595 if (!TLI.isOperationLegalOrCustomOrPromote(Op: ISD::LOAD, VT))
26596 return SDValue();
26597
26598 unsigned NumElts = VT.getVectorMinNumElements();
26599 // A fixed length vector being extracted from a scalable vector
26600 // may not be any *smaller* than the scalable one.
26601 if (Index == 0 && NumElts >= Ld->getValueType(ResNo: 0).getVectorMinNumElements())
26602 return SDValue();
26603
26604 // The definition of EXTRACT_SUBVECTOR states that the index must be a
26605 // multiple of the minimum number of elements in the result type.
26606 assert(Index % NumElts == 0 && "The extract subvector index is not a "
26607 "multiple of the result's element count");
26608
26609 // It's fine to use TypeSize here as we know the offset will not be negative.
26610 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
26611 std::optional<unsigned> ByteOffset;
26612 if (Offset.isFixed())
26613 ByteOffset = Offset.getFixedValue();
26614
26615 if (!TLI.shouldReduceLoadWidth(Load: Ld, ExtTy: Ld->getExtensionType(), NewVT: VT, ByteOffset))
26616 return SDValue();
26617
26618 // The narrow load will be offset from the base address of the old load if
26619 // we are extracting from something besides index 0 (little-endian).
26620 // TODO: Use "BaseIndexOffset" to make this more effective.
26621 SDValue NewAddr = DAG.getMemBasePlusOffset(Base: Ld->getBasePtr(), Offset, DL);
26622
26623 MachineFunction &MF = DAG.getMachineFunction();
26624 MachineMemOperand *MMO;
26625 if (Offset.isScalable()) {
26626 MachinePointerInfo MPI =
26627 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
26628 MMO = MF.getMachineMemOperand(MMO: Ld->getMemOperand(), PtrInfo: MPI, Size: VT.getStoreSize());
26629 } else
26630 MMO = MF.getMachineMemOperand(MMO: Ld->getMemOperand(), Offset: Offset.getFixedValue(),
26631 Size: VT.getStoreSize());
26632
26633 SDValue NewLd = DAG.getLoad(VT, dl: DL, Chain: Ld->getChain(), Ptr: NewAddr, MMO);
26634 DAG.makeEquivalentMemoryOrdering(OldLoad: Ld, NewMemOp: NewLd);
26635 return NewLd;
26636}
26637
26638/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
26639/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
26640/// EXTRACT_SUBVECTOR(Op?, ?),
26641/// Mask'))
26642/// iff it is legal and profitable to do so. Notably, the trimmed mask
26643/// (containing only the elements that are extracted)
26644/// must reference at most two subvectors.
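/// Illustrative example (arbitrary types):
///   (v4i32 extract_subvector
///        (v8i32 vector_shuffle<0,8,1,9,2,10,3,11> X, Y), 0)
///     --> vector_shuffle<0,4,1,5> (v4i32 extract_subvector X, 0),
///                                 (v4i32 extract_subvector Y, 0)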
26645static SDValue foldExtractSubvectorFromShuffleVector(EVT NarrowVT, SDValue Src,
26646 unsigned Index,
26647 const SDLoc &DL,
26648 SelectionDAG &DAG,
26649 bool LegalOperations) {
26650 // Only deal with non-scalable vectors.
26651 EVT WideVT = Src.getValueType();
26652 if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
26653 return SDValue();
26654
26655 // The operand must be a shufflevector.
26656 auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(Val&: Src);
26657 if (!WideShuffleVector)
26658 return SDValue();
26659
26660 // The old shuffle needs to go away.
26661 if (!WideShuffleVector->hasOneUse())
26662 return SDValue();
26663
26664 // And the narrow shufflevector that we'll form must be legal.
26665 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26666 if (LegalOperations &&
26667 !TLI.isOperationLegalOrCustom(Op: ISD::VECTOR_SHUFFLE, VT: NarrowVT))
26668 return SDValue();
26669
26670 int NumEltsExtracted = NarrowVT.getVectorNumElements();
26671 assert((Index % NumEltsExtracted) == 0 &&
26672 "Extract index is not a multiple of the output vector length.");
26673
26674 int WideNumElts = WideVT.getVectorNumElements();
26675
26676 SmallVector<int, 16> NewMask;
26677 NewMask.reserve(N: NumEltsExtracted);
26678 SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
26679 DemandedSubvectors;
26680
26681 // Try to decode the wide mask into narrow mask from at most two subvectors.
26682 for (int M : WideShuffleVector->getMask().slice(N: Index, M: NumEltsExtracted)) {
26683 assert((M >= -1) && (M < (2 * WideNumElts)) &&
26684 "Out-of-bounds shuffle mask?");
26685
26686 if (M < 0) {
26687 // Does not depend on operands, does not require adjustment.
26688 NewMask.emplace_back(Args&: M);
26689 continue;
26690 }
26691
26692 // From which operand of the shuffle does this shuffle mask element pick?
26693 int WideShufOpIdx = M / WideNumElts;
26694 // Which element of that operand is picked?
26695 int OpEltIdx = M % WideNumElts;
26696
26697 assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
26698 "Shuffle mask vector decomposition failure.");
26699
26700 // And which NumEltsExtracted-sized subvector of that operand is that?
26701 int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
26702 // And which element within that subvector of that operand is that?
26703 int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
26704
26705 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
26706 "Shuffle mask subvector decomposition failure.");
26707
26708 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
26709 WideShufOpIdx * WideNumElts) == M &&
26710 "Shuffle mask full decomposition failure.");
26711
26712 SDValue Op = WideShuffleVector->getOperand(Num: WideShufOpIdx);
26713
26714 if (Op.isUndef()) {
26715 // Picking from an undef operand. Let's adjust mask instead.
26716 NewMask.emplace_back(Args: -1);
26717 continue;
26718 }
26719
26720 const std::pair<SDValue, int> DemandedSubvector =
26721 std::make_pair(x&: Op, y&: OpSubvecIdx);
26722
26723 if (DemandedSubvectors.insert(X: DemandedSubvector)) {
26724 if (DemandedSubvectors.size() > 2)
26725 return SDValue(); // We can't handle more than two subvectors.
26726 // How many elements into the WideVT does this subvector start?
26727 int Index = NumEltsExtracted * OpSubvecIdx;
26728 // Bail out if the extraction isn't going to be cheap.
26729 if (!TLI.isExtractSubvectorCheap(ResVT: NarrowVT, SrcVT: WideVT, Index))
26730 return SDValue();
26731 }
26732
26733 // Ok, but from which operand of the new shuffle will this element pick?
26734 int NewOpIdx =
26735 getFirstIndexOf(Range: DemandedSubvectors.getArrayRef(), Val: DemandedSubvector);
26736 assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
26737
26738 int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
26739 NewMask.emplace_back(Args&: AdjM);
26740 }
26741 assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
26742 assert(DemandedSubvectors.size() <= 2 &&
26743 "Should have ended up demanding at most two subvectors.");
26744
26745 // Did we discover that the shuffle does not actually depend on operands?
26746 if (DemandedSubvectors.empty())
26747 return DAG.getPOISON(VT: NarrowVT);
26748
26749 // Profitability check: only deal with extractions from the first subvector
26750 // unless the mask becomes an identity mask.
26751 if (!ShuffleVectorInst::isIdentityMask(Mask: NewMask, NumSrcElts: NewMask.size()) ||
26752 any_of(Range&: NewMask, P: [](int M) { return M < 0; }))
26753 for (auto &DemandedSubvector : DemandedSubvectors)
26754 if (DemandedSubvector.second != 0)
26755 return SDValue();
26756
26757 // We still perform the exact same EXTRACT_SUBVECTOR, just on different
26758 // operand[s]/index[es], so there is no point in checking for its legality.
26759
26760 // Do not turn a legal shuffle into an illegal one.
26761 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
26762 !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
26763 return SDValue();
26764
26765 SmallVector<SDValue, 2> NewOps;
26766 for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
26767 &DemandedSubvector : DemandedSubvectors) {
26768 // How many elements into the WideVT does this subvector start?
26769 int Index = NumEltsExtracted * DemandedSubvector.second;
26770 SDValue IndexC = DAG.getVectorIdxConstant(Val: Index, DL);
26771 NewOps.emplace_back(Args: DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: NarrowVT,
26772 N1: DemandedSubvector.first, N2: IndexC));
26773 }
26774 assert((NewOps.size() == 1 || NewOps.size() == 2) &&
26775 "Should end up with either one or two ops");
26776
26777 // If we ended up with only one operand, pad with poison.
26778 if (NewOps.size() == 1)
26779 NewOps.emplace_back(Args: DAG.getPOISON(VT: NarrowVT));
26780
26781 return DAG.getVectorShuffle(VT: NarrowVT, dl: DL, N1: NewOps[0], N2: NewOps[1], Mask: NewMask);
26782}
26783
26784SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
26785 EVT NVT = N->getValueType(ResNo: 0);
26786 SDValue V = N->getOperand(Num: 0);
26787 uint64_t ExtIdx = N->getConstantOperandVal(Num: 1);
26788 SDLoc DL(N);
26789
26790 // Extract from UNDEF is UNDEF.
26791 if (V.isUndef())
26792 return DAG.getUNDEF(VT: NVT);
26793
26794 if (SDValue NarrowLoad = narrowExtractedVectorLoad(VT: NVT, Src: V, Index: ExtIdx, DL, DAG))
26795 return NarrowLoad;
26796
26797 // Combine an extract of an extract into a single extract_subvector.
26798 // ext (ext X, C), 0 --> ext X, C
26799 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
26800 // The index has to be a multiple of the new result type's known minimum
26801 // vector length.
26802 if (V.getConstantOperandVal(i: 1) % NVT.getVectorMinNumElements() == 0 &&
26803 TLI.isExtractSubvectorCheap(ResVT: NVT, SrcVT: V.getOperand(i: 0).getValueType(),
26804 Index: V.getConstantOperandVal(i: 1)) &&
26805 TLI.isOperationLegalOrCustom(Op: ISD::EXTRACT_SUBVECTOR, VT: NVT)) {
26806 return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: NVT, N1: V.getOperand(i: 0),
26807 N2: V.getOperand(i: 1));
26808 }
26809 }
26810
26811 // ty1 extract_vector(ty2 splat(V))) -> ty1 splat(V)
26812 if (V.getOpcode() == ISD::SPLAT_VECTOR)
26813 if (DAG.isConstantValueOfAnyType(N: V.getOperand(i: 0)) || V.hasOneUse())
26814 if (!LegalOperations || TLI.isOperationLegal(Op: ISD::SPLAT_VECTOR, VT: NVT))
26815 return DAG.getSplatVector(VT: NVT, DL, Op: V.getOperand(i: 0));
26816
26817 // extract_subvector(insert_subvector(x,y,c1),c2)
26818 // --> extract_subvector(y,c2-c1)
26819 // iff we're just extracting from the inserted subvector.
26820 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
26821 SDValue InsSub = V.getOperand(i: 1);
26822 EVT InsSubVT = InsSub.getValueType();
26823 unsigned NumInsElts = InsSubVT.getVectorMinNumElements();
26824 unsigned InsIdx = V.getConstantOperandVal(i: 2);
26825 unsigned NumSubElts = NVT.getVectorMinNumElements();
26826 if (InsIdx <= ExtIdx && (ExtIdx + NumSubElts) <= (InsIdx + NumInsElts) &&
26827 TLI.isExtractSubvectorCheap(ResVT: NVT, SrcVT: InsSubVT, Index: ExtIdx - InsIdx) &&
26828 InsSubVT.isFixedLengthVector() && NVT.isFixedLengthVector() &&
26829 V.getValueType().isFixedLengthVector())
26830 return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: NVT, N1: InsSub,
26831 N2: DAG.getVectorIdxConstant(Val: ExtIdx - InsIdx, DL));
26832 }
26833
26834 // Try to move vector bitcast after extract_subv by scaling extraction index:
26835 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
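// Illustrative example (arbitrary types):
//   (v2i64 extract_subvector (v4i64 bitcast (v8i32 X)), 2)
//     --> (v2i64 bitcast (v4i32 extract_subvector v8i32:X, 4))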
26836 if (V.getOpcode() == ISD::BITCAST &&
26837 V.getOperand(i: 0).getValueType().isVector() &&
26838 (!LegalOperations || TLI.isOperationLegal(Op: ISD::BITCAST, VT: NVT))) {
26839 SDValue SrcOp = V.getOperand(i: 0);
26840 EVT SrcVT = SrcOp.getValueType();
26841 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
26842 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
26843 if ((SrcNumElts % DestNumElts) == 0) {
26844 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
26845 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
26846 EVT NewExtVT =
26847 EVT::getVectorVT(Context&: *DAG.getContext(), VT: SrcVT.getScalarType(), EC: NewExtEC);
26848 if (TLI.isOperationLegalOrCustom(Op: ISD::EXTRACT_SUBVECTOR, VT: NewExtVT)) {
26849 SDValue NewIndex = DAG.getVectorIdxConstant(Val: ExtIdx * SrcDestRatio, DL);
26850 SDValue NewExtract = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: NewExtVT,
26851 N1: V.getOperand(i: 0), N2: NewIndex);
26852 return DAG.getBitcast(VT: NVT, V: NewExtract);
26853 }
26854 }
26855 if ((DestNumElts % SrcNumElts) == 0) {
26856 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
26857 if (NVT.getVectorElementCount().isKnownMultipleOf(RHS: DestSrcRatio)) {
26858 ElementCount NewExtEC =
26859 NVT.getVectorElementCount().divideCoefficientBy(RHS: DestSrcRatio);
26860 EVT ScalarVT = SrcVT.getScalarType();
26861 if ((ExtIdx % DestSrcRatio) == 0) {
26862 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
26863 EVT NewExtVT =
26864 EVT::getVectorVT(Context&: *DAG.getContext(), VT: ScalarVT, EC: NewExtEC);
26865 if (TLI.isOperationLegalOrCustom(Op: ISD::EXTRACT_SUBVECTOR, VT: NewExtVT)) {
26866 SDValue NewIndex = DAG.getVectorIdxConstant(Val: IndexValScaled, DL);
26867 SDValue NewExtract =
26868 DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: NewExtVT,
26869 N1: V.getOperand(i: 0), N2: NewIndex);
26870 return DAG.getBitcast(VT: NVT, V: NewExtract);
26871 }
26872 if (NewExtEC.isScalar() &&
26873 TLI.isOperationLegalOrCustom(Op: ISD::EXTRACT_VECTOR_ELT, VT: ScalarVT)) {
26874 SDValue NewIndex = DAG.getVectorIdxConstant(Val: IndexValScaled, DL);
26875 SDValue NewExtract =
26876 DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: ScalarVT,
26877 N1: V.getOperand(i: 0), N2: NewIndex);
26878 return DAG.getBitcast(VT: NVT, V: NewExtract);
26879 }
26880 }
26881 }
26882 }
26883 }
26884
26885 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
26886 unsigned ExtNumElts = NVT.getVectorMinNumElements();
26887 EVT ConcatSrcVT = V.getOperand(i: 0).getValueType();
26888 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
26889 "Concat and extract subvector do not change element type");
26890
26891 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
26892 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
26893
26894 // If the concatenated source types match this extract, it's a direct
26895 // simplification:
26896 // extract_subvec (concat V1, V2, ...), i --> Vi
26897 if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
26898 return V.getOperand(i: ConcatOpIdx);
26899
26900 // If the concatenated source vectors' length is a multiple of the length of
26901 // this extract, then extract a fraction of one of those source vectors
26902 // directly from a concat operand. Example:
26903 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
26904 // v2i8 extract_subvec v8i8 Y, 6
26905 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
26906 ConcatSrcNumElts % ExtNumElts == 0) {
26907 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
26908 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
26909 "Trying to extract from >1 concat operand?");
26910 assert(NewExtIdx % ExtNumElts == 0 &&
26911 "Extract index is not a multiple of the input vector length.");
26912 SDValue NewIndexC = DAG.getVectorIdxConstant(Val: NewExtIdx, DL);
26913 return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: NVT,
26914 N1: V.getOperand(i: ConcatOpIdx), N2: NewIndexC);
26915 }
26916 }
26917
26918 if (SDValue Shuffle = foldExtractSubvectorFromShuffleVector(
26919 NarrowVT: NVT, Src: V, Index: ExtIdx, DL, DAG, LegalOperations))
26920 return Shuffle;
26921
26922 if (SDValue NarrowBOp =
26923 narrowExtractedVectorBinOp(VT: NVT, Src: V, Index: ExtIdx, DL, DAG, LegalOperations))
26924 return NarrowBOp;
26925
26926 V = peekThroughBitcasts(V);
26927
26928 // If the input is a build vector, try to make a smaller build vector.
26929 if (V.getOpcode() == ISD::BUILD_VECTOR) {
26930 EVT InVT = V.getValueType();
26931 unsigned ExtractSize = NVT.getSizeInBits();
26932 unsigned EltSize = InVT.getScalarSizeInBits();
26933 // Only do this if we won't split any elements.
26934 if (ExtractSize % EltSize == 0) {
26935 unsigned NumElems = ExtractSize / EltSize;
26936 EVT EltVT = InVT.getVectorElementType();
26937 EVT ExtractVT =
26938 NumElems == 1 ? EltVT
26939 : EVT::getVectorVT(Context&: *DAG.getContext(), VT: EltVT, NumElements: NumElems);
26940 if ((Level < AfterLegalizeDAG ||
26941 (NumElems == 1 ||
26942 TLI.isOperationLegal(Op: ISD::BUILD_VECTOR, VT: ExtractVT))) &&
26943 (!LegalTypes || TLI.isTypeLegal(VT: ExtractVT))) {
26944 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
26945
26946 if (NumElems == 1) {
26947 SDValue Src = V->getOperand(Num: IdxVal);
26948 if (EltVT != Src.getValueType())
26949 Src = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: EltVT, Operand: Src);
26950 return DAG.getBitcast(VT: NVT, V: Src);
26951 }
26952
26953 // Extract the pieces from the original build_vector.
26954 SDValue BuildVec =
26955 DAG.getBuildVector(VT: ExtractVT, DL, Ops: V->ops().slice(N: IdxVal, M: NumElems));
26956 return DAG.getBitcast(VT: NVT, V: BuildVec);
26957 }
26958 }
26959 }
26960
26961 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
26962 // Handle only the simple case where the vector being inserted and the
26963 // vector being extracted are of the same size.
26964 EVT SmallVT = V.getOperand(i: 1).getValueType();
26965 if (NVT.bitsEq(VT: SmallVT)) {
26966 // Combine:
26967 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
26968 // Into:
26969 // indices are equal or bit offsets are equal => V1
26970 // otherwise => (extract_subvec V1, ExtIdx)
26971 uint64_t InsIdx = V.getConstantOperandVal(i: 2);
26972 if (InsIdx * SmallVT.getScalarSizeInBits() ==
26973 ExtIdx * NVT.getScalarSizeInBits()) {
26974 if (!LegalOperations || TLI.isOperationLegal(Op: ISD::BITCAST, VT: NVT))
26975 return DAG.getBitcast(VT: NVT, V: V.getOperand(i: 1));
26976 } else {
26977 return DAG.getNode(
26978 Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: NVT,
26979 N1: DAG.getBitcast(VT: N->getOperand(Num: 0).getValueType(), V: V.getOperand(i: 0)),
26980 N2: N->getOperand(Num: 1));
26981 }
26982 }
26983 }
26984
26985 // If only EXTRACT_SUBVECTOR nodes use the source vector we can
26986 // simplify it based on the (valid) extractions.
26987 if (!V.getValueType().isScalableVector() &&
26988 llvm::all_of(Range: V->users(), P: [&](SDNode *Use) {
26989 return Use->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26990 Use->getOperand(Num: 0) == V;
26991 })) {
26992 unsigned NumElts = V.getValueType().getVectorNumElements();
26993 APInt DemandedElts = APInt::getZero(numBits: NumElts);
26994 for (SDNode *User : V->users()) {
26995 unsigned ExtIdx = User->getConstantOperandVal(Num: 1);
26996 unsigned NumSubElts = User->getValueType(ResNo: 0).getVectorNumElements();
26997 DemandedElts.setBits(loBit: ExtIdx, hiBit: ExtIdx + NumSubElts);
26998 }
26999 if (SimplifyDemandedVectorElts(Op: V, DemandedElts, /*AssumeSingleUse=*/true)) {
27000 // We simplified the vector operand of this extract subvector. If this
27001 // extract is not dead, visit it again so it is folded properly.
27002 if (N->getOpcode() != ISD::DELETED_NODE)
27003 AddToWorklist(N);
27004 return SDValue(N, 0);
27005 }
27006 } else {
27007 if (SimplifyDemandedVectorElts(Op: SDValue(N, 0)))
27008 return SDValue(N, 0);
27009 }
27010
27011 return SDValue();
27012}
27013
27014/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
27015/// followed by concatenation. Narrow vector ops may have better performance
27016/// than wide ops, and this can unlock further narrowing of other vector ops.
27017/// Targets can invert this transform later if it is not profitable.
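/// Illustrative example (v4i32 result built from v2i32 halves X and Y,
/// assuming the narrow shuffle masks are legal for the target):
///   shuffle (concat X, undef), (concat Y, undef), <0,4,1,5>
///     --> concat (shuffle X, Y, <0,2>), (shuffle X, Y, <1,3>)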
27018static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
27019 SelectionDAG &DAG) {
27020 SDValue N0 = Shuf->getOperand(Num: 0), N1 = Shuf->getOperand(Num: 1);
27021 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
27022 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
27023 !N0.getOperand(i: 1).isUndef() || !N1.getOperand(i: 1).isUndef())
27024 return SDValue();
27025
27026 // Split the wide shuffle mask into halves. Any mask element that accesses
27027 // operand 1 is offset down to account for narrowing of the vectors.
27028 ArrayRef<int> Mask = Shuf->getMask();
27029 EVT VT = Shuf->getValueType(ResNo: 0);
27030 unsigned NumElts = VT.getVectorNumElements();
27031 unsigned HalfNumElts = NumElts / 2;
27032 SmallVector<int, 16> Mask0(HalfNumElts, -1);
27033 SmallVector<int, 16> Mask1(HalfNumElts, -1);
27034 for (unsigned i = 0; i != NumElts; ++i) {
27035 if (Mask[i] == -1)
27036 continue;
27037 // If we reference the upper (undef) subvector then the element is undef.
27038 if ((Mask[i] % NumElts) >= HalfNumElts)
27039 continue;
27040 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
27041 if (i < HalfNumElts)
27042 Mask0[i] = M;
27043 else
27044 Mask1[i - HalfNumElts] = M;
27045 }
27046
27047 // Ask the target if this is a valid transform.
27048 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
27049 EVT HalfVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: VT.getScalarType(),
27050 NumElements: HalfNumElts);
27051 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
27052 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
27053 return SDValue();
27054
27055 // shuffle (concat X, undef), (concat Y, undef), Mask -->
27056 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
27057 SDValue X = N0.getOperand(i: 0), Y = N1.getOperand(i: 0);
27058 SDLoc DL(Shuf);
27059 SDValue Shuf0 = DAG.getVectorShuffle(VT: HalfVT, dl: DL, N1: X, N2: Y, Mask: Mask0);
27060 SDValue Shuf1 = DAG.getVectorShuffle(VT: HalfVT, dl: DL, N1: X, N2: Y, Mask: Mask1);
27061 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, N1: Shuf0, N2: Shuf1);
27062}
27063
27064// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
27065 // or turn a shuffle of a single concat into a simpler shuffle then concat.
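// Illustrative example (v4i32 shuffle of v2i32 concat operands):
//   shuffle (concat A, B), (concat C, D), <4,5,2,3> --> concat C, B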
27066static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
27067 EVT VT = N->getValueType(ResNo: 0);
27068 unsigned NumElts = VT.getVectorNumElements();
27069
27070 SDValue N0 = N->getOperand(Num: 0);
27071 SDValue N1 = N->getOperand(Num: 1);
27072 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Val: N);
27073 ArrayRef<int> Mask = SVN->getMask();
27074
27075 SmallVector<SDValue, 4> Ops;
27076 EVT ConcatVT = N0.getOperand(i: 0).getValueType();
27077 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
27078 unsigned NumConcats = NumElts / NumElemsPerConcat;
27079
27080 auto IsUndefMaskElt = [](int i) { return i == -1; };
27081
27082 // Special case: shuffle(concat(A,B)) can be more efficiently represented
27083 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
27084 // half vector elements.
27085 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
27086 llvm::all_of(Range: Mask.slice(N: NumElemsPerConcat, M: NumElemsPerConcat),
27087 P: IsUndefMaskElt)) {
27088 N0 = DAG.getVectorShuffle(VT: ConcatVT, dl: SDLoc(N), N1: N0.getOperand(i: 0),
27089 N2: N0.getOperand(i: 1),
27090 Mask: Mask.slice(N: 0, M: NumElemsPerConcat));
27091 N1 = DAG.getPOISON(VT: ConcatVT);
27092 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: SDLoc(N), VT, N1: N0, N2: N1);
27093 }
27094
27095 // Look at every vector that's inserted. We're looking for exact
27096 // subvector-sized copies from a concatenated vector.
27097 for (unsigned I = 0; I != NumConcats; ++I) {
27098 unsigned Begin = I * NumElemsPerConcat;
27099 ArrayRef<int> SubMask = Mask.slice(N: Begin, M: NumElemsPerConcat);
27100
27101 // Make sure we're dealing with a copy.
27102 if (llvm::all_of(Range&: SubMask, P: IsUndefMaskElt)) {
27103 Ops.push_back(Elt: DAG.getUNDEF(VT: ConcatVT));
27104 continue;
27105 }
27106
27107 int OpIdx = -1;
27108 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
27109 if (IsUndefMaskElt(SubMask[i]))
27110 continue;
27111 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
27112 return SDValue();
27113 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
27114 if (0 <= OpIdx && EltOpIdx != OpIdx)
27115 return SDValue();
27116 OpIdx = EltOpIdx;
27117 }
27118 assert(0 <= OpIdx && "Unknown concat_vectors op");
27119
27120 if (OpIdx < (int)N0.getNumOperands())
27121 Ops.push_back(Elt: N0.getOperand(i: OpIdx));
27122 else
27123 Ops.push_back(Elt: N1.getOperand(i: OpIdx - N0.getNumOperands()));
27124 }
27125
27126 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: SDLoc(N), VT, Ops);
27127}
27128
27129// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
27130// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
27131//
27132// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
27133// a simplification in some sense, but it isn't appropriate in general: some
27134// BUILD_VECTORs are substantially cheaper than others. The general case
27135// of a BUILD_VECTOR requires inserting each element individually (or
27136// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
27137// all constants is a single constant pool load. A BUILD_VECTOR where each
27138// element is identical is a splat. A BUILD_VECTOR where most of the operands
27139// are undef lowers to a small number of element insertions.
27140//
27141// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
27142// We don't fold shuffles where one side is a non-zero constant, and we don't
27143// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
27144// non-constant operands. This seems to work out reasonably well in practice.
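// Illustrative example (subject to the heuristics above and the one-use
// checks below):
//   shuffle (build_vector a, b, c, d), (build_vector e, f, g, h), <0,5,2,7>
//     --> build_vector a, f, c, h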
27145static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
27146 SelectionDAG &DAG,
27147 const TargetLowering &TLI) {
27148 EVT VT = SVN->getValueType(ResNo: 0);
27149 unsigned NumElts = VT.getVectorNumElements();
27150 SDValue N0 = SVN->getOperand(Num: 0);
27151 SDValue N1 = SVN->getOperand(Num: 1);
27152
27153 if (!N0->hasOneUse())
27154 return SDValue();
27155
27156 // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
27157 // discussed above.
27158 if (!N1.isUndef()) {
27159 if (!N1->hasOneUse())
27160 return SDValue();
27161
27162 bool N0AnyConst = isAnyConstantBuildVector(V: N0);
27163 bool N1AnyConst = isAnyConstantBuildVector(V: N1);
27164 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N: N0.getNode()))
27165 return SDValue();
27166 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N: N1.getNode()))
27167 return SDValue();
27168 }
27169
27170 // If both inputs are splats of the same value then we can safely merge this
27171 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
27172 bool IsSplat = false;
27173 auto *BV0 = dyn_cast<BuildVectorSDNode>(Val&: N0);
27174 auto *BV1 = dyn_cast<BuildVectorSDNode>(Val&: N1);
27175 if (BV0 && BV1)
27176 if (SDValue Splat0 = BV0->getSplatValue())
27177 IsSplat = (Splat0 == BV1->getSplatValue());
27178
27179 SmallVector<SDValue, 8> Ops;
27180 SmallSet<SDValue, 16> DuplicateOps;
27181 for (int M : SVN->getMask()) {
27182 SDValue Op = DAG.getPOISON(VT: VT.getScalarType());
27183 if (M >= 0) {
27184 int Idx = M < (int)NumElts ? M : M - NumElts;
27185 SDValue &S = (M < (int)NumElts ? N0 : N1);
27186 if (S.getOpcode() == ISD::BUILD_VECTOR) {
27187 Op = S.getOperand(i: Idx);
27188 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
27189 SDValue Op0 = S.getOperand(i: 0);
27190 Op = Idx == 0 ? Op0 : DAG.getPOISON(VT: Op0.getValueType());
27191 } else {
27192 // Operand can't be combined - bail out.
27193 return SDValue();
27194 }
27195 }
27196
27197 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
27198 // generating a splat; semantically, this is fine, but it's likely to
27199 // generate low-quality code if the target can't reconstruct an appropriate
27200 // shuffle.
27201 if (!Op.isUndef() && !isIntOrFPConstant(V: Op))
27202 if (!IsSplat && !DuplicateOps.insert(V: Op).second)
27203 return SDValue();
27204
27205 Ops.push_back(Elt: Op);
27206 }
27207
27208 // BUILD_VECTOR requires all inputs to be of the same type, find the
27209 // maximum type and extend them all.
27210 EVT SVT = VT.getScalarType();
27211 if (SVT.isInteger())
27212 for (SDValue &Op : Ops)
27213 SVT = (SVT.bitsLT(VT: Op.getValueType()) ? Op.getValueType() : SVT);
27214 if (SVT != VT.getScalarType())
27215 for (SDValue &Op : Ops)
27216 Op = Op.isUndef() ? DAG.getUNDEF(VT: SVT)
27217 : (TLI.isZExtFree(FromTy: Op.getValueType(), ToTy: SVT)
27218 ? DAG.getZExtOrTrunc(Op, DL: SDLoc(SVN), VT: SVT)
27219 : DAG.getSExtOrTrunc(Op, DL: SDLoc(SVN), VT: SVT));
27220 return DAG.getBuildVector(VT, DL: SDLoc(SVN), Ops);
27221}
27222
27223// Match shuffles that can be converted to *_vector_extend_in_reg.
27224// This is often generated during legalization.
27225// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)),
27226// and returns the EVT to which the extension should be performed.
27227// NOTE: this assumes that the src is the first operand of the shuffle.
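// e.g. for VT = v8i16 the search below tries OutVT = v4i32 (Scale == 2) and
// OutVT = v2i64 (Scale == 4), returning the first type that passes the
// legality checks and is accepted by Match.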
27228static std::optional<EVT> canCombineShuffleToExtendVectorInreg(
27229 unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match,
27230 SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
27231 bool LegalOperations) {
27232 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
27233
27234 // TODO Add support for big-endian when we have a test case.
27235 if (!VT.isInteger() || IsBigEndian)
27236 return std::nullopt;
27237
27238 unsigned NumElts = VT.getVectorNumElements();
27239 unsigned EltSizeInBits = VT.getScalarSizeInBits();
27240
27241 // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
27242 // power-of-2 extensions as they are the most likely.
27243 // FIXME: should try the Scale == NumElts case too.
27244 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
27245 // The vector width must be a multiple of Scale.
27246 if (NumElts % Scale != 0)
27247 continue;
27248
27249 EVT OutSVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: EltSizeInBits * Scale);
27250 EVT OutVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: OutSVT, NumElements: NumElts / Scale);
27251
27252 if ((LegalTypes && !TLI.isTypeLegal(VT: OutVT)) ||
27253 (LegalOperations && !TLI.isOperationLegalOrCustom(Op: Opcode, VT: OutVT)))
27254 continue;
27255
27256 if (Match(Scale))
27257 return OutVT;
27258 }
27259
27260 return std::nullopt;
27261}
27262
27263// Match shuffles that can be converted to any_vector_extend_in_reg.
27264// This is often generated during legalization.
27265// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
27266static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN,
27267 SelectionDAG &DAG,
27268 const TargetLowering &TLI,
27269 bool LegalOperations) {
27270 EVT VT = SVN->getValueType(ResNo: 0);
27271 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
27272
27273 // TODO Add support for big-endian when we have a test case.
27274 if (!VT.isInteger() || IsBigEndian)
27275 return SDValue();
27276
27277 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
27278 auto isAnyExtend = [NumElts = VT.getVectorNumElements(),
27279 Mask = SVN->getMask()](unsigned Scale) {
27280 for (unsigned i = 0; i != NumElts; ++i) {
27281 if (Mask[i] < 0)
27282 continue;
27283 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
27284 continue;
27285 return false;
27286 }
27287 return true;
27288 };
27289
27290 unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG;
27291 SDValue N0 = SVN->getOperand(Num: 0);
27292 // Never create an illegal type. Only create unsupported operations if we
27293 // are pre-legalization.
27294 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
27295 Opcode, VT, Match: isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations);
27296 if (!OutVT)
27297 return SDValue();
27298 return DAG.getBitcast(VT, V: DAG.getNode(Opcode, DL: SDLoc(SVN), VT: *OutVT, Operand: N0));
27299}
27300
27301// Match shuffles that can be converted to zero_extend_vector_inreg.
27302// This is often generated during legalization.
27303// e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
27304static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN,
27305 SelectionDAG &DAG,
27306 const TargetLowering &TLI,
27307 bool LegalOperations) {
27308 bool LegalTypes = true;
27309 EVT VT = SVN->getValueType(ResNo: 0);
27310 assert(!VT.isScalableVector() && "Encountered scalable shuffle?");
27311 unsigned NumElts = VT.getVectorNumElements();
27312 unsigned EltSizeInBits = VT.getScalarSizeInBits();
27313
27314 // TODO: add support for big-endian when we have a test case.
27315 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
27316 if (!VT.isInteger() || IsBigEndian)
27317 return SDValue();
27318
27319 SmallVector<int, 16> Mask(SVN->getMask());
27320 auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) {
27321 for (int &Indice : Mask) {
27322 if (Indice < 0)
27323 continue;
27324 int OpIdx = (unsigned)Indice < NumElts ? 0 : 1;
27325 int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
27326 Fn(Indice, OpIdx, OpEltIdx);
27327 }
27328 };
27329
27330 // Which elements of which operand does this shuffle demand?
27331 std::array<APInt, 2> OpsDemandedElts;
27332 for (APInt &OpDemandedElts : OpsDemandedElts)
27333 OpDemandedElts = APInt::getZero(numBits: NumElts);
27334 ForEachDecomposedIndice(
27335 [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) {
27336 OpsDemandedElts[OpIdx].setBit(OpEltIdx);
27337 });
27338
27339 // Element-wise(!), which of these demanded elements are known to be zero?
27340 std::array<APInt, 2> OpsKnownZeroElts;
27341 for (auto I : zip(t: SVN->ops(), u&: OpsDemandedElts, args&: OpsKnownZeroElts))
27342 std::get<2>(t&: I) =
27343 DAG.computeVectorKnownZeroElements(Op: std::get<0>(t&: I), DemandedElts: std::get<1>(t&: I));
27344
27345 // Manifest zeroable element knowledge in the shuffle mask.
27346 // NOTE: there is no 'zeroable' sentinel value in the generic DAG;
27347 // this is a local invention, and it won't leak into the DAG.
27348 // FIXME: should we not manifest them, but just check when matching?
27349 bool HadZeroableElts = false;
27350 ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts](
27351 int &Indice, int OpIdx, int OpEltIdx) {
27352 if (OpsKnownZeroElts[OpIdx][OpEltIdx]) {
27353 Indice = -2; // Zeroable element.
27354 HadZeroableElts = true;
27355 }
27356 });
27357
27358 // Don't proceed unless we've refined at least one zeroable mask index.
27359 // If we didn't, then we are still trying to match the same shuffle mask
27360 // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG,
27361 // and evidently failed. Proceeding will lead to endless combine loops.
27362 if (!HadZeroableElts)
27363 return SDValue();
27364
27365 // The shuffle may be more fine-grained than we want. Widen elements first.
27366 // FIXME: should we do this before manifesting zeroable shuffle mask indices?
27367 SmallVector<int, 16> ScaledMask;
27368 getShuffleMaskWithWidestElts(Mask, ScaledMask);
27369 assert(Mask.size() >= ScaledMask.size() &&
27370 Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening.");
27371 int Prescale = Mask.size() / ScaledMask.size();
27372
27373 NumElts = ScaledMask.size();
27374 EltSizeInBits *= Prescale;
27375
27376 EVT PrescaledVT = EVT::getVectorVT(
27377 Context&: *DAG.getContext(), VT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: EltSizeInBits),
27378 NumElements: NumElts);
27379
27380 if (LegalTypes && !TLI.isTypeLegal(VT: PrescaledVT) && TLI.isTypeLegal(VT))
27381 return SDValue();
27382
27383 // For example,
27384 // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
27385 // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types)
27386 auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) {
27387 assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 &&
27388 "Unexpected mask scaling factor.");
27389 ArrayRef<int> Mask = ScaledMask;
27390 for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale;
27391 SrcElt != NumSrcElts; ++SrcElt) {
27392 // Analyze the shuffle mask in Scale-sized chunks.
27393 ArrayRef<int> MaskChunk = Mask.take_front(N: Scale);
27394 assert(MaskChunk.size() == Scale && "Unexpected mask size.");
27395 Mask = Mask.drop_front(N: MaskChunk.size());
27396 // The first index in this chunk must be SrcElt, but not zero!
27397 // FIXME: undef should be fine, but that results in a more-defined result.
27398 if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt)
27399 return false;
27400 // The rest of the indices in this chunk must be zeros.
27401 // FIXME: undef should be fine, but that results in a more-defined result.
27402 if (!all_of(Range: MaskChunk.drop_front(N: 1),
27403 P: [](int Indice) { return Indice == -2; }))
27404 return false;
27405 }
27406 assert(Mask.empty() && "Did not process the whole mask?");
27407 return true;
27408 };
27409
27410 unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG;
27411 for (bool Commuted : {false, true}) {
27412 SDValue Op = SVN->getOperand(Num: !Commuted ? 0 : 1);
27413 if (Commuted)
27414 ShuffleVectorSDNode::commuteMask(Mask: ScaledMask);
27415 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
27416 Opcode, VT: PrescaledVT, Match: isZeroExtend, DAG, TLI, LegalTypes,
27417 LegalOperations);
27418 if (OutVT)
27419 return DAG.getBitcast(VT, V: DAG.getNode(Opcode, DL: SDLoc(SVN), VT: *OutVT,
27420 Operand: DAG.getBitcast(VT: PrescaledVT, V: Op)));
27421 }
27422 return SDValue();
27423}
27424
27425// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
27426// each source element of a large type into the lowest elements of a smaller
27427// destination type. This is often generated during legalization.
27428// If the source node itself was a '*_extend_vector_inreg' node then we should
27429 // be able to remove it.
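// Illustrative example (little-endian, X : v4i32):
//   shuffle<0,2,-1,-1> (v4i32 bitcast (v2i64 any_extend_vector_inreg X)), undef
//     --> X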
27430static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
27431 SelectionDAG &DAG) {
27432 EVT VT = SVN->getValueType(ResNo: 0);
27433 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
27434
27435 // TODO Add support for big-endian when we have a test case.
27436 if (!VT.isInteger() || IsBigEndian)
27437 return SDValue();
27438
27439 SDValue N0 = peekThroughBitcasts(V: SVN->getOperand(Num: 0));
27440
27441 unsigned Opcode = N0.getOpcode();
27442 if (!ISD::isExtVecInRegOpcode(Opcode))
27443 return SDValue();
27444
27445 SDValue N00 = N0.getOperand(i: 0);
27446 ArrayRef<int> Mask = SVN->getMask();
27447 unsigned NumElts = VT.getVectorNumElements();
27448 unsigned EltSizeInBits = VT.getScalarSizeInBits();
27449 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
27450 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
27451
27452 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
27453 return SDValue();
27454 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
27455
27456 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
27457 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
27458 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
27459 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
27460 for (unsigned i = 0; i != NumElts; ++i) {
27461 if (Mask[i] < 0)
27462 continue;
27463 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
27464 continue;
27465 return false;
27466 }
27467 return true;
27468 };
27469
27470 // At the moment we just handle the case where we've truncated back to the
27471 // same size as before the extension.
27472 // TODO: handle more extension/truncation cases as cases arise.
27473 if (EltSizeInBits != ExtSrcSizeInBits)
27474 return SDValue();
27475 if (VT.getSizeInBits() != N00.getValueSizeInBits())
27476 return SDValue();
27477
27478 // We can remove *extend_vector_inreg only if the truncation happens at
27479 // the same scale as the extension.
27480 if (isTruncate(ExtScale))
27481 return DAG.getBitcast(VT, V: N00);
27482
27483 return SDValue();
27484}
27485
27486// Combine shuffles of splat-shuffles of the form:
27487// shuffle (shuffle V, undef, splat-mask), undef, M
27488// If splat-mask contains undef elements, we need to be careful about
27489// introducing undef's in the folded mask which are not the result of composing
27490// the masks of the shuffles.
27491static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
27492 SelectionDAG &DAG) {
27493 EVT VT = Shuf->getValueType(ResNo: 0);
27494 unsigned NumElts = VT.getVectorNumElements();
27495
27496 if (!Shuf->getOperand(Num: 1).isUndef())
27497 return SDValue();
27498
27499 // See if this unary non-splat shuffle actually *is* a splat shuffle,
27500 // in disguise, with all demanded elements being identical.
27501 // FIXME: this can be done per-operand.
27502 if (!Shuf->isSplat()) {
27503 APInt DemandedElts(NumElts, 0);
27504 for (int Idx : Shuf->getMask()) {
27505 if (Idx < 0)
27506 continue; // Ignore sentinel indices.
27507 assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle indice?");
27508 DemandedElts.setBit(Idx);
27509 }
27510 assert(DemandedElts.popcount() > 1 && "Is a splat shuffle already?");
27511 APInt UndefElts;
27512 if (DAG.isSplatValue(V: Shuf->getOperand(Num: 0), DemandedElts, UndefElts)) {
27513 // Even if all demanded elements are splat, some of them could be undef.
27514 // Which lowest demanded element is *not* known-undef?
27515 std::optional<unsigned> MinNonUndefIdx;
27516 for (int Idx : Shuf->getMask()) {
27517 if (Idx < 0 || UndefElts[Idx])
27518 continue; // Ignore sentinel indices, and undef elements.
27519 MinNonUndefIdx = std::min<unsigned>(a: Idx, b: MinNonUndefIdx.value_or(u: ~0U));
27520 }
27521 if (!MinNonUndefIdx)
27522 return DAG.getUNDEF(VT); // All undef - result is undef.
27523 assert(*MinNonUndefIdx < NumElts && "Expected valid element index.");
27524 SmallVector<int, 8> SplatMask(Shuf->getMask());
27525 for (int &Idx : SplatMask) {
27526 if (Idx < 0)
27527 continue; // Passthrough sentinel indices.
27528 // Otherwise, just pick the lowest demanded non-undef element.
27529 // Or sentinel undef, if we know we'd pick a known-undef element.
27530 Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
27531 }
27532 assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
27533 return DAG.getVectorShuffle(VT, dl: SDLoc(Shuf), N1: Shuf->getOperand(Num: 0),
27534 N2: Shuf->getOperand(Num: 1), Mask: SplatMask);
27535 }
27536 }
27537
27538 // If the inner operand is a known splat with no undefs, just return that directly.
27539 // TODO: Create DemandedElts mask from Shuf's mask.
27540 // TODO: Allow undef elements and merge with the shuffle code below.
27541 if (DAG.isSplatValue(V: Shuf->getOperand(Num: 0), /*AllowUndefs*/ false))
27542 return Shuf->getOperand(Num: 0);
27543
27544 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Val: Shuf->getOperand(Num: 0));
27545 if (!Splat || !Splat->isSplat())
27546 return SDValue();
27547
27548 ArrayRef<int> ShufMask = Shuf->getMask();
27549 ArrayRef<int> SplatMask = Splat->getMask();
27550 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
27551
27552 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
27553 // every undef mask element in the splat-shuffle has a corresponding undef
27554 // element in the user-shuffle's mask or if the composition of mask elements
27555 // would result in undef.
27556 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
27557 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
27558 // In this case it is not legal to simplify to the splat-shuffle because we
27559 // may expose to the users of the shuffle an undef element at index 1
27560 // which was not there before the combine.
27561 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
27562 // In this case the composition of masks yields SplatMask, so it's ok to
27563 // simplify to the splat-shuffle.
27564 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
27565 // In this case the composed mask includes all undef elements of SplatMask
27566 // and in addition sets element zero to undef. It is safe to simplify to
27567 // the splat-shuffle.
27568 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
27569 ArrayRef<int> SplatMask) {
27570 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
27571 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
27572 SplatMask[UserMask[i]] != -1)
27573 return false;
27574 return true;
27575 };
27576 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
27577 return Shuf->getOperand(Num: 0);
27578
27579 // Create a new shuffle with a mask that is composed of the two shuffles'
27580 // masks.
27581 SmallVector<int, 32> NewMask;
27582 for (int Idx : ShufMask)
27583 NewMask.push_back(Elt: Idx == -1 ? -1 : SplatMask[Idx]);
27584
27585 return DAG.getVectorShuffle(VT: Splat->getValueType(ResNo: 0), dl: SDLoc(Splat),
27586 N1: Splat->getOperand(Num: 0), N2: Splat->getOperand(Num: 1),
27587 Mask: NewMask);
27588}
27589
27590// Combine shuffles of bitcasts into a shuffle of the bitcast type, providing
27591// the mask can be treated as a larger type.
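// Illustrative example (little-endian, assuming the widened mask is legal):
//   v4i32 shuffle (bitcast (v2i64 X)), (bitcast (v2i64 Y)), <0,1,6,7>
//     --> bitcast (v2i64 shuffle X, Y, <0,3>)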
27592static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
27593 SelectionDAG &DAG,
27594 const TargetLowering &TLI,
27595 bool LegalOperations) {
27596 SDValue Op0 = SVN->getOperand(Num: 0);
27597 SDValue Op1 = SVN->getOperand(Num: 1);
27598 EVT VT = SVN->getValueType(ResNo: 0);
27599 if (Op0.getOpcode() != ISD::BITCAST)
27600 return SDValue();
27601 EVT InVT = Op0.getOperand(i: 0).getValueType();
27602 if (!InVT.isVector() ||
27603 (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
27604 Op1.getOperand(i: 0).getValueType() != InVT)))
27605 return SDValue();
27606 if (isAnyConstantBuildVector(V: Op0.getOperand(i: 0)) &&
27607 (Op1.isUndef() || isAnyConstantBuildVector(V: Op1.getOperand(i: 0))))
27608 return SDValue();
27609
27610 int VTLanes = VT.getVectorNumElements();
27611 int InLanes = InVT.getVectorNumElements();
27612 if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
27613 (LegalOperations &&
27614 !TLI.isOperationLegalOrCustom(Op: ISD::VECTOR_SHUFFLE, VT: InVT)))
27615 return SDValue();
27616 int Factor = VTLanes / InLanes;
27617
27618 // Check that each group of lanes in the mask is either undef or makes a valid
27619 // mask for the wider lane type.
27620 ArrayRef<int> Mask = SVN->getMask();
27621 SmallVector<int> NewMask;
27622 if (!widenShuffleMaskElts(Scale: Factor, Mask, ScaledMask&: NewMask))
27623 return SDValue();
27624
27625 if (!TLI.isShuffleMaskLegal(NewMask, InVT))
27626 return SDValue();
27627
27628 // Create the new shuffle with the new mask and bitcast it back to the
27629 // original type.
27630 SDLoc DL(SVN);
27631 Op0 = Op0.getOperand(i: 0);
27632 Op1 = Op1.isUndef() ? DAG.getUNDEF(VT: InVT) : Op1.getOperand(i: 0);
27633 SDValue NewShuf = DAG.getVectorShuffle(VT: InVT, dl: DL, N1: Op0, N2: Op1, Mask: NewMask);
27634 return DAG.getBitcast(VT, V: NewShuf);
27635}
27636
27637/// Combine shuffle of shuffle of the form:
27638/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
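/// Illustrative example (assuming the combined mask is legal for the target):
///   shuf (shuf X, undef, <2,u,1,0>), undef, <0,0,u,0>
///     --> shuf X, undef, <2,2,u,2>, i.e. a splat of element 2 of X.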
27639static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
27640 SelectionDAG &DAG) {
27641 if (!OuterShuf->getOperand(Num: 1).isUndef())
27642 return SDValue();
27643 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(Val: OuterShuf->getOperand(Num: 0));
27644 if (!InnerShuf || !InnerShuf->getOperand(Num: 1).isUndef())
27645 return SDValue();
27646
27647 ArrayRef<int> OuterMask = OuterShuf->getMask();
27648 ArrayRef<int> InnerMask = InnerShuf->getMask();
27649 unsigned NumElts = OuterMask.size();
27650 assert(NumElts == InnerMask.size() && "Mask length mismatch");
27651 SmallVector<int, 32> CombinedMask(NumElts, -1);
27652 int SplatIndex = -1;
27653 for (unsigned i = 0; i != NumElts; ++i) {
27654 // Undef lanes remain undef.
27655 int OuterMaskElt = OuterMask[i];
27656 if (OuterMaskElt == -1)
27657 continue;
27658
27659 // Peek through the shuffle masks to get the underlying source element.
27660 int InnerMaskElt = InnerMask[OuterMaskElt];
27661 if (InnerMaskElt == -1)
27662 continue;
27663
27664 // Initialize the splatted element.
27665 if (SplatIndex == -1)
27666 SplatIndex = InnerMaskElt;
27667
27668 // Non-matching index - this is not a splat.
27669 if (SplatIndex != InnerMaskElt)
27670 return SDValue();
27671
27672 CombinedMask[i] = InnerMaskElt;
27673 }
27674 assert((all_of(CombinedMask, equal_to(-1)) ||
27675 getSplatIndex(CombinedMask) != -1) &&
27676 "Expected a splat mask");
27677
27678 // TODO: The transform may be a win even if the mask is not legal.
27679 EVT VT = OuterShuf->getValueType(ResNo: 0);
27680 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
27681 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
27682 return SDValue();
27683
27684 return DAG.getVectorShuffle(VT, dl: SDLoc(OuterShuf), N1: InnerShuf->getOperand(Num: 0),
27685 N2: InnerShuf->getOperand(Num: 1), Mask: CombinedMask);
27686}
27687
27688/// If the shuffle mask is taking exactly one element from the first vector
27689/// operand and passing through all other elements from the second vector
27690/// operand, return the index of the mask element that is choosing an element
27691/// from the first operand. Otherwise, return -1.
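/// e.g. for a 4-element mask, <4,5,0,7> returns 2 (only lane 2 reads from
/// operand 0), while <4,5,0,1> returns -1 (two lanes read from operand 0).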
27692static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
27693 int MaskSize = Mask.size();
27694 int EltFromOp0 = -1;
27695 // TODO: This does not match if there are undef elements in the shuffle mask.
27696 // Should we ignore undefs in the shuffle mask instead? The trade-off is
27697 // removing an instruction (a shuffle), but losing the knowledge that some
27698 // vector lanes are not needed.
27699 for (int i = 0; i != MaskSize; ++i) {
27700 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
27701 // We're looking for a shuffle of exactly one element from operand 0.
27702 if (EltFromOp0 != -1)
27703 return -1;
27704 EltFromOp0 = i;
27705 } else if (Mask[i] != i + MaskSize) {
27706 // Nothing from operand 1 can change lanes.
27707 return -1;
27708 }
27709 }
27710 return EltFromOp0;
27711}
27712
27713/// If a shuffle inserts exactly one element from a source vector operand into
27714/// another vector operand and we can access the specified element as a scalar,
27715/// then we can eliminate the shuffle.
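/// Illustrative example:
///   shuffle (insertelt V1, x, 3), V2, <4,5,3,7> --> insertelt V2, x, 2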
27716SDValue DAGCombiner::replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf) {
27717 // First, check if we are taking one element of a vector and shuffling that
27718 // element into another vector.
27719 ArrayRef<int> Mask = Shuf->getMask();
27720 SmallVector<int, 16> CommutedMask(Mask);
27721 SDValue Op0 = Shuf->getOperand(Num: 0);
27722 SDValue Op1 = Shuf->getOperand(Num: 1);
27723 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
27724 if (ShufOp0Index == -1) {
27725 // Commute mask and check again.
27726 ShuffleVectorSDNode::commuteMask(Mask: CommutedMask);
27727 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask: CommutedMask);
27728 if (ShufOp0Index == -1)
27729 return SDValue();
27730 // Commute operands to match the commuted shuffle mask.
27731 std::swap(a&: Op0, b&: Op1);
27732 Mask = CommutedMask;
27733 }
27734
27735 // The shuffle inserts exactly one element from operand 0 into operand 1.
27736 // Now see if we can access that element as a scalar via a real insert element
27737 // instruction.
27738 // TODO: We can try harder to locate the element as a scalar. Examples: it
27739 // could be an operand of BUILD_VECTOR, or a constant.
27740 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
27741 "Shuffle mask value must be from operand 0");
27742
27743 SDValue Elt;
27744 if (sd_match(N: Op0, P: m_InsertElt(Vec: m_Value(), Val: m_Value(N&: Elt),
27745 Idx: m_SpecificInt(V: Mask[ShufOp0Index])))) {
27746 // There's an existing insertelement with constant insertion index, so we
27747 // don't need to check the legality/profitability of a replacement operation
27748 // that differs at most in the constant value. The target should be able to
27749 // lower any of those in a similar way. If not, legalization will expand
27750 // this to a scalar-to-vector plus shuffle.
27751 //
27752 // Note that the shuffle may move the scalar from the position that the
27753 // insert element used. Therefore, our new insert element occurs at the
27754 // shuffle's mask index value, not the insert's index value.
27755 //
27756 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
27757 SDValue NewInsIndex = DAG.getVectorIdxConstant(Val: ShufOp0Index, DL: SDLoc(Shuf));
27758 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(Shuf), VT: Op0.getValueType(),
27759 N1: Op1, N2: Elt, N3: NewInsIndex);
27760 }
27761
27762 if (!hasOperation(Opcode: ISD::INSERT_VECTOR_ELT, VT: Op0.getValueType()))
27763 return SDValue();
27764
27765 if (sd_match(N: Op0, P: m_UnaryOp(Opc: ISD::SCALAR_TO_VECTOR, Op: m_Value(N&: Elt))) &&
27766 Mask[ShufOp0Index] == 0) {
27767 SDValue NewInsIndex = DAG.getVectorIdxConstant(Val: ShufOp0Index, DL: SDLoc(Shuf));
27768 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(Shuf), VT: Op0.getValueType(),
27769 N1: Op1, N2: Elt, N3: NewInsIndex);
27770 }
27771
27772 return SDValue();
27773}
27774
27775/// If we have a unary shuffle of a shuffle, see if it can be folded away
27776/// completely. This has the potential to lose undef knowledge because the first
27777/// shuffle may not have an undef mask element where the second one does. So
27778/// only call this after doing simplifications based on demanded elements.
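/// Illustrative example:
///   shuf (shuf0 X, Y, <5,1,5,u>), undef, <2,1,2,3> --> shuf0 X, Y, <5,1,5,u>,
/// because every lane of the outer shuffle selects the value the inner
/// shuffle already produces in that lane.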
27779static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
27780 // shuf (shuf0 X, Y, Mask0), undef, Mask
27781 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Val: Shuf->getOperand(Num: 0));
27782 if (!Shuf0 || !Shuf->getOperand(Num: 1).isUndef())
27783 return SDValue();
27784
27785 ArrayRef<int> Mask = Shuf->getMask();
27786 ArrayRef<int> Mask0 = Shuf0->getMask();
27787 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
27788 // Ignore undef elements.
27789 if (Mask[i] == -1)
27790 continue;
27791 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
27792
27793 // Is the element of the shuffle operand chosen by this shuffle the same as
27794 // the element chosen by the shuffle operand itself?
27795 if (Mask0[Mask[i]] != Mask0[i])
27796 return SDValue();
27797 }
27798 // Every element of this shuffle is identical to the result of the previous
27799 // shuffle, so we can replace this value.
27800 return Shuf->getOperand(Num: 0);
27801}
27802
27803SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
27804 EVT VT = N->getValueType(ResNo: 0);
27805 unsigned NumElts = VT.getVectorNumElements();
27806
27807 SDValue N0 = N->getOperand(Num: 0);
27808 SDValue N1 = N->getOperand(Num: 1);
27809
27810 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
27811
27812 // Canonicalize shuffle undef, undef -> undef
27813 if (N0.isUndef() && N1.isUndef())
27814 return DAG.getUNDEF(VT);
27815
27816 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Val: N);
27817
27818 // Canonicalize shuffle v, v -> v, poison
27819 if (N0 == N1)
27820 return DAG.getVectorShuffle(VT, dl: SDLoc(N), N1: N0, N2: DAG.getPOISON(VT),
27821 Mask: createUnaryMask(Mask: SVN->getMask(), NumElts));
27822
27823 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
27824 if (N0.isUndef())
27825 return DAG.getCommutedVectorShuffle(SV: *SVN);
27826
27827 // Remove references to rhs if it is undef
27828 if (N1.isUndef()) {
27829 bool Changed = false;
27830 SmallVector<int, 8> NewMask;
27831 for (unsigned i = 0; i != NumElts; ++i) {
27832 int Idx = SVN->getMaskElt(Idx: i);
27833 if (Idx >= (int)NumElts) {
27834 Idx = -1;
27835 Changed = true;
27836 }
27837 NewMask.push_back(Elt: Idx);
27838 }
27839 if (Changed)
27840 return DAG.getVectorShuffle(VT, dl: SDLoc(N), N1: N0, N2: N1, Mask: NewMask);
27841 }
27842
27843 if (SDValue InsElt = replaceShuffleOfInsert(Shuf: SVN))
27844 return InsElt;
27845
27846 // A shuffle of a single vector that is a splatted value can always be folded.
27847 if (SDValue V = combineShuffleOfSplatVal(Shuf: SVN, DAG))
27848 return V;
27849
27850 if (SDValue V = formSplatFromShuffles(OuterShuf: SVN, DAG))
27851 return V;
27852
27853 // If it is a splat, check if the argument vector is another splat or a
27854 // build_vector.
27855 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
27856 int SplatIndex = SVN->getSplatIndex();
27857 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, Index: SplatIndex) &&
27858 TLI.isBinOp(Opcode: N0.getOpcode()) && N0->getNumValues() == 1) {
27859 // splat (vector_bo L, R), Index -->
27860 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
27861 SDValue L = N0.getOperand(i: 0), R = N0.getOperand(i: 1);
27862 SDLoc DL(N);
27863 EVT EltVT = VT.getScalarType();
27864 SDValue Index = DAG.getVectorIdxConstant(Val: SplatIndex, DL);
27865 SDValue ExtL = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT, N1: L, N2: Index);
27866 SDValue ExtR = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT, N1: R, N2: Index);
27867 SDValue NewBO =
27868 DAG.getNode(Opcode: N0.getOpcode(), DL, VT: EltVT, N1: ExtL, N2: ExtR, Flags: N0->getFlags());
27869 SDValue Insert = DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT, Operand: NewBO);
27870 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
27871 return DAG.getVectorShuffle(VT, dl: DL, N1: Insert, N2: DAG.getPOISON(VT), Mask: ZeroMask);
27872 }
27873
27874 // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
27875 // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
27876 if ((!LegalOperations || TLI.isOperationLegal(Op: ISD::BUILD_VECTOR, VT)) &&
27877 N0.hasOneUse()) {
27878 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
27879 return DAG.getSplatBuildVector(VT, DL: SDLoc(N), Op: N0.getOperand(i: 0));
27880
27881 if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
27882 if (auto *Idx = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 2)))
27883 if (Idx->getAPIntValue() == SplatIndex)
27884 return DAG.getSplatBuildVector(VT, DL: SDLoc(N), Op: N0.getOperand(i: 1));
27885
27886 // Look through a bitcast if little-endian and splatting lane 0, through to a
27887 // scalar_to_vector or a build_vector.
27888 if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(i: 0).hasOneUse() &&
27889 SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
27890 (N0.getOperand(i: 0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
27891 N0.getOperand(i: 0).getOpcode() == ISD::BUILD_VECTOR)) {
27892 EVT N00VT = N0.getOperand(i: 0).getValueType();
27893 if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
27894 VT.isInteger() && N00VT.isInteger()) {
27895 EVT InVT =
27896 TLI.getTypeToTransformTo(Context&: *DAG.getContext(), VT: VT.getScalarType());
27897 SDValue Op = DAG.getZExtOrTrunc(Op: N0.getOperand(i: 0).getOperand(i: 0),
27898 DL: SDLoc(N), VT: InVT);
27899 return DAG.getSplatBuildVector(VT, DL: SDLoc(N), Op);
27900 }
27901 }
27902 }
27903
27904 // If this is a bit convert that changes the element type of the vector but
27905 // not the number of vector elements, look through it. Be careful not to
27906 // look through conversions that change things like v4f32 to v2f64.
27907 SDNode *V = N0.getNode();
27908 if (V->getOpcode() == ISD::BITCAST) {
27909 SDValue ConvInput = V->getOperand(Num: 0);
27910 if (ConvInput.getValueType().isVector() &&
27911 ConvInput.getValueType().getVectorNumElements() == NumElts)
27912 V = ConvInput.getNode();
27913 }
27914
27915 if (V->getOpcode() == ISD::BUILD_VECTOR) {
27916 assert(V->getNumOperands() == NumElts &&
27917 "BUILD_VECTOR has wrong number of operands");
27918 SDValue Base;
27919 bool AllSame = true;
27920 for (unsigned i = 0; i != NumElts; ++i) {
27921 if (!V->getOperand(Num: i).isUndef()) {
27922 Base = V->getOperand(Num: i);
27923 break;
27924 }
27925 }
27926 // Splat of <u, u, u, u>, return <u, u, u, u>
27927 if (!Base.getNode())
27928 return N0;
27929 for (unsigned i = 0; i != NumElts; ++i) {
27930 if (V->getOperand(Num: i) != Base) {
27931 AllSame = false;
27932 break;
27933 }
27934 }
27935 // Splat of <x, x, x, x>, return <x, x, x, x>
27936 if (AllSame)
27937 return N0;
27938
27939 // Canonicalize any other splat as a build_vector, but avoid defining any
27940 // undefined elements in the mask.
27941 SDValue Splatted = V->getOperand(Num: SplatIndex);
27942 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
27943 EVT EltVT = Splatted.getValueType();
27944
27945 for (unsigned i = 0; i != NumElts; ++i) {
27946 if (SVN->getMaskElt(Idx: i) < 0)
27947 Ops[i] = DAG.getPOISON(VT: EltVT);
27948 }
27949
27950 SDValue NewBV = DAG.getBuildVector(VT: V->getValueType(ResNo: 0), DL: SDLoc(N), Ops);
27951
27952 // We may have jumped through bitcasts, so the type of the
27953 // BUILD_VECTOR may not match the type of the shuffle.
27954 if (V->getValueType(ResNo: 0) != VT)
27955 NewBV = DAG.getBitcast(VT, V: NewBV);
27956 return NewBV;
27957 }
27958 }
27959
27960 // Simplify source operands based on shuffle mask.
27961 if (SimplifyDemandedVectorElts(Op: SDValue(N, 0)))
27962 return SDValue(N, 0);
27963
27964 // This is intentionally placed after demanded elements simplification because
27965 // it could eliminate knowledge of undef elements created by this shuffle.
27966 if (SDValue ShufOp = simplifyShuffleOfShuffle(Shuf: SVN))
27967 return ShufOp;
27968
27969 // Match shuffles that can be converted to any_vector_extend_in_reg.
27970 if (SDValue V =
27971 combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations))
27972 return V;
27973
27974 // Combine "truncate_vector_in_reg" style shuffles.
27975 if (SDValue V = combineTruncationShuffle(SVN, DAG))
27976 return V;
27977
27978 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
27979 Level < AfterLegalizeVectorOps &&
27980 (N1.isUndef() ||
27981 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
27982 N0.getOperand(i: 0).getValueType() == N1.getOperand(i: 0).getValueType()))) {
27983 if (SDValue V = partitionShuffleOfConcats(N, DAG))
27984 return V;
27985 }
27986
27987 // A shuffle of a concat of the same narrow vector can be reduced to use
27988 // only low-half elements of a concat with undef:
27989 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
27990 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
27991 N0.getNumOperands() == 2 &&
27992 N0.getOperand(i: 0) == N0.getOperand(i: 1)) {
27993 int HalfNumElts = (int)NumElts / 2;
27994 SmallVector<int, 8> NewMask;
27995 for (unsigned i = 0; i != NumElts; ++i) {
27996 int Idx = SVN->getMaskElt(Idx: i);
27997 if (Idx >= HalfNumElts) {
27998 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
27999 Idx -= HalfNumElts;
28000 }
28001 NewMask.push_back(Elt: Idx);
28002 }
28003 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
28004 SDValue UndefVec = DAG.getPOISON(VT: N0.getOperand(i: 0).getValueType());
28005 SDValue NewCat = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: SDLoc(N), VT,
28006 N1: N0.getOperand(i: 0), N2: UndefVec);
28007 return DAG.getVectorShuffle(VT, dl: SDLoc(N), N1: NewCat, N2: N1, Mask: NewMask);
28008 }
28009 }
28010
28011 // See if we can replace a shuffle with an insert_subvector.
28012 // e.g. v2i32 into v8i32:
28013 // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
28014 // --> insert_subvector(lhs,rhs1,4).
28015 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
28016 TLI.isOperationLegalOrCustom(Op: ISD::INSERT_SUBVECTOR, VT)) {
28017 auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
28018 // Ensure RHS subvectors are legal.
28019 assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
28020 EVT SubVT = RHS.getOperand(i: 0).getValueType();
28021 int NumSubVecs = RHS.getNumOperands();
28022 int NumSubElts = SubVT.getVectorNumElements();
28023 assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
28024 if (!TLI.isTypeLegal(VT: SubVT))
28025 return SDValue();
28026
28027 // Don't bother if we have a unary shuffle (matches undef + LHS elts).
28028 if (all_of(Range&: Mask, P: [NumElts](int M) { return M < (int)NumElts; }))
28029 return SDValue();
28030
28031 // Search [NumSubElts] spans for RHS sequence.
28032 // TODO: Can we avoid nested loops to increase performance?
28033 SmallVector<int> InsertionMask(NumElts);
28034 for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
28035 for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
28036 // Reset mask to identity.
28037 std::iota(first: InsertionMask.begin(), last: InsertionMask.end(), value: 0);
28038
28039 // Add subvector insertion.
28040 std::iota(first: InsertionMask.begin() + SubIdx,
28041 last: InsertionMask.begin() + SubIdx + NumSubElts,
28042 value: NumElts + (SubVec * NumSubElts));
28043
28044 // See if the shuffle mask matches the reference insertion mask.
28045 bool MatchingShuffle = true;
28046 for (int i = 0; i != (int)NumElts; ++i) {
28047 int ExpectIdx = InsertionMask[i];
28048 int ActualIdx = Mask[i];
28049 if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
28050 MatchingShuffle = false;
28051 break;
28052 }
28053 }
28054
28055 if (MatchingShuffle)
28056 return DAG.getInsertSubvector(DL: SDLoc(N), Vec: LHS, SubVec: RHS.getOperand(i: SubVec),
28057 Idx: SubIdx);
28058 }
28059 }
28060 return SDValue();
28061 };
28062 ArrayRef<int> Mask = SVN->getMask();
28063 if (N1.getOpcode() == ISD::CONCAT_VECTORS)
28064 if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
28065 return InsertN1;
28066 if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
28067 SmallVector<int> CommuteMask(Mask);
28068 ShuffleVectorSDNode::commuteMask(Mask: CommuteMask);
28069 if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
28070 return InsertN0;
28071 }
28072 }
28073
28074 // If we're not performing a select/blend shuffle, see if we can convert the
28075 // shuffle into an AND node, where all the out-of-lane elements are known zero.
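// Illustrative example (when the referenced elements of the zero operand are
// known zero and the integer AND is legal):
//   shuffle X, zeros, <0,4,2,6> --> and X, <-1,0,-1,0>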
28076 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
28077 bool IsInLaneMask = true;
28078 ArrayRef<int> Mask = SVN->getMask();
28079 SmallVector<int, 16> ClearMask(NumElts, -1);
28080 APInt DemandedLHS = APInt::getZero(numBits: NumElts);
28081 APInt DemandedRHS = APInt::getZero(numBits: NumElts);
28082 for (int I = 0; I != (int)NumElts; ++I) {
28083 int M = Mask[I];
28084 if (M < 0)
28085 continue;
28086 ClearMask[I] = M == I ? I : (I + NumElts);
28087 IsInLaneMask &= (M == I) || (M == (int)(I + NumElts));
28088 if (M != I) {
28089 APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS;
28090 Demanded.setBit(M % NumElts);
28091 }
28092 }
28093 // TODO: Should we try to mask with N1 as well?
28094 if (!IsInLaneMask && (!DemandedLHS.isZero() || !DemandedRHS.isZero()) &&
28095 (DemandedLHS.isZero() || DAG.MaskedVectorIsZero(Op: N0, DemandedElts: DemandedLHS)) &&
28096 (DemandedRHS.isZero() || DAG.MaskedVectorIsZero(Op: N1, DemandedElts: DemandedRHS))) {
28097 SDLoc DL(N);
28098 EVT IntVT = VT.changeVectorElementTypeToInteger();
28099 EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
28100 // Transform the type to a legal type so that the buildvector constant
28101 // elements are not illegal. Make sure that the result is larger than the
28102 // original type, in case the value is split into two (e.g. i64->i32).
28103 if (!TLI.isTypeLegal(VT: IntSVT) && LegalTypes)
28104 IntSVT = TLI.getTypeToTransformTo(Context&: *DAG.getContext(), VT: IntSVT);
28105 if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) {
28106 SDValue ZeroElt = DAG.getConstant(Val: 0, DL, VT: IntSVT);
28107 SDValue AllOnesElt = DAG.getAllOnesConstant(DL, VT: IntSVT);
28108 SmallVector<SDValue, 16> AndMask(NumElts, DAG.getPOISON(VT: IntSVT));
28109 for (int I = 0; I != (int)NumElts; ++I)
28110 if (0 <= Mask[I])
28111 AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
28112
28113 // See if a clear mask is legal instead of going via
28114 // XformToShuffleWithZero which loses UNDEF mask elements.
28115 if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
28116 return DAG.getBitcast(
28117 VT, V: DAG.getVectorShuffle(VT: IntVT, dl: DL, N1: DAG.getBitcast(VT: IntVT, V: N0),
28118 N2: DAG.getConstant(Val: 0, DL, VT: IntVT), Mask: ClearMask));
28119
28120 if (TLI.isOperationLegalOrCustom(Op: ISD::AND, VT: IntVT))
28121 return DAG.getBitcast(
28122 VT, V: DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: DAG.getBitcast(VT: IntVT, V: N0),
28123 N2: DAG.getBuildVector(VT: IntVT, DL, Ops: AndMask)));
28124 }
28125 }
28126 }
28127
28128 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
28129 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
28130 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
28131 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
28132 return Res;
28133
28134 // If this shuffle only has a single input that is a bitcasted shuffle,
28135 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
28136 // back to their original types.
28137 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
28138 N1.isUndef() && Level < AfterLegalizeVectorOps &&
28139 TLI.isTypeLegal(VT)) {
28140
28141 SDValue BC0 = peekThroughOneUseBitcasts(V: N0);
28142 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
28143 EVT SVT = VT.getScalarType();
28144 EVT InnerVT = BC0->getValueType(ResNo: 0);
28145 EVT InnerSVT = InnerVT.getScalarType();
28146
28147 // Determine which shuffle works with the smaller scalar type.
28148 EVT ScaleVT = SVT.bitsLT(VT: InnerSVT) ? VT : InnerVT;
28149 EVT ScaleSVT = ScaleVT.getScalarType();
28150
28151 if (TLI.isTypeLegal(VT: ScaleVT) &&
28152 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
28153 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
28154 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
28155 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
28156
28157 // Scale the shuffle masks to the smaller scalar type.
28158 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(Val&: BC0);
28159 SmallVector<int, 8> InnerMask;
28160 SmallVector<int, 8> OuterMask;
28161 narrowShuffleMaskElts(Scale: InnerScale, Mask: InnerSVN->getMask(), ScaledMask&: InnerMask);
28162 narrowShuffleMaskElts(Scale: OuterScale, Mask: SVN->getMask(), ScaledMask&: OuterMask);
28163
28164 // Merge the shuffle masks.
28165 SmallVector<int, 8> NewMask;
28166 for (int M : OuterMask)
28167 NewMask.push_back(Elt: M < 0 ? -1 : InnerMask[M]);
28168
28169 // Test for shuffle mask legality over both commutations.
28170 SDValue SV0 = BC0->getOperand(Num: 0);
28171 SDValue SV1 = BC0->getOperand(Num: 1);
28172 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
28173 if (!LegalMask) {
28174 std::swap(a&: SV0, b&: SV1);
28175 ShuffleVectorSDNode::commuteMask(Mask: NewMask);
28176 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
28177 }
28178
28179 if (LegalMask) {
28180 SV0 = DAG.getBitcast(VT: ScaleVT, V: SV0);
28181 SV1 = DAG.getBitcast(VT: ScaleVT, V: SV1);
28182 return DAG.getBitcast(
28183 VT, V: DAG.getVectorShuffle(VT: ScaleVT, dl: SDLoc(N), N1: SV0, N2: SV1, Mask: NewMask));
28184 }
28185 }
28186 }
28187 }
28188
28189 // Match shuffles of bitcasts, so long as the mask can be treated as the
28190 // larger type.
28191 if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
28192 return V;
28193
28194 // Compute the combined shuffle mask for a shuffle with SV0 as the first
28195 // operand, and SV1 as the second operand.
28196 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
28197 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
28198 auto MergeInnerShuffle =
28199 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
28200 ShuffleVectorSDNode *OtherSVN, SDValue N1,
28201 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
28202 SmallVectorImpl<int> &Mask) -> bool {
28203 // Don't try to fold splats; they're likely to simplify somehow, or they
28204 // might be free.
28205 if (OtherSVN->isSplat())
28206 return false;
28207
28208 SV0 = SV1 = SDValue();
28209 Mask.clear();
28210
28211 for (unsigned i = 0; i != NumElts; ++i) {
28212 int Idx = SVN->getMaskElt(Idx: i);
28213 if (Idx < 0) {
28214 // Propagate Undef.
28215 Mask.push_back(Elt: Idx);
28216 continue;
28217 }
28218
28219 if (Commute)
28220 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
28221
28222 SDValue CurrentVec;
28223 if (Idx < (int)NumElts) {
28224            // This shuffle index refers to the inner shuffle (OtherSVN). Look up the
28225            // inner shuffle mask to identify which vector is actually referenced.
28226 Idx = OtherSVN->getMaskElt(Idx);
28227 if (Idx < 0) {
28228 // Propagate Undef.
28229 Mask.push_back(Elt: Idx);
28230 continue;
28231 }
28232 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(Num: 0)
28233 : OtherSVN->getOperand(Num: 1);
28234 } else {
28235 // This shuffle index references an element within N1.
28236 CurrentVec = N1;
28237 }
28238
28239 // Simple case where 'CurrentVec' is UNDEF.
28240 if (CurrentVec.isUndef()) {
28241 Mask.push_back(Elt: -1);
28242 continue;
28243 }
28244
28245 // Canonicalize the shuffle index. We don't know yet if CurrentVec
28246 // will be the first or second operand of the combined shuffle.
28247 Idx = Idx % NumElts;
28248 if (!SV0.getNode() || SV0 == CurrentVec) {
28249 // Ok. CurrentVec is the left hand side.
28250 // Update the mask accordingly.
28251 SV0 = CurrentVec;
28252 Mask.push_back(Elt: Idx);
28253 continue;
28254 }
28255 if (!SV1.getNode() || SV1 == CurrentVec) {
28256 // Ok. CurrentVec is the right hand side.
28257 // Update the mask accordingly.
28258 SV1 = CurrentVec;
28259 Mask.push_back(Elt: Idx + NumElts);
28260 continue;
28261 }
28262
28263 // Last chance - see if the vector is another shuffle and if it
28264 // uses one of the existing candidate shuffle ops.
28265 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(Val&: CurrentVec)) {
28266 int InnerIdx = CurrentSVN->getMaskElt(Idx);
28267 if (InnerIdx < 0) {
28268 Mask.push_back(Elt: -1);
28269 continue;
28270 }
28271 SDValue InnerVec = (InnerIdx < (int)NumElts)
28272 ? CurrentSVN->getOperand(Num: 0)
28273 : CurrentSVN->getOperand(Num: 1);
28274 if (InnerVec.isUndef()) {
28275 Mask.push_back(Elt: -1);
28276 continue;
28277 }
28278 InnerIdx %= NumElts;
28279 if (InnerVec == SV0) {
28280 Mask.push_back(Elt: InnerIdx);
28281 continue;
28282 }
28283 if (InnerVec == SV1) {
28284 Mask.push_back(Elt: InnerIdx + NumElts);
28285 continue;
28286 }
28287 }
28288
28289 // Bail out if we cannot convert the shuffle pair into a single shuffle.
28290 return false;
28291 }
28292
28293 if (llvm::all_of(Range&: Mask, P: [](int M) { return M < 0; }))
28294 return true;
28295
28296 // Avoid introducing shuffles with illegal mask.
28297 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
28298 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
28299 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
28300 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
28301 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
28302 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
28303 if (TLI.isShuffleMaskLegal(Mask, VT))
28304 return true;
28305
28306 std::swap(a&: SV0, b&: SV1);
28307 ShuffleVectorSDNode::commuteMask(Mask);
28308 return TLI.isShuffleMaskLegal(Mask, VT);
28309 };
28310
28311 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
28312 // Canonicalize shuffles according to rules:
28313 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
28314 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
28315 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
28316 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
28317 N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
28318 // The incoming shuffle must be of the same type as the result of the
28319 // current shuffle.
28320 assert(N1->getOperand(0).getValueType() == VT &&
28321 "Shuffle types don't match");
28322
28323 SDValue SV0 = N1->getOperand(Num: 0);
28324 SDValue SV1 = N1->getOperand(Num: 1);
28325 bool HasSameOp0 = N0 == SV0;
28326 bool IsSV1Undef = SV1.isUndef();
28327 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
28328 // Commute the operands of this shuffle so merging below will trigger.
28329 return DAG.getCommutedVectorShuffle(SV: *SVN);
28330 }
28331
28332 // Canonicalize splat shuffles to the RHS to improve merging below.
28333 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
28334 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
28335 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
28336 cast<ShuffleVectorSDNode>(Val&: N0)->isSplat() &&
28337 !cast<ShuffleVectorSDNode>(Val&: N1)->isSplat()) {
28338 return DAG.getCommutedVectorShuffle(SV: *SVN);
28339 }
28340
28341 // Try to fold according to rules:
28342 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
28343 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
28344 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
28345 // Don't try to fold shuffles with illegal type.
28346 // Only fold if this shuffle is the only user of the other shuffle.
28347     // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
28348 for (int i = 0; i != 2; ++i) {
28349 if (N->getOperand(Num: i).getOpcode() == ISD::VECTOR_SHUFFLE &&
28350 N->isOnlyUserOf(N: N->getOperand(Num: i).getNode())) {
28351 // The incoming shuffle must be of the same type as the result of the
28352 // current shuffle.
28353 auto *OtherSV = cast<ShuffleVectorSDNode>(Val: N->getOperand(Num: i));
28354 assert(OtherSV->getOperand(0).getValueType() == VT &&
28355 "Shuffle types don't match");
28356
28357 SDValue SV0, SV1;
28358 SmallVector<int, 4> Mask;
28359 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(Num: 1 - i), TLI,
28360 SV0, SV1, Mask)) {
28361           // If all indices in Mask are negative (undef/poison), propagate poison.
28362 if (llvm::all_of(Range&: Mask, P: [](int M) { return M < 0; }))
28363 return DAG.getPOISON(VT);
28364
28365 return DAG.getVectorShuffle(VT, dl: SDLoc(N),
28366 N1: SV0 ? SV0 : DAG.getPOISON(VT),
28367 N2: SV1 ? SV1 : DAG.getPOISON(VT), Mask);
28368 }
28369 }
28370 }
28371
28372     // Merge the shuffle through a binop if we can merge it with at least one
28373     // of the binop's inner shuffle operands.
28374 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
28375 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
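    // e.g. (illustrative) shuffle (add (shuffle X, Y, M0), Z), undef, M1
    //      --> add (shuffle X, Y, M2), (shuffle Z, poison, M1)
    // where M2 is M1 composed with M0, so the outer shuffle is absorbed into
    // the binop's operands.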
28376 unsigned SrcOpcode = N0.getOpcode();
28377 if (TLI.isBinOp(Opcode: SrcOpcode) && N->isOnlyUserOf(N: N0.getNode()) &&
28378 (N1.isUndef() ||
28379 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N: N1.getNode()) &&
28380 N0.getResNo() == N1.getResNo()))) {
28381 // Get binop source ops, or just pass on the undef.
28382 SDValue Op00 = N0.getOperand(i: 0);
28383 SDValue Op01 = N0.getOperand(i: 1);
28384 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(i: 0);
28385 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(i: 1);
28386 // TODO: We might be able to relax the VT check but we don't currently
28387 // have any isBinOp() that has different result/ops VTs so play safe until
28388 // we have test coverage.
28389 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
28390 Op01.getValueType() == VT && Op11.getValueType() == VT &&
28391 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
28392 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
28393 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
28394 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
28395 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
28396 SmallVectorImpl<int> &Mask, bool LeftOp,
28397 bool Commute) {
28398 SDValue InnerN = Commute ? N1 : N0;
28399 SDValue Op0 = LeftOp ? Op00 : Op01;
28400 SDValue Op1 = LeftOp ? Op10 : Op11;
28401 if (Commute)
28402 std::swap(a&: Op0, b&: Op1);
28403 // Only accept the merged shuffle if we don't introduce undef elements,
28404 // or the inner shuffle already contained undef elements.
28405 auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Val&: Op0);
28406 return SVN0 && InnerN->isOnlyUserOf(N: SVN0) &&
28407 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
28408 Mask) &&
28409 (llvm::any_of(Range: SVN0->getMask(), P: [](int M) { return M < 0; }) ||
28410 llvm::none_of(Range&: Mask, P: [](int M) { return M < 0; }));
28411 };
28412
28413 // Ensure we don't increase the number of shuffles - we must merge a
28414 // shuffle from at least one of the LHS and RHS ops.
28415 bool MergedLeft = false;
28416 SDValue LeftSV0, LeftSV1;
28417 SmallVector<int, 4> LeftMask;
28418 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
28419 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
28420 MergedLeft = true;
28421 } else {
28422 LeftMask.assign(in_start: SVN->getMask().begin(), in_end: SVN->getMask().end());
28423 LeftSV0 = Op00, LeftSV1 = Op10;
28424 }
28425
28426 bool MergedRight = false;
28427 SDValue RightSV0, RightSV1;
28428 SmallVector<int, 4> RightMask;
28429 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
28430 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
28431 MergedRight = true;
28432 } else {
28433 RightMask.assign(in_start: SVN->getMask().begin(), in_end: SVN->getMask().end());
28434 RightSV0 = Op01, RightSV1 = Op11;
28435 }
28436
28437 if (MergedLeft || MergedRight) {
28438 SDLoc DL(N);
28439 SDValue LHS = DAG.getVectorShuffle(
28440 VT, dl: DL, N1: LeftSV0 ? LeftSV0 : DAG.getPOISON(VT),
28441 N2: LeftSV1 ? LeftSV1 : DAG.getPOISON(VT), Mask: LeftMask);
28442 SDValue RHS = DAG.getVectorShuffle(
28443 VT, dl: DL, N1: RightSV0 ? RightSV0 : DAG.getPOISON(VT),
28444 N2: RightSV1 ? RightSV1 : DAG.getPOISON(VT), Mask: RightMask);
28445 return DAG.getNode(Opcode: SrcOpcode, DL, VTList: N0->getVTList(), N1: LHS, N2: RHS)
28446 .getValue(R: N0.getResNo());
28447 }
28448 }
28449 }
28450 }
28451
28452 if (SDValue V = foldShuffleOfConcatUndefs(Shuf: SVN, DAG))
28453 return V;
28454
28455 // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG.
28456 // Perform this really late, because it could eliminate knowledge
28457 // of undef elements created by this shuffle.
28458 if (Level < AfterLegalizeTypes)
28459 if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI,
28460 LegalOperations))
28461 return V;
28462
28463 return SDValue();
28464}
28465
28466SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
28467 EVT VT = N->getValueType(ResNo: 0);
28468 if (!VT.isFixedLengthVector())
28469 return SDValue();
28470
28471 // Try to convert a scalar binop with an extracted vector element to a vector
28472 // binop. This is intended to reduce potentially expensive register moves.
28473 // TODO: Check if both operands are extracted.
28474   // TODO: How to prefer scalar/vector ops with multiple uses of the extract?
28475 // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
28476 SDValue Scalar = N->getOperand(Num: 0);
28477 unsigned Opcode = Scalar.getOpcode();
28478 EVT VecEltVT = VT.getScalarType();
28479 if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
28480 TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
28481 Scalar.getOperand(i: 0).getValueType() == VecEltVT &&
28482 Scalar.getOperand(i: 1).getValueType() == VecEltVT &&
28483 Scalar->isOnlyUserOf(N: Scalar.getOperand(i: 0).getNode()) &&
28484 Scalar->isOnlyUserOf(N: Scalar.getOperand(i: 1).getNode()) &&
28485 DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
28486 // Match an extract element and get a shuffle mask equivalent.
28487 SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
28488
28489 for (int i : {0, 1}) {
28490 // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
28491 // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
28492 SDValue EE = Scalar.getOperand(i);
28493 auto *C = dyn_cast<ConstantSDNode>(Val: Scalar.getOperand(i: i ? 0 : 1));
28494 if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
28495 EE.getOperand(i: 0).getValueType() == VT &&
28496 isa<ConstantSDNode>(Val: EE.getOperand(i: 1))) {
28497 // Mask = {ExtractIndex, undef, undef....}
28498 ShufMask[0] = EE.getConstantOperandVal(i: 1);
28499 // Make sure the shuffle is legal if we are crossing lanes.
28500 if (TLI.isShuffleMaskLegal(ShufMask, VT)) {
28501 SDLoc DL(N);
28502 SDValue V[] = {EE.getOperand(i: 0),
28503 DAG.getConstant(Val: C->getAPIntValue(), DL, VT)};
28504 SDValue VecBO = DAG.getNode(Opcode, DL, VT, N1: V[i], N2: V[1 - i]);
28505 return DAG.getVectorShuffle(VT, dl: DL, N1: VecBO, N2: DAG.getPOISON(VT),
28506 Mask: ShufMask);
28507 }
28508 }
28509 }
28510 }
28511
28512 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
28513 // with a VECTOR_SHUFFLE and possible truncate.
28514 if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
28515 !Scalar.getOperand(i: 0).getValueType().isFixedLengthVector())
28516 return SDValue();
28517
28518 // If we have an implicit truncate, truncate here if it is legal.
28519 if (VecEltVT != Scalar.getValueType() &&
28520 Scalar.getValueType().isScalarInteger() && isTypeLegal(VT: VecEltVT)) {
28521 SDValue Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(Scalar), VT: VecEltVT, Operand: Scalar);
28522 return DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL: SDLoc(N), VT, Operand: Val);
28523 }
28524
28525 auto *ExtIndexC = dyn_cast<ConstantSDNode>(Val: Scalar.getOperand(i: 1));
28526 if (!ExtIndexC)
28527 return SDValue();
28528
28529 SDValue SrcVec = Scalar.getOperand(i: 0);
28530 EVT SrcVT = SrcVec.getValueType();
28531 unsigned SrcNumElts = SrcVT.getVectorNumElements();
28532 unsigned VTNumElts = VT.getVectorNumElements();
28533 if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) {
28534 // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
28535 SmallVector<int, 8> Mask(SrcNumElts, -1);
28536 Mask[0] = ExtIndexC->getZExtValue();
28537 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
28538 VT: SrcVT, DL: SDLoc(N), N0: SrcVec, N1: DAG.getPOISON(VT: SrcVT), Mask, DAG);
28539 if (!LegalShuffle)
28540 return SDValue();
28541
28542 // If the initial vector is the same size, the shuffle is the result.
28543 if (VT == SrcVT)
28544 return LegalShuffle;
28545
28546 // If not, shorten the shuffled vector.
28547 if (VTNumElts != SrcNumElts) {
28548 SDValue ZeroIdx = DAG.getVectorIdxConstant(Val: 0, DL: SDLoc(N));
28549 EVT SubVT = EVT::getVectorVT(Context&: *DAG.getContext(),
28550 VT: SrcVT.getVectorElementType(), NumElements: VTNumElts);
28551 return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL: SDLoc(N), VT: SubVT, N1: LegalShuffle,
28552 N2: ZeroIdx);
28553 }
28554 }
28555
28556 return SDValue();
28557}
28558
28559SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
28560 EVT VT = N->getValueType(ResNo: 0);
28561 SDValue N0 = N->getOperand(Num: 0);
28562 SDValue N1 = N->getOperand(Num: 1);
28563 SDValue N2 = N->getOperand(Num: 2);
28564 uint64_t InsIdx = N->getConstantOperandVal(Num: 2);
28565
28566 // Remove insert of UNDEF/POISON.
28567 if (N1.isUndef()) {
28568 if (N1.getOpcode() == ISD::POISON || N0.getOpcode() == ISD::UNDEF)
28569 return N0;
28570 return DAG.getFreeze(V: N0);
28571 }
28572
28573 // If this is an insert of an extracted vector into an undef/poison vector, we
28574 // can just use the input to the extract if the types match, and can simplify
28575 // in some cases even if they don't.
28576 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
28577 N1.getOperand(i: 1) == N2) {
28578 EVT N1VT = N1.getValueType();
28579 EVT SrcVT = N1.getOperand(i: 0).getValueType();
28580 if (SrcVT == VT) {
28581       // Need to ensure that the result isn't more poisonous when skipping both
28582       // the extract and the insert.
28583 if (N0.getOpcode() == ISD::POISON)
28584 return N1.getOperand(i: 0);
28585 if (VT.isFixedLengthVector() && N1VT.isFixedLengthVector()) {
28586 unsigned SubVecNumElts = N1VT.getVectorNumElements();
28587 APInt EltMask = APInt::getBitsSet(numBits: VT.getVectorNumElements(), loBit: InsIdx,
28588 hiBit: InsIdx + SubVecNumElts);
28589 if (DAG.isGuaranteedNotToBePoison(Op: N1.getOperand(i: 0), DemandedElts: ~EltMask))
28590 return N1.getOperand(i: 0);
28591 } else if (DAG.isGuaranteedNotToBePoison(Op: N1.getOperand(i: 0)))
28592 return N1.getOperand(i: 0);
28593 }
28594     // TODO: To remove the zero check, we would need to adjust the offset to
28595     // a multiple of the new src type.
28596 if (isNullConstant(V: N2)) {
28597 if (VT.knownBitsGE(VT: SrcVT) &&
28598 !(VT.isFixedLengthVector() && SrcVT.isScalableVector()))
28599 return DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL: SDLoc(N),
28600 VT, N1: N0, N2: N1.getOperand(i: 0), N3: N2);
28601 else if (VT.knownBitsLE(VT: SrcVT) &&
28602 !(VT.isScalableVector() && SrcVT.isFixedLengthVector()))
28603 return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL: SDLoc(N),
28604 VT, N1: N1.getOperand(i: 0), N2);
28605 }
28606 }
28607
28608 // Handle case where we've ended up inserting back into the source vector
28609 // we extracted the subvector from.
28610 // insert_subvector(N0, extract_subvector(N0, N2), N2) --> N0
28611 if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(i: 0) == N0 &&
28612 N1.getOperand(i: 1) == N2)
28613 return N0;
28614
28615 // Simplify scalar inserts into an undef vector:
28616 // insert_subvector undef, (splat X), N2 -> splat X
28617 if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
28618 if (DAG.isConstantValueOfAnyType(N: N1.getOperand(i: 0)) || N1.hasOneUse())
28619 return DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: SDLoc(N), VT, Operand: N1.getOperand(i: 0));
28620
28621 // insert_subvector (splat X), (splat X), N2 -> splat X
28622 if (N0.getOpcode() == ISD::SPLAT_VECTOR && N0.getOpcode() == N1.getOpcode() &&
28623 N0.getOperand(i: 0) == N1.getOperand(i: 0))
28624 return N0;
28625
28626 // If we are inserting a bitcast value into an undef, with the same
28627 // number of elements, just use the bitcast input of the extract.
28628 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
28629 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
28630 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
28631 N1.getOperand(i: 0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
28632 N1.getOperand(i: 0).getOperand(i: 1) == N2 &&
28633 N1.getOperand(i: 0).getOperand(i: 0).getValueType().getVectorElementCount() ==
28634 VT.getVectorElementCount() &&
28635 N1.getOperand(i: 0).getOperand(i: 0).getValueType().getSizeInBits() ==
28636 VT.getSizeInBits()) {
28637 return DAG.getBitcast(VT, V: N1.getOperand(i: 0).getOperand(i: 0));
28638 }
28639
28640   // If both N0 and N1 are bitcast values on which insert_subvector
28641   // would make sense, pull the bitcast through.
28642 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
28643 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
28644 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
28645 SDValue CN0 = N0.getOperand(i: 0);
28646 SDValue CN1 = N1.getOperand(i: 0);
28647 EVT CN0VT = CN0.getValueType();
28648 EVT CN1VT = CN1.getValueType();
28649 if (CN0VT.isVector() && CN1VT.isVector() &&
28650 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
28651 CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
28652 SDValue NewINSERT = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL: SDLoc(N),
28653 VT: CN0.getValueType(), N1: CN0, N2: CN1, N3: N2);
28654 return DAG.getBitcast(VT, V: NewINSERT);
28655 }
28656 }
28657
28658 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
28659 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
28660 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
28661 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
28662 N0.getOperand(i: 1).getValueType() == N1.getValueType() &&
28663 N0.getOperand(i: 2) == N2)
28664 return DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL: SDLoc(N), VT, N1: N0.getOperand(i: 0),
28665 N2: N1, N3: N2);
28666
28667 // Eliminate an intermediate insert into an undef vector:
28668 // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
28669 // insert_subvector undef, X, 0
28670 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
28671 N1.getOperand(i: 0).isUndef() && isNullConstant(V: N1.getOperand(i: 2)) &&
28672 isNullConstant(V: N2))
28673 return DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL: SDLoc(N), VT, N1: N0,
28674 N2: N1.getOperand(i: 1), N3: N2);
28675
28676 // Push subvector bitcasts to the output, adjusting the index as we go.
28677 // insert_subvector(bitcast(v), bitcast(s), c1)
28678 // -> bitcast(insert_subvector(v, s, c2))
28679 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
28680 N1.getOpcode() == ISD::BITCAST) {
28681 SDValue N0Src = peekThroughBitcasts(V: N0);
28682 SDValue N1Src = peekThroughBitcasts(V: N1);
28683 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
28684 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
28685 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
28686 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
28687 EVT NewVT;
28688 SDLoc DL(N);
28689 SDValue NewIdx;
28690 LLVMContext &Ctx = *DAG.getContext();
28691 ElementCount NumElts = VT.getVectorElementCount();
28692 unsigned EltSizeInBits = VT.getScalarSizeInBits();
28693 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
28694 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
28695 NewVT = EVT::getVectorVT(Context&: Ctx, VT: N1SrcSVT, EC: NumElts * Scale);
28696 NewIdx = DAG.getVectorIdxConstant(Val: InsIdx * Scale, DL);
28697 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
28698 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
28699 if (NumElts.isKnownMultipleOf(RHS: Scale) && (InsIdx % Scale) == 0) {
28700 NewVT = EVT::getVectorVT(Context&: Ctx, VT: N1SrcSVT,
28701 EC: NumElts.divideCoefficientBy(RHS: Scale));
28702 NewIdx = DAG.getVectorIdxConstant(Val: InsIdx / Scale, DL);
28703 }
28704 }
28705 if (NewIdx && hasOperation(Opcode: ISD::INSERT_SUBVECTOR, VT: NewVT)) {
28706 SDValue Res = DAG.getBitcast(VT: NewVT, V: N0Src);
28707 Res = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: NewVT, N1: Res, N2: N1Src, N3: NewIdx);
28708 return DAG.getBitcast(VT, V: Res);
28709 }
28710 }
28711 }
28712
28713 // Canonicalize insert_subvector dag nodes.
28714 // Example:
28715   // (insert_subvector (insert_subvector A, B, Idx0), C, Idx1)
28716   // -> (insert_subvector (insert_subvector A, C, Idx1), B, Idx0), when Idx1 < Idx0
28717 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
28718 N1.getValueType() == N0.getOperand(i: 1).getValueType()) {
28719 unsigned OtherIdx = N0.getConstantOperandVal(i: 2);
28720 if (InsIdx < OtherIdx) {
28721 // Swap nodes.
28722 SDValue NewOp = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL: SDLoc(N), VT,
28723 N1: N0.getOperand(i: 0), N2: N1, N3: N2);
28724 AddToWorklist(N: NewOp.getNode());
28725 return DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL: SDLoc(N0.getNode()),
28726 VT, N1: NewOp, N2: N0.getOperand(i: 1), N3: N0.getOperand(i: 2));
28727 }
28728 }
28729
28730 // If the input vector is a concatenation, and the insert replaces
28731 // one of the pieces, we can optimize into a single concat_vectors.
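  // e.g. (illustrative) insert_subvector (concat_vectors A, B, C), X, <#elts(A)>
  //      --> concat_vectors A, X, C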
28732 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
28733 N0.getOperand(i: 0).getValueType() == N1.getValueType() &&
28734 N0.getOperand(i: 0).getValueType().isScalableVector() ==
28735 N1.getValueType().isScalableVector()) {
28736 unsigned Factor = N1.getValueType().getVectorMinNumElements();
28737 SmallVector<SDValue, 8> Ops(N0->ops());
28738 Ops[InsIdx / Factor] = N1;
28739 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: SDLoc(N), VT, Ops);
28740 }
28741
28742 // Simplify source operands based on insertion.
28743 if (SimplifyDemandedVectorElts(Op: SDValue(N, 0)))
28744 return SDValue(N, 0);
28745
28746 return SDValue();
28747}
28748
28749SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
28750 SDValue N0 = N->getOperand(Num: 0);
28751
28752 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
28753 if (N0->getOpcode() == ISD::FP16_TO_FP)
28754 return N0->getOperand(Num: 0);
28755
28756 return SDValue();
28757}
28758
28759SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
28760 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
28761 auto Op = N->getOpcode();
28762 assert((Op == ISD::FP16_TO_FP || Op == ISD::BF16_TO_FP) &&
28763 "opcode should be FP16_TO_FP or BF16_TO_FP.");
28764 SDValue N0 = N->getOperand(Num: 0);
28765
28766 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or
28767 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
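  // The conversion only reads the low 16 bits of its operand, so the mask is
  // redundant unless the target prefers to keep the zero-extension
  // (shouldKeepZExtForFP16Conv).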
28768 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
28769 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N: N0.getOperand(i: 1));
28770 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
28771 return DAG.getNode(Opcode: Op, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), Operand: N0.getOperand(i: 0));
28772 }
28773 }
28774
28775 if (SDValue CastEliminated = eliminateFPCastPair(N))
28776 return CastEliminated;
28777
28778 // Sometimes constants manage to survive very late in the pipeline, e.g.,
28779 // because they are wrapped inside the <1 x f16> type. Try one last time to
28780 // get rid of them.
28781 SDValue Folded = DAG.FoldConstantArithmetic(Opcode: N->getOpcode(), DL: SDLoc(N),
28782 VT: N->getValueType(ResNo: 0), Ops: {N0});
28783 return Folded;
28784}
28785
28786SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
28787 SDValue N0 = N->getOperand(Num: 0);
28788
28789 // fold (fp_to_bf16 (bf16_to_fp op)) -> op
28790 if (N0->getOpcode() == ISD::BF16_TO_FP)
28791 return N0->getOperand(Num: 0);
28792
28793 return SDValue();
28794}
28795
28796SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) {
28797 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
28798 return visitFP16_TO_FP(N);
28799}
28800
28801SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
28802 SDValue N0 = N->getOperand(Num: 0);
28803 EVT VT = N0.getValueType();
28804 unsigned Opcode = N->getOpcode();
28805
28806 // VECREDUCE over 1-element vector is just an extract.
28807 if (VT.getVectorElementCount().isScalar()) {
28808 SDLoc dl(N);
28809 SDValue Res =
28810 DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: VT.getVectorElementType(), N1: N0,
28811 N2: DAG.getVectorIdxConstant(Val: 0, DL: dl));
28812 if (Res.getValueType() != N->getValueType(ResNo: 0))
28813 Res = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: N->getValueType(ResNo: 0), Operand: Res);
28814 return Res;
28815 }
28816
28817   // On a boolean vector, an and/or reduction is the same as a umin/umax
28818 // reduction. Convert them if the latter is legal while the former isn't.
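  // e.g. (illustrative) when every element is known to be all-ones or all-zeros,
  // vecreduce_and x == vecreduce_umin x and vecreduce_or x == vecreduce_umax x.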
28819 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
28820 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
28821 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
28822 if (!TLI.isOperationLegalOrCustom(Op: Opcode, VT) &&
28823 TLI.isOperationLegalOrCustom(Op: NewOpcode, VT) &&
28824 DAG.ComputeNumSignBits(Op: N0) == VT.getScalarSizeInBits())
28825 return DAG.getNode(Opcode: NewOpcode, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), Operand: N0);
28826 }
28827
28828 // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
28829 // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
28830 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
28831 TLI.isTypeLegal(VT: N0.getOperand(i: 1).getValueType())) {
28832 SDValue Vec = N0.getOperand(i: 0);
28833 SDValue Subvec = N0.getOperand(i: 1);
28834 if ((Opcode == ISD::VECREDUCE_OR &&
28835 (N0.getOperand(i: 0).isUndef() || isNullOrNullSplat(V: Vec))) ||
28836 (Opcode == ISD::VECREDUCE_AND &&
28837 (N0.getOperand(i: 0).isUndef() || isAllOnesOrAllOnesSplat(V: Vec))))
28838 return DAG.getNode(Opcode, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), Operand: Subvec);
28839 }
28840
28841 // vecreduce_or(sext(x)) -> sext(vecreduce_or(x))
28842 // Same for zext and anyext, and for and/or/xor reductions.
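  // e.g. (illustrative) vecreduce_or (zext v4i8 X to v4i32)
  //      --> zext (vecreduce_or X) to i32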
28843 if ((Opcode == ISD::VECREDUCE_OR || Opcode == ISD::VECREDUCE_AND ||
28844 Opcode == ISD::VECREDUCE_XOR) &&
28845 (N0.getOpcode() == ISD::SIGN_EXTEND ||
28846 N0.getOpcode() == ISD::ZERO_EXTEND ||
28847 N0.getOpcode() == ISD::ANY_EXTEND) &&
28848 TLI.isOperationLegalOrCustom(Op: Opcode, VT: N0.getOperand(i: 0).getValueType())) {
28849 SDValue Red = DAG.getNode(Opcode, DL: SDLoc(N),
28850 VT: N0.getOperand(i: 0).getValueType().getScalarType(),
28851 Operand: N0.getOperand(i: 0));
28852 return DAG.getNode(Opcode: N0.getOpcode(), DL: SDLoc(N), VT: N->getValueType(ResNo: 0), Operand: Red);
28853 }
28854 return SDValue();
28855}
28856
28857SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
28858 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
28859
28860 // FSUB -> FMA combines:
28861 if (SDValue Fused = visitFSUBForFMACombine<VPMatchContext>(N)) {
28862 AddToWorklist(N: Fused.getNode());
28863 return Fused;
28864 }
28865 return SDValue();
28866}
28867
28868SDValue DAGCombiner::visitVPOp(SDNode *N) {
28869
28870 if (N->getOpcode() == ISD::VP_GATHER)
28871 if (SDValue SD = visitVPGATHER(N))
28872 return SD;
28873
28874 if (N->getOpcode() == ISD::VP_SCATTER)
28875 if (SDValue SD = visitVPSCATTER(N))
28876 return SD;
28877
28878 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD)
28879 if (SDValue SD = visitVP_STRIDED_LOAD(N))
28880 return SD;
28881
28882 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
28883 if (SDValue SD = visitVP_STRIDED_STORE(N))
28884 return SD;
28885
28886 // VP operations in which all vector elements are disabled - either by
28887 // determining that the mask is all false or that the EVL is 0 - can be
28888 // eliminated.
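  // e.g. (illustrative) a vp.add whose mask is an all-zeros splat, or whose EVL
  // is 0, has no active lanes and can be replaced by UNDEF below.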
28889 bool AreAllEltsDisabled = false;
28890 if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(Opcode: N->getOpcode()))
28891 AreAllEltsDisabled |= isNullConstant(V: N->getOperand(Num: *EVLIdx));
28892 if (auto MaskIdx = ISD::getVPMaskIdx(Opcode: N->getOpcode()))
28893 AreAllEltsDisabled |=
28894 ISD::isConstantSplatVectorAllZeros(N: N->getOperand(Num: *MaskIdx).getNode());
28895
28896   // Eliminating fully-disabled ops (below) is our only generic VP combine for now.
28897 if (!AreAllEltsDisabled) {
28898 switch (N->getOpcode()) {
28899 case ISD::VP_FADD:
28900 return visitVP_FADD(N);
28901 case ISD::VP_FSUB:
28902 return visitVP_FSUB(N);
28903 case ISD::VP_FMA:
28904 return visitFMA<VPMatchContext>(N);
28905 case ISD::VP_SELECT:
28906 return visitVP_SELECT(N);
28907 case ISD::VP_MUL:
28908 return visitMUL<VPMatchContext>(N);
28909 case ISD::VP_SUB:
28910 return foldSubCtlzNot<VPMatchContext>(N, DAG);
28911 default:
28912 break;
28913 }
28914 return SDValue();
28915 }
28916
28917 // Binary operations can be replaced by UNDEF.
28918 if (ISD::isVPBinaryOp(Opcode: N->getOpcode()))
28919 return DAG.getUNDEF(VT: N->getValueType(ResNo: 0));
28920
28921 // VP Memory operations can be replaced by either the chain (stores) or the
28922 // chain + undef (loads).
28923 if (const auto *MemSD = dyn_cast<MemSDNode>(Val: N)) {
28924 if (MemSD->writeMem())
28925 return MemSD->getChain();
28926 return CombineTo(N, Res0: DAG.getUNDEF(VT: N->getValueType(ResNo: 0)), Res1: MemSD->getChain());
28927 }
28928
28929 // Reduction operations return the start operand when no elements are active.
28930 if (ISD::isVPReduction(Opcode: N->getOpcode()))
28931 return N->getOperand(Num: 0);
28932
28933 return SDValue();
28934}
28935
28936SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
28937 SDValue Chain = N->getOperand(Num: 0);
28938 SDValue Ptr = N->getOperand(Num: 1);
28939 EVT MemVT = cast<FPStateAccessSDNode>(Val: N)->getMemoryVT();
28940
28941   // Check if the memory where the FP state is written is used only in a single
28942   // load operation.
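  // If so, the sequence (illustratively)
  //   get_fpenv_mem Tmp; X = load Tmp; store X, Dst
  // can be rewritten as get_fpenv_mem Dst, writing the FP state directly to
  // its final destination.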
28943 LoadSDNode *LdNode = nullptr;
28944 for (auto *U : Ptr->users()) {
28945 if (U == N)
28946 continue;
28947 if (auto *Ld = dyn_cast<LoadSDNode>(Val: U)) {
28948 if (LdNode && LdNode != Ld)
28949 return SDValue();
28950 LdNode = Ld;
28951 continue;
28952 }
28953 return SDValue();
28954 }
28955 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
28956 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
28957 !LdNode->getChain().reachesChainWithoutSideEffects(Dest: SDValue(N, 0)))
28958 return SDValue();
28959
28960 // Check if the loaded value is used only in a store operation.
28961 StoreSDNode *StNode = nullptr;
28962 for (SDUse &U : LdNode->uses()) {
28963 if (U.getResNo() == 0) {
28964 if (auto *St = dyn_cast<StoreSDNode>(Val: U.getUser())) {
28965 if (StNode)
28966 return SDValue();
28967 StNode = St;
28968 } else {
28969 return SDValue();
28970 }
28971 }
28972 }
28973 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
28974 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
28975 !StNode->getChain().reachesChainWithoutSideEffects(Dest: SDValue(LdNode, 1)))
28976 return SDValue();
28977
28978   // Create a new GET_FPENV_MEM node, which uses the store address to write the
28979   // FP environment.
28980 SDValue Res = DAG.getGetFPEnv(Chain, dl: SDLoc(N), Ptr: StNode->getBasePtr(), MemVT,
28981 MMO: StNode->getMemOperand());
28982 CombineTo(N: StNode, Res, AddTo: false);
28983 return Res;
28984}
28985
28986SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
28987 SDValue Chain = N->getOperand(Num: 0);
28988 SDValue Ptr = N->getOperand(Num: 1);
28989 EVT MemVT = cast<FPStateAccessSDNode>(Val: N)->getMemoryVT();
28990
28991   // Check if the FP state address is also used only in a single store operation.
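  // Ultimately we are looking for the pattern (illustratively)
  //   X = load Src; store X, Tmp; set_fpenv_mem Tmp
  // which can be rewritten as set_fpenv_mem Src, reading the FP state directly
  // from its original source.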
28992 StoreSDNode *StNode = nullptr;
28993 for (auto *U : Ptr->users()) {
28994 if (U == N)
28995 continue;
28996 if (auto *St = dyn_cast<StoreSDNode>(Val: U)) {
28997 if (StNode && StNode != St)
28998 return SDValue();
28999 StNode = St;
29000 continue;
29001 }
29002 return SDValue();
29003 }
29004 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
29005 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
29006 !Chain.reachesChainWithoutSideEffects(Dest: SDValue(StNode, 0)))
29007 return SDValue();
29008
29009 // Check if the stored value is loaded from some location and the loaded
29010 // value is used only in the store operation.
29011 SDValue StValue = StNode->getValue();
29012 auto *LdNode = dyn_cast<LoadSDNode>(Val&: StValue);
29013 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
29014 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
29015 !StNode->getChain().reachesChainWithoutSideEffects(Dest: SDValue(LdNode, 1)))
29016 return SDValue();
29017
29018   // Create a new SET_FPENV_MEM node, which uses the load address to read the
29019   // FP environment.
29020 SDValue Res =
29021 DAG.getSetFPEnv(Chain: LdNode->getChain(), dl: SDLoc(N), Ptr: LdNode->getBasePtr(), MemVT,
29022 MMO: LdNode->getMemOperand());
29023 return Res;
29024}
29025
29026/// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
29027/// with the destination vector and a zero vector.
29028/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0> ==>
29029/// vector_shuffle V, Zero, <0, 4, 2, 4>
29030SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
29031 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
29032
29033 EVT VT = N->getValueType(ResNo: 0);
29034 SDValue LHS = N->getOperand(Num: 0);
29035 SDValue RHS = peekThroughBitcasts(V: N->getOperand(Num: 1));
29036 SDLoc DL(N);
29037
29038 // Make sure we're not running after operation legalization where it
29039 // may have custom lowered the vector shuffles.
29040 if (LegalOperations)
29041 return SDValue();
29042
29043 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
29044 return SDValue();
29045
29046 EVT RVT = RHS.getValueType();
29047 unsigned NumElts = RHS.getNumOperands();
29048
29049   // Attempt to create a valid clear mask by splitting the mask into
29050   // sub-elements and checking whether each one is all zeros or all ones,
29051   // making it suitable for shuffle masking.
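  // e.g. (illustrative) for a v2i64 AND with <0x00000000FFFFFFFF, -1>, splitting
  // each i64 into two i32 sub-elements yields lanes that are each entirely ones
  // or zeros, so the AND can become a v4i32 shuffle with a zero vector.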
29052 auto BuildClearMask = [&](int Split) {
29053 int NumSubElts = NumElts * Split;
29054 int NumSubBits = RVT.getScalarSizeInBits() / Split;
29055
29056 SmallVector<int, 8> Indices;
29057 for (int i = 0; i != NumSubElts; ++i) {
29058 int EltIdx = i / Split;
29059 int SubIdx = i % Split;
29060 SDValue Elt = RHS.getOperand(i: EltIdx);
29061 // X & undef --> 0 (not undef). So this lane must be converted to choose
29062 // from the zero constant vector (same as if the element had all 0-bits).
29063 if (Elt.isUndef()) {
29064 Indices.push_back(Elt: i + NumSubElts);
29065 continue;
29066 }
29067
29068 std::optional<APInt> Bits = Elt->bitcastToAPInt();
29069 if (!Bits)
29070 return SDValue();
29071
29072 // Extract the sub element from the constant bit mask.
29073 if (DAG.getDataLayout().isBigEndian())
29074 *Bits =
29075 Bits->extractBits(numBits: NumSubBits, bitPosition: (Split - SubIdx - 1) * NumSubBits);
29076 else
29077 *Bits = Bits->extractBits(numBits: NumSubBits, bitPosition: SubIdx * NumSubBits);
29078
29079 if (Bits->isAllOnes())
29080 Indices.push_back(Elt: i);
29081 else if (*Bits == 0)
29082 Indices.push_back(Elt: i + NumSubElts);
29083 else
29084 return SDValue();
29085 }
29086
29087 // Let's see if the target supports this vector_shuffle.
29088 EVT ClearSVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumSubBits);
29089 EVT ClearVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: ClearSVT, NumElements: NumSubElts);
29090 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
29091 return SDValue();
29092
29093 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: ClearVT);
29094 return DAG.getBitcast(VT, V: DAG.getVectorShuffle(VT: ClearVT, dl: DL,
29095 N1: DAG.getBitcast(VT: ClearVT, V: LHS),
29096 N2: Zero, Mask: Indices));
29097 };
29098
29099 // Determine maximum split level (byte level masking).
29100 int MaxSplit = 1;
29101 if (RVT.getScalarSizeInBits() % 8 == 0)
29102 MaxSplit = RVT.getScalarSizeInBits() / 8;
29103
29104 for (int Split = 1; Split <= MaxSplit; ++Split)
29105 if (RVT.getScalarSizeInBits() % Split == 0)
29106 if (SDValue S = BuildClearMask(Split))
29107 return S;
29108
29109 return SDValue();
29110}
29111
29112/// If a vector binop is performed on splat values, it may be profitable to
29113/// extract, scalarize, and insert/splat.
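/// e.g. (illustrative) add (splat X), (splat Y) --> splat (add X, Y), so the
/// operation is performed once on scalars rather than across the whole vector.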
29114static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
29115 const SDLoc &DL, bool LegalTypes) {
29116 SDValue N0 = N->getOperand(Num: 0);
29117 SDValue N1 = N->getOperand(Num: 1);
29118 unsigned Opcode = N->getOpcode();
29119 EVT VT = N->getValueType(ResNo: 0);
29120 EVT EltVT = VT.getVectorElementType();
29121 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
29122
29123 // TODO: Remove/replace the extract cost check? If the elements are available
29124 // as scalars, then there may be no extract cost. Should we ask if
29125 // inserting a scalar back into a vector is cheap instead?
29126 int Index0, Index1;
29127 SDValue Src0 = DAG.getSplatSourceVector(V: N0, SplatIndex&: Index0);
29128 SDValue Src1 = DAG.getSplatSourceVector(V: N1, SplatIndex&: Index1);
29129   // Extracting an element from a splat_vector should be free.
29130 // TODO: use DAG.isSplatValue instead?
29131 bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
29132 N1.getOpcode() == ISD::SPLAT_VECTOR;
29133 if (!Src0 || !Src1 || Index0 != Index1 ||
29134 Src0.getValueType().getVectorElementType() != EltVT ||
29135 Src1.getValueType().getVectorElementType() != EltVT ||
29136 !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index: Index0)) ||
29137 // If before type legalization, allow scalar types that will eventually be
29138 // made legal.
29139 !TLI.isOperationLegalOrCustom(
29140 Op: Opcode, VT: LegalTypes
29141 ? EltVT
29142 : TLI.getTypeToTransformTo(Context&: *DAG.getContext(), VT: EltVT)))
29143 return SDValue();
29144
29145 // FIXME: Type legalization can't handle illegal MULHS/MULHU.
29146 if ((Opcode == ISD::MULHS || Opcode == ISD::MULHU) && !TLI.isTypeLegal(VT: EltVT))
29147 return SDValue();
29148
29149 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode()) {
29150 // All but one element should have an undef input, which will fold to a
29151 // constant or undef. Avoid splatting which would over-define potentially
29152 // undefined elements.
29153
29154 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
29155 // build_vec ..undef, (bo X, Y), undef...
29156 SmallVector<SDValue, 16> EltsX, EltsY, EltsResult;
29157 DAG.ExtractVectorElements(Op: Src0, Args&: EltsX);
29158 DAG.ExtractVectorElements(Op: Src1, Args&: EltsY);
29159
29160 for (auto [X, Y] : zip(t&: EltsX, u&: EltsY))
29161 EltsResult.push_back(Elt: DAG.getNode(Opcode, DL, VT: EltVT, N1: X, N2: Y, Flags: N->getFlags()));
29162 return DAG.getBuildVector(VT, DL, Ops: EltsResult);
29163 }
29164
29165 SDValue IndexC = DAG.getVectorIdxConstant(Val: Index0, DL);
29166 SDValue X = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT, N1: Src0, N2: IndexC);
29167 SDValue Y = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT, N1: Src1, N2: IndexC);
29168 SDValue ScalarBO = DAG.getNode(Opcode, DL, VT: EltVT, N1: X, N2: Y, Flags: N->getFlags());
29169
29170 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
29171 return DAG.getSplat(VT, DL, Op: ScalarBO);
29172}
29173
29174/// Visit a vector cast operation, like FP_EXTEND.
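/// e.g. (illustrative) fp_extend (splat X) --> splat (fp_extend X), when the
/// target prefers to scalarize the splat.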
29175SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
29176 EVT VT = N->getValueType(ResNo: 0);
29177 assert(VT.isVector() && "SimplifyVCastOp only works on vectors!");
29178 EVT EltVT = VT.getVectorElementType();
29179 unsigned Opcode = N->getOpcode();
29180
29181 SDValue N0 = N->getOperand(Num: 0);
29182 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
29183
29184   // TODO: Promoting the operation might also be good here?
29185 int Index0;
29186 SDValue Src0 = DAG.getSplatSourceVector(V: N0, SplatIndex&: Index0);
29187 if (Src0 &&
29188 (N0.getOpcode() == ISD::SPLAT_VECTOR ||
29189 TLI.isExtractVecEltCheap(VT, Index: Index0)) &&
29190 TLI.isOperationLegalOrCustom(Op: Opcode, VT: EltVT) &&
29191 TLI.preferScalarizeSplat(N)) {
29192 EVT SrcVT = N0.getValueType();
29193 EVT SrcEltVT = SrcVT.getVectorElementType();
29194 if (!LegalTypes || TLI.isTypeLegal(VT: SrcEltVT)) {
29195 SDValue IndexC = DAG.getVectorIdxConstant(Val: Index0, DL);
29196 SDValue Elt =
29197 DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: SrcEltVT, N1: Src0, N2: IndexC);
29198 SDValue ScalarBO = DAG.getNode(Opcode, DL, VT: EltVT, Operand: Elt, Flags: N->getFlags());
29199 if (VT.isScalableVector())
29200 return DAG.getSplatVector(VT, DL, Op: ScalarBO);
29201 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
29202 return DAG.getBuildVector(VT, DL, Ops);
29203 }
29204 }
29205
29206 return SDValue();
29207}
29208
29209/// Visit a binary vector operation, like ADD.
29210SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
29211 EVT VT = N->getValueType(ResNo: 0);
29212 assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
29213
29214 SDValue LHS = N->getOperand(Num: 0);
29215 SDValue RHS = N->getOperand(Num: 1);
29216 unsigned Opcode = N->getOpcode();
29217 SDNodeFlags Flags = N->getFlags();
29218
29219 // Move unary shuffles with identical masks after a vector binop:
29220 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
29221 // --> shuffle (VBinOp A, B), Undef, Mask
29222 // This does not require type legality checks because we are creating the
29223 // same types of operations that are in the original sequence. We do have to
29224   // restrict ops like integer div that have immediate UB (e.g., div-by-zero)
29225 // though. This code is adapted from the identical transform in instcombine.
29226 if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
29227 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Val&: LHS);
29228 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(Val&: RHS);
29229 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(RHS: Shuf1->getMask()) &&
29230 LHS.getOperand(i: 1).isUndef() && RHS.getOperand(i: 1).isUndef() &&
29231 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
29232 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, N1: LHS.getOperand(i: 0),
29233 N2: RHS.getOperand(i: 0), Flags);
29234 SDValue UndefV = LHS.getOperand(i: 1);
29235 return DAG.getVectorShuffle(VT, dl: DL, N1: NewBinOp, N2: UndefV, Mask: Shuf0->getMask());
29236 }
29237
29238 // Try to sink a splat shuffle after a binop with a uniform constant.
29239 // This is limited to cases where neither the shuffle nor the constant have
29240 // undefined elements because that could be poison-unsafe or inhibit
29241 // demanded elements analysis. It is further limited to not change a splat
29242 // of an inserted scalar because that may be optimized better by
29243 // load-folding or other target-specific behaviors.
29244 if (isConstOrConstSplat(N: RHS) && Shuf0 && all_equal(Range: Shuf0->getMask()) &&
29245 Shuf0->hasOneUse() && Shuf0->getOperand(Num: 1).isUndef() &&
29246 Shuf0->getOperand(Num: 0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
29247 // binop (splat X), (splat C) --> splat (binop X, C)
29248 SDValue X = Shuf0->getOperand(Num: 0);
29249 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, N1: X, N2: RHS, Flags);
29250 return DAG.getVectorShuffle(VT, dl: DL, N1: NewBinOp, N2: DAG.getPOISON(VT),
29251 Mask: Shuf0->getMask());
29252 }
29253 if (isConstOrConstSplat(N: LHS) && Shuf1 && all_equal(Range: Shuf1->getMask()) &&
29254 Shuf1->hasOneUse() && Shuf1->getOperand(Num: 1).isUndef() &&
29255 Shuf1->getOperand(Num: 0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
29256 // binop (splat C), (splat X) --> splat (binop C, X)
29257 SDValue X = Shuf1->getOperand(Num: 0);
29258 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, N1: LHS, N2: X, Flags);
29259 return DAG.getVectorShuffle(VT, dl: DL, N1: NewBinOp, N2: DAG.getPOISON(VT),
29260 Mask: Shuf1->getMask());
29261 }
29262 }
29263
29264 // The following pattern is likely to emerge with vector reduction ops. Moving
29265 // the binary operation ahead of insertion may allow using a narrower vector
29266 // instruction that has better performance than the wide version of the op:
29267 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
29268 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(i: 0).isUndef() &&
29269 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(i: 0).isUndef() &&
29270 LHS.getOperand(i: 2) == RHS.getOperand(i: 2) &&
29271 (LHS.hasOneUse() || RHS.hasOneUse())) {
29272 SDValue X = LHS.getOperand(i: 1);
29273 SDValue Y = RHS.getOperand(i: 1);
29274 SDValue Z = LHS.getOperand(i: 2);
29275 EVT NarrowVT = X.getValueType();
29276 if (NarrowVT == Y.getValueType() &&
29277 TLI.isOperationLegalOrCustomOrPromote(Op: Opcode, VT: NarrowVT,
29278 LegalOnly: LegalOperations)) {
29279 // (binop undef, undef) may not return undef, so compute that result.
29280 SDValue VecC =
29281 DAG.getNode(Opcode, DL, VT, N1: DAG.getUNDEF(VT), N2: DAG.getUNDEF(VT));
29282 SDValue NarrowBO = DAG.getNode(Opcode, DL, VT: NarrowVT, N1: X, N2: Y);
29283 return DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT, N1: VecC, N2: NarrowBO, N3: Z);
29284 }
29285 }
29286
29287 // Make sure all but the first op are undef or constant.
29288 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
29289 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
29290 all_of(Range: drop_begin(RangeOrContainer: Concat->ops()), P: [](const SDValue &Op) {
29291 return Op.isUndef() ||
29292 ISD::isBuildVectorOfConstantSDNodes(N: Op.getNode());
29293 });
29294 };
29295
29296 // The following pattern is likely to emerge with vector reduction ops. Moving
29297 // the binary operation ahead of the concat may allow using a narrower vector
29298 // instruction that has better performance than the wide version of the op:
29299 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
29300 // concat (VBinOp X, Y), VecC
29301 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
29302 (LHS.hasOneUse() || RHS.hasOneUse())) {
29303 EVT NarrowVT = LHS.getOperand(i: 0).getValueType();
29304 if (NarrowVT == RHS.getOperand(i: 0).getValueType() &&
29305 TLI.isOperationLegalOrCustomOrPromote(Op: Opcode, VT: NarrowVT)) {
29306 unsigned NumOperands = LHS.getNumOperands();
29307 SmallVector<SDValue, 4> ConcatOps;
29308 for (unsigned i = 0; i != NumOperands; ++i) {
29309         // This constant-folds for operands 1 and up.
29310 ConcatOps.push_back(Elt: DAG.getNode(Opcode, DL, VT: NarrowVT, N1: LHS.getOperand(i),
29311 N2: RHS.getOperand(i)));
29312 }
29313
29314 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, Ops: ConcatOps);
29315 }
29316 }
29317
29318 if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL, LegalTypes))
29319 return V;
29320
29321 return SDValue();
29322}
29323
29324SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
29325 SDValue N2) {
29326 assert(N0.getOpcode() == ISD::SETCC &&
29327 "First argument must be a SetCC node!");
29328
29329 SDValue SCC = SimplifySelectCC(DL, N0: N0.getOperand(i: 0), N1: N0.getOperand(i: 1), N2: N1, N3: N2,
29330 CC: cast<CondCodeSDNode>(Val: N0.getOperand(i: 2))->get());
29331
29332 // If we got a simplified select_cc node back from SimplifySelectCC, then
29333 // break it down into a new SETCC node, and a new SELECT node, and then return
29334 // the SELECT node, since we were called with a SELECT node.
29335 if (SCC.getNode()) {
29336 // Check to see if we got a select_cc back (to turn into setcc/select).
29337 // Otherwise, just return whatever node we got back, like fabs.
29338 if (SCC.getOpcode() == ISD::SELECT_CC) {
29339 const SDNodeFlags Flags = N0->getFlags();
29340 SDValue SETCC = DAG.getNode(Opcode: ISD::SETCC, DL: SDLoc(N0),
29341 VT: N0.getValueType(),
29342 N1: SCC.getOperand(i: 0), N2: SCC.getOperand(i: 1),
29343 N3: SCC.getOperand(i: 4), Flags);
29344 AddToWorklist(N: SETCC.getNode());
29345 return DAG.getSelect(DL: SDLoc(SCC), VT: SCC.getValueType(), Cond: SETCC,
29346 LHS: SCC.getOperand(i: 2), RHS: SCC.getOperand(i: 3), Flags);
29347 }
29348
29349 return SCC;
29350 }
29351 return SDValue();
29352}
29353
29354/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
29355/// being selected between, see if we can simplify the select. Callers of this
29356/// should assume that TheSelect is deleted if this returns true. As such, they
29357/// should return the appropriate thing (e.g. the node) back to the top-level of
29358/// the DAG combiner loop to avoid it being looked at.
29359bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
29360 SDValue RHS) {
29361 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
29362 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
29363 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(N: LHS)) {
29364 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
29365 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
29366 SDValue Sqrt = RHS;
29367 ISD::CondCode CC;
29368 SDValue CmpLHS;
29369 const ConstantFPSDNode *Zero = nullptr;
29370
29371 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
29372 CC = cast<CondCodeSDNode>(Val: TheSelect->getOperand(Num: 4))->get();
29373 CmpLHS = TheSelect->getOperand(Num: 0);
29374 Zero = isConstOrConstSplatFP(N: TheSelect->getOperand(Num: 1));
29375 } else {
29376 // SELECT or VSELECT
29377 SDValue Cmp = TheSelect->getOperand(Num: 0);
29378 if (Cmp.getOpcode() == ISD::SETCC) {
29379 CC = cast<CondCodeSDNode>(Val: Cmp.getOperand(i: 2))->get();
29380 CmpLHS = Cmp.getOperand(i: 0);
29381 Zero = isConstOrConstSplatFP(N: Cmp.getOperand(i: 1));
29382 }
29383 }
29384 if (Zero && Zero->isZero() &&
29385 Sqrt.getOperand(i: 0) == CmpLHS && (CC == ISD::SETOLT ||
29386 CC == ISD::SETULT || CC == ISD::SETLT)) {
29387 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
29388 CombineTo(N: TheSelect, Res: Sqrt);
29389 return true;
29390 }
29391 }
29392 }
29393   // Cannot simplify a select with a vector condition.
29394 if (TheSelect->getOperand(Num: 0).getValueType().isVector()) return false;
29395
29396 // If this is a select from two identical things, try to pull the operation
29397 // through the select.
29398 if (LHS.getOpcode() != RHS.getOpcode() ||
29399 !LHS.hasOneUse() || !RHS.hasOneUse())
29400 return false;
29401
29402 // If this is a load and the token chain is identical, replace the select
29403 // of two loads with a load through a select of the address to load from.
29404 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
29405 // constants have been dropped into the constant pool.
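  // e.g. (illustrative) select C, (load P1), (load P2)
  //      --> load (select C, P1, P2)
  // provided the loads are simple, compatible and independent (checked below).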
29406 if (LHS.getOpcode() == ISD::LOAD) {
29407 LoadSDNode *LLD = cast<LoadSDNode>(Val&: LHS);
29408 LoadSDNode *RLD = cast<LoadSDNode>(Val&: RHS);
29409
29410 // Token chains must be identical.
29411 if (LHS.getOperand(i: 0) != RHS.getOperand(i: 0) ||
29412 // Do not let this transformation reduce the number of volatile loads.
29413 // Be conservative for atomics for the moment
29414 // TODO: This does appear to be legal for unordered atomics (see D66309)
29415 !LLD->isSimple() || !RLD->isSimple() ||
29416 // FIXME: If either is a pre/post inc/dec load,
29417 // we'd need to split out the address adjustment.
29418 LLD->isIndexed() || RLD->isIndexed() ||
29419 // If this is an EXTLOAD, the VT's must match.
29420 LLD->getMemoryVT() != RLD->getMemoryVT() ||
29421 // If this is an EXTLOAD, the kind of extension must match.
29422 (LLD->getExtensionType() != RLD->getExtensionType() &&
29423 // The only exception is if one of the extensions is anyext.
29424 LLD->getExtensionType() != ISD::EXTLOAD &&
29425 RLD->getExtensionType() != ISD::EXTLOAD) ||
29426 // FIXME: this discards src value information. This is
29427 // over-conservative. It would be beneficial to be able to remember
29428 // both potential memory locations. Since we are discarding
29429 // src value info, don't do the transformation if the memory
29430 // locations are not in the same address space.
29431 LLD->getPointerInfo().getAddrSpace() !=
29432 RLD->getPointerInfo().getAddrSpace() ||
29433 // We can't produce a CMOV of a TargetFrameIndex since we won't
29434 // generate the address generation required.
29435 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
29436 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
29437 !TLI.isOperationLegalOrCustom(Op: TheSelect->getOpcode(),
29438 VT: LLD->getBasePtr().getValueType()))
29439 return false;
29440
29441 // The loads must not depend on one another.
29442 if (LLD->isPredecessorOf(N: RLD) || RLD->isPredecessorOf(N: LLD))
29443 return false;
29444
29445 // Check that the select condition doesn't reach either load. If so,
29446 // folding this will induce a cycle into the DAG. If not, this is safe to
29447 // xform, so create a select of the addresses.
29448
29449 SmallPtrSet<const SDNode *, 32> Visited;
29450 SmallVector<const SDNode *, 16> Worklist;
29451
29452 // Always fail if LLD and RLD are not independent. TheSelect is a
29453 // predecessor to all Nodes in question so we need not search past it.
29454
29455 Visited.insert(Ptr: TheSelect);
29456 Worklist.push_back(Elt: LLD);
29457 Worklist.push_back(Elt: RLD);
29458
29459 if (SDNode::hasPredecessorHelper(N: LLD, Visited, Worklist) ||
29460 SDNode::hasPredecessorHelper(N: RLD, Visited, Worklist))
29461 return false;
29462
29463 SDValue Addr;
29464 if (TheSelect->getOpcode() == ISD::SELECT) {
29465 // We cannot do this optimization if any pair of {RLD, LLD} is a
29466 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
29467 // Loads, we only need to check if CondNode is a successor to one of the
29468 // loads. We can further avoid this if there's no use of their chain
29469 // value.
29470 SDNode *CondNode = TheSelect->getOperand(Num: 0).getNode();
29471 Worklist.push_back(Elt: CondNode);
29472
29473 if ((LLD->hasAnyUseOfValue(Value: 1) &&
29474 SDNode::hasPredecessorHelper(N: LLD, Visited, Worklist)) ||
29475 (RLD->hasAnyUseOfValue(Value: 1) &&
29476 SDNode::hasPredecessorHelper(N: RLD, Visited, Worklist)))
29477 return false;
29478
29479 Addr = DAG.getSelect(DL: SDLoc(TheSelect),
29480 VT: LLD->getBasePtr().getValueType(),
29481 Cond: TheSelect->getOperand(Num: 0), LHS: LLD->getBasePtr(),
29482 RHS: RLD->getBasePtr());
29483 } else { // Otherwise SELECT_CC
29484     // We cannot do this optimization if any member of {RLD, LLD} is a
29485     // predecessor of any member of {RLD, LLD, CondLHS, CondRHS}. As we've
29486     // already compared the loads against each other, we only need to check
29487     // whether CondLHS/CondRHS is a successor of one of the loads. We can
29488     // further skip that check when there is no use of their chain value.
29489
29490 SDNode *CondLHS = TheSelect->getOperand(Num: 0).getNode();
29491 SDNode *CondRHS = TheSelect->getOperand(Num: 1).getNode();
29492 Worklist.push_back(Elt: CondLHS);
29493 Worklist.push_back(Elt: CondRHS);
29494
29495 if ((LLD->hasAnyUseOfValue(Value: 1) &&
29496 SDNode::hasPredecessorHelper(N: LLD, Visited, Worklist)) ||
29497 (RLD->hasAnyUseOfValue(Value: 1) &&
29498 SDNode::hasPredecessorHelper(N: RLD, Visited, Worklist)))
29499 return false;
29500
29501 Addr = DAG.getNode(Opcode: ISD::SELECT_CC, DL: SDLoc(TheSelect),
29502 VT: LLD->getBasePtr().getValueType(),
29503 N1: TheSelect->getOperand(Num: 0),
29504 N2: TheSelect->getOperand(Num: 1),
29505 N3: LLD->getBasePtr(), N4: RLD->getBasePtr(),
29506 N5: TheSelect->getOperand(Num: 4));
29507 }
29508
29509 SDValue Load;
29510 // It is safe to replace the two loads if they have different alignments,
29511 // but the new load must be the minimum (most restrictive) alignment of the
29512 // inputs.
29513 Align Alignment = std::min(a: LLD->getAlign(), b: RLD->getAlign());
29514 unsigned AddrSpace = LLD->getAddressSpace();
29515 assert(AddrSpace == RLD->getAddressSpace());
29516
29517 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
29518 if (!RLD->isInvariant())
29519 MMOFlags &= ~MachineMemOperand::MOInvariant;
29520 if (!RLD->isDereferenceable())
29521 MMOFlags &= ~MachineMemOperand::MODereferenceable;
29522 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
29523 // FIXME: Discards pointer and AA info.
29524 Load = DAG.getLoad(VT: TheSelect->getValueType(ResNo: 0), dl: SDLoc(TheSelect),
29525 Chain: LLD->getChain(), Ptr: Addr, PtrInfo: MachinePointerInfo(AddrSpace),
29526 Alignment, MMOFlags);
29527 } else {
29528 // FIXME: Discards pointer and AA info.
29529 Load = DAG.getExtLoad(
29530 ExtType: LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
29531 : LLD->getExtensionType(),
29532 dl: SDLoc(TheSelect), VT: TheSelect->getValueType(ResNo: 0), Chain: LLD->getChain(), Ptr: Addr,
29533 PtrInfo: MachinePointerInfo(AddrSpace), MemVT: LLD->getMemoryVT(), Alignment,
29534 MMOFlags);
29535 }
29536
29537 // Users of the select now use the result of the load.
29538 CombineTo(N: TheSelect, Res: Load);
29539
29540 // Users of the old loads now use the new load's chain. We know the
29541 // old-load value is dead now.
29542 CombineTo(N: LHS.getNode(), Res0: Load.getValue(R: 0), Res1: Load.getValue(R: 1));
29543 CombineTo(N: RHS.getNode(), Res0: Load.getValue(R: 0), Res1: Load.getValue(R: 1));
29544 return true;
29545 }
29546
29547 return false;
29548}
29549
29550/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
29551/// bitwise 'and'.
29552SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
29553 SDValue N1, SDValue N2, SDValue N3,
29554 ISD::CondCode CC) {
29555 // If this is a select where the false operand is zero and the compare is a
29556 // check of the sign bit, see if we can perform the "gzip trick":
29557 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
29558 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
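  // For illustration (assuming i32 X): sra X, 31 yields all-ones when X is
  // negative and zero otherwise, so the AND selects A for X < 0 and 0
  // otherwise; the setgt form needs the extra NOT to invert that mask.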
29559 EVT XType = N0.getValueType();
29560 EVT AType = N2.getValueType();
29561 if (!isNullConstant(V: N3) || !XType.bitsGE(VT: AType))
29562 return SDValue();
29563
29564 // If the comparison is testing for a positive value, we have to invert
29565 // the sign bit mask, so only do that transform if the target has a bitwise
29566 // 'and not' instruction (the invert is free).
29567 if (CC == ISD::SETGT && TLI.hasAndNot(X: N2)) {
29568 // (X > -1) ? A : 0
29569 // (X > 0) ? X : 0 <-- This is canonical signed max.
29570 if (!(isAllOnesConstant(V: N1) || (isNullConstant(V: N1) && N0 == N2)))
29571 return SDValue();
29572 } else if (CC == ISD::SETLT) {
29573 // (X < 0) ? A : 0
29574 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
29575 if (!(isNullConstant(V: N1) || (isOneConstant(V: N1) && N0 == N2)))
29576 return SDValue();
29577 } else {
29578 return SDValue();
29579 }
29580
29581 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
29582 // constant.
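  // E.g. (illustrative, i32): for A == 8 (bit 3), C2 = 32 - 3 - 1 = 28, so
  // (srl X, 28) & 8 produces 8 exactly when the sign bit of X is set.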
29583 auto *N2C = dyn_cast<ConstantSDNode>(Val: N2.getNode());
29584 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
29585 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
29586 if (!TLI.shouldAvoidTransformToShift(VT: XType, Amount: ShCt)) {
29587 SDValue ShiftAmt = DAG.getShiftAmountConstant(Val: ShCt, VT: XType, DL);
29588 SDValue Shift = DAG.getNode(Opcode: ISD::SRL, DL, VT: XType, N1: N0, N2: ShiftAmt);
29589 AddToWorklist(N: Shift.getNode());
29590
29591 if (XType.bitsGT(VT: AType)) {
29592 Shift = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: AType, Operand: Shift);
29593 AddToWorklist(N: Shift.getNode());
29594 }
29595
29596 if (CC == ISD::SETGT)
29597 Shift = DAG.getNOT(DL, Val: Shift, VT: AType);
29598
29599 return DAG.getNode(Opcode: ISD::AND, DL, VT: AType, N1: Shift, N2);
29600 }
29601 }
29602
29603 unsigned ShCt = XType.getSizeInBits() - 1;
29604 if (TLI.shouldAvoidTransformToShift(VT: XType, Amount: ShCt))
29605 return SDValue();
29606
29607 SDValue ShiftAmt = DAG.getShiftAmountConstant(Val: ShCt, VT: XType, DL);
29608 SDValue Shift = DAG.getNode(Opcode: ISD::SRA, DL, VT: XType, N1: N0, N2: ShiftAmt);
29609 AddToWorklist(N: Shift.getNode());
29610
29611 if (XType.bitsGT(VT: AType)) {
29612 Shift = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: AType, Operand: Shift);
29613 AddToWorklist(N: Shift.getNode());
29614 }
29615
29616 if (CC == ISD::SETGT)
29617 Shift = DAG.getNOT(DL, Val: Shift, VT: AType);
29618
29619 return DAG.getNode(Opcode: ISD::AND, DL, VT: AType, N1: Shift, N2);
29620}
29621
29622 // Fold select(cc, binop(x, y), binop(z, y)) -> binop(select(cc, x, z), y), etc.
29623SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
29624 SDValue N0 = N->getOperand(Num: 0);
29625 SDValue N1 = N->getOperand(Num: 1);
29626 SDValue N2 = N->getOperand(Num: 2);
29627 SDLoc DL(N);
29628
29629 unsigned BinOpc = N1.getOpcode();
29630 if (!TLI.isBinOp(Opcode: BinOpc) || (N2.getOpcode() != BinOpc) ||
29631 (N1.getResNo() != N2.getResNo()))
29632 return SDValue();
29633
29634 // The use checks are intentionally on SDNode because we may be dealing
29635 // with opcodes that produce more than one SDValue.
29636 // TODO: Do we really need to check N0 (the condition operand of the select)?
29637 // But removing that clause could cause an infinite loop...
29638 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
29639 return SDValue();
29640
29641 // Binops may include opcodes that return multiple values, so all values
29642 // must be created/propagated from the newly created binops below.
29643 SDVTList OpVTs = N1->getVTList();
29644
29645 // Fold select(cond, binop(x, y), binop(z, y))
29646 // --> binop(select(cond, x, z), y)
29647 if (N1.getOperand(i: 1) == N2.getOperand(i: 1)) {
29648 SDValue N10 = N1.getOperand(i: 0);
29649 SDValue N20 = N2.getOperand(i: 0);
29650 SDValue NewSel = DAG.getSelect(DL, VT: N10.getValueType(), Cond: N0, LHS: N10, RHS: N20);
29651 SDNodeFlags Flags = N1->getFlags() & N2->getFlags();
29652 SDValue NewBinOp =
29653 DAG.getNode(Opcode: BinOpc, DL, VTList: OpVTs, Ops: {NewSel, N1.getOperand(i: 1)}, Flags);
29654 return SDValue(NewBinOp.getNode(), N1.getResNo());
29655 }
29656
29657 // Fold select(cond, binop(x, y), binop(x, z))
29658 // --> binop(x, select(cond, y, z))
29659 if (N1.getOperand(i: 0) == N2.getOperand(i: 0)) {
29660 SDValue N11 = N1.getOperand(i: 1);
29661 SDValue N21 = N2.getOperand(i: 1);
29662 // Second op VT might be different (e.g. shift amount type)
29663 if (N11.getValueType() == N21.getValueType()) {
29664 SDValue NewSel = DAG.getSelect(DL, VT: N11.getValueType(), Cond: N0, LHS: N11, RHS: N21);
29665 SDNodeFlags Flags = N1->getFlags() & N2->getFlags();
29666 SDValue NewBinOp =
29667 DAG.getNode(Opcode: BinOpc, DL, VTList: OpVTs, Ops: {N1.getOperand(i: 0), NewSel}, Flags);
29668 return SDValue(NewBinOp.getNode(), N1.getResNo());
29669 }
29670 }
29671
29672 // TODO: Handle isCommutativeBinOp patterns as well?
29673 return SDValue();
29674}
29675
29676// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
29677SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
29678 SDValue N0 = N->getOperand(Num: 0);
29679 EVT VT = N->getValueType(ResNo: 0);
29680 bool IsFabs = N->getOpcode() == ISD::FABS;
29681 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
29682
29683 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
29684 return SDValue();
29685
29686 SDValue Int = N0.getOperand(i: 0);
29687 EVT IntVT = Int.getValueType();
29688
29689   // The operand of the cast should be a scalar integer.
29690 if (!IntVT.isInteger() || IntVT.isVector())
29691 return SDValue();
29692
29693 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
29694 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
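  // E.g. for an f32 bitcast from i32 (illustrative), the sign mask is
  // 0x80000000, so fneg becomes an XOR with 0x80000000 and fabs becomes an
  // AND with 0x7FFFFFFF.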
29695 APInt SignMask;
29696 if (N0.getValueType().isVector()) {
29697 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
29698 // 0x7f...) per element and splat it.
29699 SignMask = APInt::getSignMask(BitWidth: N0.getScalarValueSizeInBits());
29700 if (IsFabs)
29701 SignMask = ~SignMask;
29702 SignMask = APInt::getSplat(NewLen: IntVT.getSizeInBits(), V: SignMask);
29703 } else {
29704 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
29705 SignMask = APInt::getSignMask(BitWidth: IntVT.getSizeInBits());
29706 if (IsFabs)
29707 SignMask = ~SignMask;
29708 }
29709 SDLoc DL(N0);
29710 Int = DAG.getNode(Opcode: IsFabs ? ISD::AND : ISD::XOR, DL, VT: IntVT, N1: Int,
29711 N2: DAG.getConstant(Val: SignMask, DL, VT: IntVT));
29712 AddToWorklist(N: Int.getNode());
29713 return DAG.getBitcast(VT, V: Int);
29714}
29715
29716 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
29717/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
29718/// in it. This may be a win when the constant is not otherwise available
29719/// because it replaces two constant pool loads with one.
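/// For example (illustrative): with the array {2.0f, 1.0f} placed in the
/// constant pool at "tmp", the compare selects an offset of 0 or 4 bytes and a
/// single load fetches whichever constant is needed.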
29720SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
29721 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
29722 ISD::CondCode CC) {
29723 if (!TLI.reduceSelectOfFPConstantLoads(CmpOpVT: N0.getValueType()))
29724 return SDValue();
29725
29726 // If we are before legalize types, we want the other legalization to happen
29727 // first (for example, to avoid messing with soft float).
29728 auto *TV = dyn_cast<ConstantFPSDNode>(Val&: N2);
29729 auto *FV = dyn_cast<ConstantFPSDNode>(Val&: N3);
29730 EVT VT = N2.getValueType();
29731 if (!TV || !FV || !TLI.isTypeLegal(VT))
29732 return SDValue();
29733
29734 // If a constant can be materialized without loads, this does not make sense.
29735 if (TLI.getOperationAction(Op: ISD::ConstantFP, VT) == TargetLowering::Legal ||
29736 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(ResNo: 0), ForCodeSize) ||
29737 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(ResNo: 0), ForCodeSize))
29738 return SDValue();
29739
29740 // If both constants have multiple uses, then we won't need to do an extra
29741 // load. The values are likely around in registers for other users.
29742 if (!TV->hasOneUse() && !FV->hasOneUse())
29743 return SDValue();
29744
29745 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
29746 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
29747 Type *FPTy = Elts[0]->getType();
29748 const DataLayout &TD = DAG.getDataLayout();
29749
29750 // Create a ConstantArray of the two constants.
29751 Constant *CA = ConstantArray::get(T: ArrayType::get(ElementType: FPTy, NumElements: 2), V: Elts);
29752 SDValue CPIdx = DAG.getConstantPool(C: CA, VT: TLI.getPointerTy(DL: DAG.getDataLayout()),
29753 Align: TD.getPrefTypeAlign(Ty: FPTy));
29754 Align Alignment = cast<ConstantPoolSDNode>(Val&: CPIdx)->getAlign();
29755
29756 // Get offsets to the 0 and 1 elements of the array, so we can select between
29757 // them.
29758 SDValue Zero = DAG.getIntPtrConstant(Val: 0, DL);
29759 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Ty: Elts[0]->getType());
29760 SDValue One = DAG.getIntPtrConstant(Val: EltSize, DL: SDLoc(FV));
29761 SDValue Cond =
29762 DAG.getSetCC(DL, VT: getSetCCResultType(VT: N0.getValueType()), LHS: N0, RHS: N1, Cond: CC);
29763 AddToWorklist(N: Cond.getNode());
29764 SDValue CstOffset = DAG.getSelect(DL, VT: Zero.getValueType(), Cond, LHS: One, RHS: Zero);
29765 AddToWorklist(N: CstOffset.getNode());
29766 CPIdx = DAG.getNode(Opcode: ISD::ADD, DL, VT: CPIdx.getValueType(), N1: CPIdx, N2: CstOffset);
29767 AddToWorklist(N: CPIdx.getNode());
29768 return DAG.getLoad(VT: TV->getValueType(ResNo: 0), dl: DL, Chain: DAG.getEntryNode(), Ptr: CPIdx,
29769 PtrInfo: MachinePointerInfo::getConstantPool(
29770 MF&: DAG.getMachineFunction()), Alignment);
29771}
29772
29773/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
29774/// where 'cond' is the comparison specified by CC.
29775SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
29776 SDValue N2, SDValue N3, ISD::CondCode CC,
29777 bool NotExtCompare) {
29778 // (x ? y : y) -> y.
29779 if (N2 == N3) return N2;
29780
29781 EVT CmpOpVT = N0.getValueType();
29782 EVT CmpResVT = getSetCCResultType(VT: CmpOpVT);
29783 EVT VT = N2.getValueType();
29784 auto *N1C = dyn_cast<ConstantSDNode>(Val: N1.getNode());
29785 auto *N2C = dyn_cast<ConstantSDNode>(Val: N2.getNode());
29786 auto *N3C = dyn_cast<ConstantSDNode>(Val: N3.getNode());
29787
29788 // Determine if the condition we're dealing with is constant.
29789 if (SDValue SCC = DAG.FoldSetCC(VT: CmpResVT, N1: N0, N2: N1, Cond: CC, dl: DL)) {
29790 AddToWorklist(N: SCC.getNode());
29791 if (auto *SCCC = dyn_cast<ConstantSDNode>(Val&: SCC)) {
29792 // fold select_cc true, x, y -> x
29793 // fold select_cc false, x, y -> y
29794 return !(SCCC->isZero()) ? N2 : N3;
29795 }
29796 }
29797
29798 if (SDValue V =
29799 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
29800 return V;
29801
29802 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
29803 return V;
29804
29805 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
29806   // where y has a single bit set.
29807   // In plain terms: we can turn the SELECT_CC into an AND when the condition
29808   // can be materialized as an all-ones register, and any single-bit test can
29809   // be materialized that way with a shift-left followed by an arithmetic
29810   // shift-right.
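  // E.g. (illustrative, i32, y == 4): shl x by 29 moves bit 2 into the sign
  // bit, and sra by 31 then yields all-ones when that bit was set (condition
  // false, so A is masked in) and zero when it was clear.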
29811 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
29812 N0->getValueType(ResNo: 0) == VT && isNullConstant(V: N1) && isNullConstant(V: N2)) {
29813 SDValue AndLHS = N0->getOperand(Num: 0);
29814 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1));
29815 if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
29816 // Shift the tested bit over the sign bit.
29817 const APInt &AndMask = ConstAndRHS->getAPIntValue();
29818 if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
29819 unsigned ShCt = AndMask.getBitWidth() - 1;
29820 SDValue ShlAmt = DAG.getShiftAmountConstant(Val: AndMask.countl_zero(), VT,
29821 DL: SDLoc(AndLHS));
29822 SDValue Shl = DAG.getNode(Opcode: ISD::SHL, DL: SDLoc(N0), VT, N1: AndLHS, N2: ShlAmt);
29823
29824 // Now arithmetic right shift it all the way over, so the result is
29825 // either all-ones, or zero.
29826 SDValue ShrAmt = DAG.getShiftAmountConstant(Val: ShCt, VT, DL: SDLoc(Shl));
29827 SDValue Shr = DAG.getNode(Opcode: ISD::SRA, DL: SDLoc(N0), VT, N1: Shl, N2: ShrAmt);
29828
29829 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Shr, N2: N3);
29830 }
29831 }
29832 }
29833
29834 // fold select C, 16, 0 -> shl C, 4
29835 bool Fold = N2C && isNullConstant(V: N3) && N2C->getAPIntValue().isPowerOf2();
29836 bool Swap = N3C && isNullConstant(V: N2) && N3C->getAPIntValue().isPowerOf2();
29837
29838 if ((Fold || Swap) &&
29839 TLI.getBooleanContents(Type: CmpOpVT) ==
29840 TargetLowering::ZeroOrOneBooleanContent &&
29841 (!LegalOperations || TLI.isOperationLegal(Op: ISD::SETCC, VT: CmpOpVT)) &&
29842 TLI.convertSelectOfConstantsToMath(VT)) {
29843
29844 if (Swap) {
29845 CC = ISD::getSetCCInverse(Operation: CC, Type: CmpOpVT);
29846 std::swap(a&: N2C, b&: N3C);
29847 }
29848
29849 // If the caller doesn't want us to simplify this into a zext of a compare,
29850 // don't do it.
29851 if (NotExtCompare && N2C->isOne())
29852 return SDValue();
29853
29854 SDValue Temp, SCC;
29855 // zext (setcc n0, n1)
29856 if (LegalTypes) {
29857 SCC = DAG.getSetCC(DL, VT: CmpResVT, LHS: N0, RHS: N1, Cond: CC);
29858 Temp = DAG.getZExtOrTrunc(Op: SCC, DL: SDLoc(N2), VT);
29859 } else {
29860 SCC = DAG.getSetCC(DL: SDLoc(N0), VT: MVT::i1, LHS: N0, RHS: N1, Cond: CC);
29861 Temp = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SDLoc(N2), VT, Operand: SCC);
29862 }
29863
29864 AddToWorklist(N: SCC.getNode());
29865 AddToWorklist(N: Temp.getNode());
29866
29867 if (N2C->isOne())
29868 return Temp;
29869
29870 unsigned ShCt = N2C->getAPIntValue().logBase2();
29871 if (TLI.shouldAvoidTransformToShift(VT, Amount: ShCt))
29872 return SDValue();
29873
29874 // shl setcc result by log2 n2c
29875 return DAG.getNode(
29876 Opcode: ISD::SHL, DL, VT: N2.getValueType(), N1: Temp,
29877 N2: DAG.getShiftAmountConstant(Val: ShCt, VT: N2.getValueType(), DL: SDLoc(Temp)));
29878 }
29879
29880 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
29881 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
29882 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
29883 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
29884 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
29885 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
29886 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
29887 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
29888 if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
29889 SDValue ValueOnZero = N2;
29890 SDValue Count = N3;
29891     // If the condition is NE instead of EQ, swap the operands.
29892 if (CC == ISD::SETNE)
29893 std::swap(a&: ValueOnZero, b&: Count);
29894     // Check if the value on zero is a constant equal to the bit width of the type.
29895 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(Val&: ValueOnZero)) {
29896 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
29897 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
29898 // legal, combine to just cttz.
29899 if ((Count.getOpcode() == ISD::CTTZ ||
29900 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
29901 N0 == Count.getOperand(i: 0) &&
29902 (!LegalOperations || TLI.isOperationLegal(Op: ISD::CTTZ, VT)))
29903 return DAG.getNode(Opcode: ISD::CTTZ, DL, VT, Operand: N0);
29904 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
29905 // legal, combine to just ctlz.
29906 if ((Count.getOpcode() == ISD::CTLZ ||
29907 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
29908 N0 == Count.getOperand(i: 0) &&
29909 (!LegalOperations || TLI.isOperationLegal(Op: ISD::CTLZ, VT)))
29910 return DAG.getNode(Opcode: ISD::CTLZ, DL, VT, Operand: N0);
29911 }
29912 }
29913 }
29914
29915 // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
29916 // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
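  // E.g. (illustrative): for setgt X, -1 the ashr is 0 when X >= 0 (select C)
  // and all-ones when X < 0, so the XOR flips C into ~C exactly when needed.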
29917 if (!NotExtCompare && N1C && N2C && N3C &&
29918 N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
29919 ((N1C->isAllOnes() && CC == ISD::SETGT) ||
29920 (N1C->isZero() && CC == ISD::SETLT)) &&
29921 !TLI.shouldAvoidTransformToShift(VT, Amount: CmpOpVT.getScalarSizeInBits() - 1)) {
29922 SDValue ASHR =
29923 DAG.getNode(Opcode: ISD::SRA, DL, VT: CmpOpVT, N1: N0,
29924 N2: DAG.getShiftAmountConstant(
29925 Val: CmpOpVT.getScalarSizeInBits() - 1, VT: CmpOpVT, DL));
29926 return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: DAG.getSExtOrTrunc(Op: ASHR, DL, VT),
29927 N2: DAG.getSExtOrTrunc(Op: CC == ISD::SETLT ? N3 : N2, DL, VT));
29928 }
29929
29930 // Fold sign pattern select_cc setgt X, -1, 1, -1 -> or (ashr X, BW-1), 1
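  // E.g. (illustrative, i32): ashr X, 31 is 0 for X >= 0 and -1 for X < 0, so
  // ORing with 1 yields +1 or -1 respectively (0 maps to +1 under setgt -1).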
29931 if (CC == ISD::SETGT && N1C && N2C && N3C && N1C->isAllOnes() &&
29932 N2C->isOne() && N3C->isAllOnes() &&
29933 !TLI.shouldAvoidTransformToShift(VT: CmpOpVT,
29934 Amount: CmpOpVT.getScalarSizeInBits() - 1)) {
29935 SDValue ASHR =
29936 DAG.getNode(Opcode: ISD::SRA, DL, VT: CmpOpVT, N1: N0,
29937 N2: DAG.getShiftAmountConstant(
29938 Val: CmpOpVT.getScalarSizeInBits() - 1, VT: CmpOpVT, DL));
29939 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: DAG.getSExtOrTrunc(Op: ASHR, DL, VT),
29940 N2: DAG.getConstant(Val: 1, DL, VT));
29941 }
29942
29943 if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
29944 return S;
29945 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
29946 return S;
29947 if (SDValue ABD = foldSelectToABD(LHS: N0, RHS: N1, True: N2, False: N3, CC, DL))
29948 return ABD;
29949
29950 return SDValue();
29951}
29952
29953static SDValue matchMergedBFX(SDValue Root, SelectionDAG &DAG,
29954 const TargetLowering &TLI) {
29955 // Match a pattern such as:
29956 // (X | (X >> C0) | (X >> C1) | ...) & Mask
29957 // This extracts contiguous parts of X and ORs them together before comparing.
29958 // We can optimize this so that we directly check (X & SomeMask) instead,
29959 // eliminating the shifts.
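  // E.g. (illustrative, i32): ((X | (X >> 8)) & 0xFF) == 0 can be rewritten as
  // (X & 0xFFFF) == 0, since each masked result bit is the OR of the
  // corresponding bits of X and X >> 8. This only holds for equality
  // comparisons against zero, which is how SimplifySetCC uses this helper.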
29960
29961 EVT VT = Root.getValueType();
29962
29963 // TODO: Support vectors?
29964 if (!VT.isScalarInteger() || Root.getOpcode() != ISD::AND)
29965 return SDValue();
29966
29967 SDValue N0 = Root.getOperand(i: 0);
29968 SDValue N1 = Root.getOperand(i: 1);
29969
29970 if (N0.getOpcode() != ISD::OR || !isa<ConstantSDNode>(Val: N1))
29971 return SDValue();
29972
29973 APInt RootMask = cast<ConstantSDNode>(Val&: N1)->getAsAPIntVal();
29974
29975 SDValue Src;
29976 const auto IsSrc = [&](SDValue V) {
29977 if (!Src) {
29978 Src = V;
29979 return true;
29980 }
29981
29982 return Src == V;
29983 };
29984
29985 SmallVector<SDValue> Worklist = {N0};
29986 APInt PartsMask(VT.getSizeInBits(), 0);
29987 while (!Worklist.empty()) {
29988 SDValue V = Worklist.pop_back_val();
29989 if (!V.hasOneUse() && (Src && Src != V))
29990 return SDValue();
29991
29992 if (V.getOpcode() == ISD::OR) {
29993 Worklist.push_back(Elt: V.getOperand(i: 0));
29994 Worklist.push_back(Elt: V.getOperand(i: 1));
29995 continue;
29996 }
29997
29998 if (V.getOpcode() == ISD::SRL) {
29999 SDValue ShiftSrc = V.getOperand(i: 0);
30000 SDValue ShiftAmt = V.getOperand(i: 1);
30001
30002 if (!IsSrc(ShiftSrc) || !isa<ConstantSDNode>(Val: ShiftAmt))
30003 return SDValue();
30004
30005 auto ShiftAmtVal = cast<ConstantSDNode>(Val&: ShiftAmt)->getAsZExtVal();
30006 if (ShiftAmtVal > RootMask.getBitWidth())
30007 return SDValue();
30008
30009 PartsMask |= (RootMask << ShiftAmtVal);
30010 continue;
30011 }
30012
30013 if (IsSrc(V)) {
30014 PartsMask |= RootMask;
30015 continue;
30016 }
30017
30018 return SDValue();
30019 }
30020
30021 if (!Src)
30022 return SDValue();
30023
30024 SDLoc DL(Root);
30025 return DAG.getNode(Opcode: ISD::AND, DL, VT,
30026 Ops: {Src, DAG.getConstant(Val: PartsMask, DL, VT)});
30027}
30028
30029 /// This is a thin wrapper around TargetLowering::SimplifySetCC.
30030SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
30031 ISD::CondCode Cond, const SDLoc &DL,
30032 bool foldBooleans) {
30033 TargetLowering::DAGCombinerInfo
30034 DagCombineInfo(DAG, Level, false, this);
30035 if (SDValue C =
30036 TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DCI&: DagCombineInfo, dl: DL))
30037 return C;
30038
30039 if (ISD::isIntEqualitySetCC(Code: Cond) && N0.getOpcode() == ISD::AND &&
30040 isNullConstant(V: N1)) {
30041
30042 if (SDValue Res = matchMergedBFX(Root: N0, DAG, TLI))
30043 return DAG.getSetCC(DL, VT, LHS: Res, RHS: N1, Cond);
30044 }
30045
30046 return SDValue();
30047}
30048
30049/// Given an ISD::SDIV node expressing a divide by constant, return
30050 /// a DAG expression that will generate the same value by multiplying
30051/// by a magic number.
30052/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
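/// For example (illustrative, i32): X sdiv 3 is typically expanded to roughly
/// add (mulhs X, 0x55555556), (srl X, 31), replacing the divide with a
/// multiply-high, a shift and an add.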
30053SDValue DAGCombiner::BuildSDIV(SDNode *N) {
30054   // When optimizing for minimum size, we don't want to expand a div to a mul
30055   // and a shift.
30056 if (DAG.getMachineFunction().getFunction().hasMinSize())
30057 return SDValue();
30058
30059 SmallVector<SDNode *, 8> Built;
30060 if (SDValue S = TLI.BuildSDIV(N, DAG, IsAfterLegalization: LegalOperations, IsAfterLegalTypes: LegalTypes, Created&: Built)) {
30061 for (SDNode *N : Built)
30062 AddToWorklist(N);
30063 return S;
30064 }
30065
30066 return SDValue();
30067}
30068
30069/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
30070/// DAG expression that will generate the same value by right shifting.
30071SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
30072 ConstantSDNode *C = isConstOrConstSplat(N: N->getOperand(Num: 1));
30073 if (!C)
30074 return SDValue();
30075
30076 // Avoid division by zero.
30077 if (C->isZero())
30078 return SDValue();
30079
30080 SmallVector<SDNode *, 8> Built;
30081 if (SDValue S = TLI.BuildSDIVPow2(N, Divisor: C->getAPIntValue(), DAG, Created&: Built)) {
30082 for (SDNode *N : Built)
30083 AddToWorklist(N);
30084 return S;
30085 }
30086
30087 return SDValue();
30088}
30089
30090/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
30091/// expression that will generate the same value by multiplying by a magic
30092/// number.
30093/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
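/// For example (illustrative, i32): X udiv 3 is typically expanded to
/// srl (mulhu X, 0xAAAAAAAB), 1, i.e. a multiply-high followed by a shift.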
30094SDValue DAGCombiner::BuildUDIV(SDNode *N) {
30095   // When optimizing for minimum size, we don't want to expand a div to a mul
30096   // and a shift.
30097 if (DAG.getMachineFunction().getFunction().hasMinSize())
30098 return SDValue();
30099
30100 SmallVector<SDNode *, 8> Built;
30101 if (SDValue S = TLI.BuildUDIV(N, DAG, IsAfterLegalization: LegalOperations, IsAfterLegalTypes: LegalTypes, Created&: Built)) {
30102 for (SDNode *N : Built)
30103 AddToWorklist(N);
30104 return S;
30105 }
30106
30107 return SDValue();
30108}
30109
30110/// Given an ISD::SREM node expressing a remainder by constant power of 2,
30111/// return a DAG expression that will generate the same value.
30112SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
30113 ConstantSDNode *C = isConstOrConstSplat(N: N->getOperand(Num: 1));
30114 if (!C)
30115 return SDValue();
30116
30117 // Avoid division by zero.
30118 if (C->isZero())
30119 return SDValue();
30120
30121 SmallVector<SDNode *, 8> Built;
30122 if (SDValue S = TLI.BuildSREMPow2(N, Divisor: C->getAPIntValue(), DAG, Created&: Built)) {
30123 for (SDNode *N : Built)
30124 AddToWorklist(N);
30125 return S;
30126 }
30127
30128 return SDValue();
30129}
30130
30131// This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp
30132//
30133// Returns the node that represents `Log2(Op)`. This may create a new node. If
30134 // we are unable to compute `Log2(Op)`, this returns `SDValue()`.
30135//
30136// All nodes will be created at `DL` and the output will be of type `VT`.
30137//
30138 // This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
30139 // `AssumeNonZero` if this function should simply assume (rather than require
30140 // proving) that `Op` is non-zero.
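// For example (illustrative): a power-of-2 constant folds to a constant,
// log2(1 << Y) folds to Y, and log2(select(C, 8, 16)) folds to
// select(C, 3, 4).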
30141static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
30142 SDValue Op, unsigned Depth,
30143 bool AssumeNonZero) {
30144 assert(VT.isInteger() && "Only integer types are supported!");
30145
30146 auto PeekThroughCastsAndTrunc = [](SDValue V) {
30147 while (true) {
30148 switch (V.getOpcode()) {
30149 case ISD::TRUNCATE:
30150 case ISD::ZERO_EXTEND:
30151 V = V.getOperand(i: 0);
30152 break;
30153 default:
30154 return V;
30155 }
30156 }
30157 };
30158
30159 if (VT.isScalableVector())
30160 return SDValue();
30161
30162 Op = PeekThroughCastsAndTrunc(Op);
30163
30164   // Helper for determining whether a value is a power-of-2 constant scalar or
30165   // a vector of such elements.
30166 SmallVector<APInt> Pow2Constants;
30167 auto IsPowerOfTwo = [&Pow2Constants](ConstantSDNode *C) {
30168 if (C->isZero() || C->isOpaque())
30169 return false;
30170 // TODO: We may also be able to support negative powers of 2 here.
30171 if (C->getAPIntValue().isPowerOf2()) {
30172 Pow2Constants.emplace_back(Args: C->getAPIntValue());
30173 return true;
30174 }
30175 return false;
30176 };
30177
30178 if (ISD::matchUnaryPredicate(Op, Match: IsPowerOfTwo, /*AllowUndefs=*/false,
30179 /*AllowTruncation=*/true)) {
30180 if (!VT.isVector())
30181 return DAG.getConstant(Val: Pow2Constants.back().logBase2(), DL, VT);
30182 // We need to create a build vector
30183 if (Op.getOpcode() == ISD::SPLAT_VECTOR)
30184 return DAG.getSplat(VT, DL,
30185 Op: DAG.getConstant(Val: Pow2Constants.back().logBase2(), DL,
30186 VT: VT.getScalarType()));
30187 SmallVector<SDValue> Log2Ops;
30188 for (const APInt &Pow2 : Pow2Constants)
30189 Log2Ops.emplace_back(
30190 Args: DAG.getConstant(Val: Pow2.logBase2(), DL, VT: VT.getScalarType()));
30191 return DAG.getBuildVector(VT, DL, Ops: Log2Ops);
30192 }
30193
30194 if (Depth >= DAG.MaxRecursionDepth)
30195 return SDValue();
30196
30197 auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
30198 // Peek through zero extend. We can't peek through truncates since this
30199 // function is called on a shift amount. We must ensure that all of the bits
30200 // above the original shift amount are zeroed by this function.
30201 while (ToCast.getOpcode() == ISD::ZERO_EXTEND)
30202 ToCast = ToCast.getOperand(i: 0);
30203 EVT CurVT = ToCast.getValueType();
30204 if (NewVT == CurVT)
30205 return ToCast;
30206
30207 if (NewVT.getSizeInBits() == CurVT.getSizeInBits())
30208 return DAG.getBitcast(VT: NewVT, V: ToCast);
30209
30210 return DAG.getZExtOrTrunc(Op: ToCast, DL, VT: NewVT);
30211 };
30212
30213 // log2(X << Y) -> log2(X) + Y
30214 if (Op.getOpcode() == ISD::SHL) {
30215 // 1 << Y and X nuw/nsw << Y are all non-zero.
30216 if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
30217 Op->getFlags().hasNoSignedWrap() || isOneConstant(V: Op.getOperand(i: 0)))
30218 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op: Op.getOperand(i: 0),
30219 Depth: Depth + 1, AssumeNonZero))
30220 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: LogX,
30221 N2: CastToVT(VT, Op.getOperand(i: 1)));
30222 }
30223
30224 // c ? X : Y -> c ? Log2(X) : Log2(Y)
30225 SDValue Cond, TVal, FVal;
30226 if (sd_match(N: Op, P: m_OneUse(P: m_SelectLike(Cond: m_Value(N&: Cond), T: m_Value(N&: TVal),
30227 F: m_Value(N&: FVal))))) {
30228 if (SDValue LogX =
30229 takeInexpensiveLog2(DAG, DL, VT, Op: TVal, Depth: Depth + 1, AssumeNonZero))
30230 if (SDValue LogY =
30231 takeInexpensiveLog2(DAG, DL, VT, Op: FVal, Depth: Depth + 1, AssumeNonZero))
30232 return DAG.getSelect(DL, VT, Cond, LHS: LogX, RHS: LogY);
30233 }
30234
30235 // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
30236 // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
30237 if ((Op.getOpcode() == ISD::UMIN || Op.getOpcode() == ISD::UMAX) &&
30238 Op.hasOneUse()) {
30239     // Pass AssumeNonZero as false here. Otherwise we can hit a case where
30240     // log2(umax(X, Y)) != umax(log2(X), log2(Y)) (because of overflow).
30241 if (SDValue LogX =
30242 takeInexpensiveLog2(DAG, DL, VT, Op: Op.getOperand(i: 0), Depth: Depth + 1,
30243 /*AssumeNonZero*/ false))
30244 if (SDValue LogY =
30245 takeInexpensiveLog2(DAG, DL, VT, Op: Op.getOperand(i: 1), Depth: Depth + 1,
30246 /*AssumeNonZero*/ false))
30247 return DAG.getNode(Opcode: Op.getOpcode(), DL, VT, N1: LogX, N2: LogY);
30248 }
30249
30250 return SDValue();
30251}
30252
30253/// Determines the LogBase2 value for a non-null input value using the
30254/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
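/// For example (illustrative, i32): for V == 8, ctlz(8) == 28, so
/// LogBase2(8) == 31 - 28 == 3.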
30255SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL,
30256 bool KnownNonZero, bool InexpensiveOnly,
30257 std::optional<EVT> OutVT) {
30258 EVT VT = OutVT ? *OutVT : V.getValueType();
30259 SDValue InexpensiveLogBase2 =
30260 takeInexpensiveLog2(DAG, DL, VT, Op: V, /*Depth*/ 0, AssumeNonZero: KnownNonZero);
30261 if (InexpensiveLogBase2 || InexpensiveOnly || !DAG.isKnownToBeAPowerOfTwo(Val: V))
30262 return InexpensiveLogBase2;
30263
30264 SDValue Ctlz = DAG.getNode(Opcode: ISD::CTLZ, DL, VT, Operand: V);
30265 SDValue Base = DAG.getConstant(Val: VT.getScalarSizeInBits() - 1, DL, VT);
30266 SDValue LogBase2 = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Base, N2: Ctlz);
30267 return LogBase2;
30268}
30269
30270/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
30271/// For the reciprocal, we need to find the zero of the function:
30272/// F(X) = 1/X - A [which has a zero at X = 1/A]
30273/// =>
30274/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
30275/// does not require additional intermediate precision]
30276/// For the last iteration, put numerator N into it to gain more precision:
30277/// Result = N X_i + X_i (N - N A X_i)
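/// As a numeric illustration (not taken from the code): for A = 4 with initial
/// estimate X_0 = 0.2, X_1 = 0.2 * (2 - 4 * 0.2) = 0.24 and X_2 = 0.2496,
/// converging quadratically towards 1/A = 0.25.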
30278SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
30279 SDNodeFlags Flags) {
30280 if (LegalDAG)
30281 return SDValue();
30282
30283 // TODO: Handle extended types?
30284 EVT VT = Op.getValueType();
30285 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
30286 VT.getScalarType() != MVT::f64)
30287 return SDValue();
30288
30289 // If estimates are explicitly disabled for this function, we're done.
30290 MachineFunction &MF = DAG.getMachineFunction();
30291 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
30292 if (Enabled == TLI.ReciprocalEstimate::Disabled)
30293 return SDValue();
30294
30295 // Estimates may be explicitly enabled for this type with a custom number of
30296 // refinement steps.
30297 int Iterations = TLI.getDivRefinementSteps(VT, MF);
30298 if (SDValue Est = TLI.getRecipEstimate(Operand: Op, DAG, Enabled, RefinementSteps&: Iterations)) {
30299 AddToWorklist(N: Est.getNode());
30300
30301 SDLoc DL(Op);
30302 if (Iterations) {
30303 SDValue FPOne = DAG.getConstantFP(Val: 1.0, DL, VT);
30304
30305 // Newton iterations: Est = Est + Est (N - Arg * Est)
30306 // If this is the last iteration, also multiply by the numerator.
30307 for (int i = 0; i < Iterations; ++i) {
30308 SDValue MulEst = Est;
30309
30310 if (i == Iterations - 1) {
30311 MulEst = DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: N, N2: Est, Flags);
30312 AddToWorklist(N: MulEst.getNode());
30313 }
30314
30315 SDValue NewEst = DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: Op, N2: MulEst, Flags);
30316 AddToWorklist(N: NewEst.getNode());
30317
30318 NewEst = DAG.getNode(Opcode: ISD::FSUB, DL, VT,
30319 N1: (i == Iterations - 1 ? N : FPOne), N2: NewEst, Flags);
30320 AddToWorklist(N: NewEst.getNode());
30321
30322 NewEst = DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: Est, N2: NewEst, Flags);
30323 AddToWorklist(N: NewEst.getNode());
30324
30325 Est = DAG.getNode(Opcode: ISD::FADD, DL, VT, N1: MulEst, N2: NewEst, Flags);
30326 AddToWorklist(N: Est.getNode());
30327 }
30328 } else {
30329 // If no iterations are available, multiply with N.
30330 Est = DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: Est, N2: N, Flags);
30331 AddToWorklist(N: Est.getNode());
30332 }
30333
30334 return Est;
30335 }
30336
30337 return SDValue();
30338}
30339
30340/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
30341/// For the reciprocal sqrt, we need to find the zero of the function:
30342/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
30343/// =>
30344/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
30345/// As a result, we precompute A/2 prior to the iteration loop.
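/// As a numeric illustration (not taken from the code): for A = 4 with initial
/// estimate X_0 = 0.45, X_1 = 0.45 * (1.5 - 4 * 0.45 * 0.45 / 2) = 0.49275,
/// approaching 1/sqrt(4) = 0.5.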
30346SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
30347 unsigned Iterations, bool Reciprocal) {
30348 EVT VT = Arg.getValueType();
30349 SDLoc DL(Arg);
30350 SDValue ThreeHalves = DAG.getConstantFP(Val: 1.5, DL, VT);
30351
30352 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
30353 // this entire sequence requires only one FP constant.
30354 SDValue HalfArg = DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: ThreeHalves, N2: Arg);
30355 HalfArg = DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: HalfArg, N2: Arg);
30356
30357 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
30358 for (unsigned i = 0; i < Iterations; ++i) {
30359 SDValue NewEst = DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: Est, N2: Est);
30360 NewEst = DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: HalfArg, N2: NewEst);
30361 NewEst = DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: ThreeHalves, N2: NewEst);
30362 Est = DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: Est, N2: NewEst);
30363 }
30364
30365 // If non-reciprocal square root is requested, multiply the result by Arg.
30366 if (!Reciprocal)
30367 Est = DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: Est, N2: Arg);
30368
30369 return Est;
30370}
30371
30372/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
30373/// For the reciprocal sqrt, we need to find the zero of the function:
30374/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
30375/// =>
30376/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
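/// Algebraically this is the same update as the one-constant form, since
/// (-0.5 * X_i) * (A * X_i * X_i - 3.0) == X_i * (1.5 - A * X_i * X_i / 2),
/// but it is expressed with the constants -0.5 and -3.0 instead of 1.5.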
30377SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
30378 unsigned Iterations, bool Reciprocal) {
30379 EVT VT = Arg.getValueType();
30380 SDLoc DL(Arg);
30381 SDValue MinusThree = DAG.getConstantFP(Val: -3.0, DL, VT);
30382 SDValue MinusHalf = DAG.getConstantFP(Val: -0.5, DL, VT);
30383
30384 // This routine must enter the loop below to work correctly
30385 // when (Reciprocal == false).
30386 assert(Iterations > 0);
30387
30388 // Newton iterations for reciprocal square root:
30389 // E = (E * -0.5) * ((A * E) * E + -3.0)
30390 for (unsigned i = 0; i < Iterations; ++i) {
30391 SDValue AE = DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: Arg, N2: Est);
30392 SDValue AEE = DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: AE, N2: Est);
30393 SDValue RHS = DAG.getNode(Opcode: ISD::FADD, DL, VT, N1: AEE, N2: MinusThree);
30394
30395 // When calculating a square root at the last iteration build:
30396 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
30397 // (notice a common subexpression)
30398 SDValue LHS;
30399 if (Reciprocal || (i + 1) < Iterations) {
30400 // RSQRT: LHS = (E * -0.5)
30401 LHS = DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: Est, N2: MinusHalf);
30402 } else {
30403 // SQRT: LHS = (A * E) * -0.5
30404 LHS = DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: AE, N2: MinusHalf);
30405 }
30406
30407 Est = DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: LHS, N2: RHS);
30408 }
30409
30410 return Est;
30411}
30412
30413/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
30414/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
30415/// Op can be zero.
30416SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, bool Reciprocal) {
30417 if (LegalDAG)
30418 return SDValue();
30419
30420 // TODO: Handle extended types?
30421 EVT VT = Op.getValueType();
30422 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
30423 VT.getScalarType() != MVT::f64)
30424 return SDValue();
30425
30426 // If estimates are explicitly disabled for this function, we're done.
30427 MachineFunction &MF = DAG.getMachineFunction();
30428 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
30429 if (Enabled == TLI.ReciprocalEstimate::Disabled)
30430 return SDValue();
30431
30432 // Estimates may be explicitly enabled for this type with a custom number of
30433 // refinement steps.
30434 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
30435
30436 bool UseOneConstNR = false;
30437 if (SDValue Est =
30438 TLI.getSqrtEstimate(Operand: Op, DAG, Enabled, RefinementSteps&: Iterations, UseOneConstNR,
30439 Reciprocal)) {
30440 AddToWorklist(N: Est.getNode());
30441
30442 if (Iterations > 0)
30443 Est = UseOneConstNR
30444 ? buildSqrtNROneConst(Arg: Op, Est, Iterations, Reciprocal)
30445 : buildSqrtNRTwoConst(Arg: Op, Est, Iterations, Reciprocal);
30446 if (!Reciprocal) {
30447 SDLoc DL(Op);
30448 // Try the target specific test first.
30449 SDValue Test = TLI.getSqrtInputTest(Operand: Op, DAG, Mode: DAG.getDenormalMode(VT));
30450
30451 // The estimate is now completely wrong if the input was exactly 0.0 or
30452       // possibly a denormal. Force the answer to 0.0 or the value provided by
30453       // the target for those cases.
30454 Est = DAG.getSelect(DL, VT, Cond: Test,
30455 LHS: TLI.getSqrtResultForDenormInput(Operand: Op, DAG), RHS: Est);
30456 }
30457 return Est;
30458 }
30459
30460 return SDValue();
30461}
30462
30463SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op) {
30464 return buildSqrtEstimateImpl(Op, Reciprocal: true);
30465}
30466
30467SDValue DAGCombiner::buildSqrtEstimate(SDValue Op) {
30468 return buildSqrtEstimateImpl(Op, Reciprocal: false);
30469}
30470
30471/// Return true if there is any possibility that the two addresses overlap.
30472bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
30473
30474 struct MemUseCharacteristics {
30475 bool IsVolatile;
30476 bool IsAtomic;
30477 SDValue BasePtr;
30478 int64_t Offset;
30479 LocationSize NumBytes;
30480 MachineMemOperand *MMO;
30481 };
30482
30483 auto getCharacteristics = [this](SDNode *N) -> MemUseCharacteristics {
30484 if (const auto *LSN = dyn_cast<LSBaseSDNode>(Val: N)) {
30485 int64_t Offset = 0;
30486 if (auto *C = dyn_cast<ConstantSDNode>(Val: LSN->getOffset()))
30487 Offset = (LSN->getAddressingMode() == ISD::PRE_INC) ? C->getSExtValue()
30488 : (LSN->getAddressingMode() == ISD::PRE_DEC)
30489 ? -1 * C->getSExtValue()
30490 : 0;
30491 TypeSize Size = LSN->getMemoryVT().getStoreSize();
30492 return {.IsVolatile: LSN->isVolatile(), .IsAtomic: LSN->isAtomic(),
30493 .BasePtr: LSN->getBasePtr(), .Offset: Offset /*base offset*/,
30494 .NumBytes: LocationSize::precise(Value: Size), .MMO: LSN->getMemOperand()};
30495 }
30496 if (const auto *LN = cast<LifetimeSDNode>(Val: N)) {
30497 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
30498 return {.IsVolatile: false /*isVolatile*/,
30499 /*isAtomic*/ .IsAtomic: false,
30500 .BasePtr: LN->getOperand(Num: 1),
30501 .Offset: 0,
30502 .NumBytes: LocationSize::precise(Value: MFI.getObjectSize(ObjectIdx: LN->getFrameIndex())),
30503 .MMO: (MachineMemOperand *)nullptr};
30504 }
30505 // Default.
30506 return {.IsVolatile: false /*isvolatile*/,
30507 /*isAtomic*/ .IsAtomic: false,
30508 .BasePtr: SDValue(),
30509 .Offset: (int64_t)0 /*offset*/,
30510 .NumBytes: LocationSize::beforeOrAfterPointer() /*size*/,
30511 .MMO: (MachineMemOperand *)nullptr};
30512 };
30513
30514 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
30515 MUC1 = getCharacteristics(Op1);
30516
30517 // If they are to the same address, then they must be aliases.
30518 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
30519 MUC0.Offset == MUC1.Offset)
30520 return true;
30521
30522 // If they are both volatile then they cannot be reordered.
30523 if (MUC0.IsVolatile && MUC1.IsVolatile)
30524 return true;
30525
30526 // Be conservative about atomics for the moment
30527 // TODO: This is way overconservative for unordered atomics (see D66309)
30528 if (MUC0.IsAtomic && MUC1.IsAtomic)
30529 return true;
30530
30531 if (MUC0.MMO && MUC1.MMO) {
30532 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
30533 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
30534 return false;
30535 }
30536
30537   // If NumBytes is scalable and the offset is not 0, conservatively return
30538   // "may alias".
30539 if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() &&
30540 MUC0.Offset != 0) ||
30541 (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() &&
30542 MUC1.Offset != 0))
30543 return true;
30544 // Try to prove that there is aliasing, or that there is no aliasing. Either
30545 // way, we can return now. If nothing can be proved, proceed with more tests.
30546 bool IsAlias;
30547 if (BaseIndexOffset::computeAliasing(Op0, NumBytes0: MUC0.NumBytes, Op1, NumBytes1: MUC1.NumBytes,
30548 DAG, IsAlias))
30549 return IsAlias;
30550
30551 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
30552 // either are not known.
30553 if (!MUC0.MMO || !MUC1.MMO)
30554 return true;
30555
30556   // If one operation reads from invariant memory and the other may store, they
30557   // cannot alias. This should really check the equivalent of mayWrite, but it
30558   // only matters for memory nodes other than load/store.
30559 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
30560 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
30561 return false;
30562
30563   // If the underlying values SrcValue1 and SrcValue2 are known to have
30564   // relatively large alignment compared to the size and offset of the access,
30565   // we may be able to prove they do not alias. This check is conservative for
30566   // now: it is aimed at cases created by splitting vector types and only works
30567   // when the offsets are multiples of the size of the data.
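  // E.g. (illustrative): two 4-byte accesses from bases with 16-byte base
  // alignment at offsets 4 and 8 cannot overlap, because 4 + 4 <= 8 within the
  // 16-byte aligned window.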
30568 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
30569 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
30570 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
30571 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
30572 LocationSize Size0 = MUC0.NumBytes;
30573 LocationSize Size1 = MUC1.NumBytes;
30574
30575 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
30576 Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() &&
30577 !Size1.isScalable() && Size0 == Size1 &&
30578 OrigAlignment0 > Size0.getValue().getKnownMinValue() &&
30579 SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 &&
30580 SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) {
30581 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
30582 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
30583
30584 // There is no overlap between these relatively aligned accesses of
30585 // similar size. Return no alias.
30586 if ((OffAlign0 + static_cast<int64_t>(
30587 Size0.getValue().getKnownMinValue())) <= OffAlign1 ||
30588 (OffAlign1 + static_cast<int64_t>(
30589 Size1.getValue().getKnownMinValue())) <= OffAlign0)
30590 return false;
30591 }
30592
30593 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
30594 ? CombinerGlobalAA
30595 : DAG.getSubtarget().useAA();
30596#ifndef NDEBUG
30597 if (CombinerAAOnlyFunc.getNumOccurrences() &&
30598 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
30599 UseAA = false;
30600#endif
30601
30602 if (UseAA && BatchAA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
30603 Size0.hasValue() && Size1.hasValue() &&
30604 // Can't represent a scalable size + fixed offset in LocationSize
30605 (!Size0.isScalable() || SrcValOffset0 == 0) &&
30606 (!Size1.isScalable() || SrcValOffset1 == 0)) {
30607 // Use alias analysis information.
30608 int64_t MinOffset = std::min(a: SrcValOffset0, b: SrcValOffset1);
30609 int64_t Overlap0 =
30610 Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
30611 int64_t Overlap1 =
30612 Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
30613 LocationSize Loc0 =
30614 Size0.isScalable() ? Size0 : LocationSize::precise(Value: Overlap0);
30615 LocationSize Loc1 =
30616 Size1.isScalable() ? Size1 : LocationSize::precise(Value: Overlap1);
30617 if (BatchAA->isNoAlias(
30618 LocA: MemoryLocation(MUC0.MMO->getValue(), Loc0,
30619 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
30620 LocB: MemoryLocation(MUC1.MMO->getValue(), Loc1,
30621 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
30622 return false;
30623 }
30624
30625 // Otherwise we have to assume they alias.
30626 return true;
30627}
30628
30629/// Walk up chain skipping non-aliasing memory nodes,
30630/// looking for aliasing nodes and adding them to the Aliases vector.
30631void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
30632 SmallVectorImpl<SDValue> &Aliases) {
30633 SmallVector<SDValue, 8> Chains; // List of chains to visit.
30634 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
30635
30636 // Get alias information for node.
30637 // TODO: relax aliasing for unordered atomics (see D66309)
30638 const bool IsLoad = isa<LoadSDNode>(Val: N) && cast<LoadSDNode>(Val: N)->isSimple();
30639
30640 // Starting off.
30641 Chains.push_back(Elt: OriginalChain);
30642 unsigned Depth = 0;
30643
30644 // Attempt to improve chain by a single step
30645 auto ImproveChain = [&](SDValue &C) -> bool {
30646 switch (C.getOpcode()) {
30647 case ISD::EntryToken:
30648 // No need to mark EntryToken.
30649 C = SDValue();
30650 return true;
30651 case ISD::LOAD:
30652 case ISD::STORE: {
30653 // Get alias information for C.
30654 // TODO: Relax aliasing for unordered atomics (see D66309)
30655 bool IsOpLoad = isa<LoadSDNode>(Val: C.getNode()) &&
30656 cast<LSBaseSDNode>(Val: C.getNode())->isSimple();
30657 if ((IsLoad && IsOpLoad) || !mayAlias(Op0: N, Op1: C.getNode())) {
30658 // Look further up the chain.
30659 C = C.getOperand(i: 0);
30660 return true;
30661 }
30662 // Alias, so stop here.
30663 return false;
30664 }
30665
30666 case ISD::CopyFromReg:
30667 // Always forward past CopyFromReg.
30668 C = C.getOperand(i: 0);
30669 return true;
30670
30671 case ISD::LIFETIME_START:
30672 case ISD::LIFETIME_END: {
30673 // We can forward past any lifetime start/end that can be proven not to
30674 // alias the memory access.
30675 if (!mayAlias(Op0: N, Op1: C.getNode())) {
30676 // Look further up the chain.
30677 C = C.getOperand(i: 0);
30678 return true;
30679 }
30680 return false;
30681 }
30682 default:
30683 return false;
30684 }
30685 };
30686
30687 // Look at each chain and determine if it is an alias. If so, add it to the
30688 // aliases list. If not, then continue up the chain looking for the next
30689 // candidate.
30690 while (!Chains.empty()) {
30691 SDValue Chain = Chains.pop_back_val();
30692
30693 // Don't bother if we've seen Chain before.
30694 if (!Visited.insert(Ptr: Chain.getNode()).second)
30695 continue;
30696
30697 // For TokenFactor nodes, look at each operand and only continue up the
30698 // chain until we reach the depth limit.
30699 //
30700 // FIXME: The depth check could be made to return the last non-aliasing
30701 // chain we found before we hit a tokenfactor rather than the original
30702 // chain.
30703 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
30704 Aliases.clear();
30705 Aliases.push_back(Elt: OriginalChain);
30706 return;
30707 }
30708
30709 if (Chain.getOpcode() == ISD::TokenFactor) {
30710 // We have to check each of the operands of the token factor for "small"
30711 // token factors, so we queue them up. Adding the operands to the queue
30712 // (stack) in reverse order maintains the original order and increases the
30713       // likelihood that getNode will find a matching token factor (CSE).
30714 if (Chain.getNumOperands() > 16) {
30715 Aliases.push_back(Elt: Chain);
30716 continue;
30717 }
30718 for (unsigned n = Chain.getNumOperands(); n;)
30719 Chains.push_back(Elt: Chain.getOperand(i: --n));
30720 ++Depth;
30721 continue;
30722 }
30723 // Everything else
30724 if (ImproveChain(Chain)) {
30725 // Updated Chain Found, Consider new chain if one exists.
30726 if (Chain.getNode())
30727 Chains.push_back(Elt: Chain);
30728 ++Depth;
30729 continue;
30730 }
30731 // No Improved Chain Possible, treat as Alias.
30732 Aliases.push_back(Elt: Chain);
30733 }
30734}
30735
30736/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
30737 /// (aliasing node).
30738SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
30739 if (OptLevel == CodeGenOptLevel::None)
30740 return OldChain;
30741
30742 // Ops for replacing token factor.
30743 SmallVector<SDValue, 8> Aliases;
30744
30745 // Accumulate all the aliases to this node.
30746 GatherAllAliases(N, OriginalChain: OldChain, Aliases);
30747
30748 // If no operands then chain to entry token.
30749 if (Aliases.empty())
30750 return DAG.getEntryNode();
30751
30752 // If a single operand then chain to it. We don't need to revisit it.
30753 if (Aliases.size() == 1)
30754 return Aliases[0];
30755
30756 // Construct a custom tailored token factor.
30757 return DAG.getTokenFactor(DL: SDLoc(N), Vals&: Aliases);
30758}
30759
30760// This function tries to collect a bunch of potentially interesting
30761// nodes to improve the chains of, all at once. This might seem
30762// redundant, as this function gets called when visiting every store
30763// node, so why not let the work be done on each store as it's visited?
30764//
30765// I believe this is mainly important because mergeConsecutiveStores
30766// is unable to deal with merging stores of different sizes, so unless
30767// we improve the chains of all the potential candidates up-front
30768// before running mergeConsecutiveStores, it might only see some of
30769// the nodes that will eventually be candidates, and then not be able
30770// to go from a partially-merged state to the desired final
30771// fully-merged state.
30772
30773bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
30774 SmallVector<StoreSDNode *, 8> ChainedStores;
30775 StoreSDNode *STChain = St;
30776   // Intervals records which offsets from BaseIndex have been covered. In the
30777   // common case, every store writes to an address adjacent to the previous one
30778   // and is thus merged with the previous interval at insertion time.
30779
30780 using IMap = llvm::IntervalMap<int64_t, std::monostate, 8,
30781 IntervalMapHalfOpenInfo<int64_t>>;
30782 IMap::Allocator A;
30783 IMap Intervals(A);
30784
30785 // This holds the base pointer, index, and the offset in bytes from the base
30786 // pointer.
30787 const BaseIndexOffset BasePtr = BaseIndexOffset::match(N: St, DAG);
30788
30789 // We must have a base and an offset.
30790 if (!BasePtr.getBase().getNode())
30791 return false;
30792
30793 // Do not handle stores to undef base pointers.
30794 if (BasePtr.getBase().isUndef())
30795 return false;
30796
30797 // Do not handle stores to opaque types
30798 if (St->getMemoryVT().isZeroSized())
30799 return false;
30800
30801 // BaseIndexOffset assumes that offsets are fixed-size, which
30802 // is not valid for scalable vectors where the offsets are
30803 // scaled by `vscale`, so bail out early.
30804 if (St->getMemoryVT().isScalableVT())
30805 return false;
30806
30807 // Add ST's interval.
30808 Intervals.insert(a: 0, b: (St->getMemoryVT().getSizeInBits() + 7) / 8,
30809 y: std::monostate{});
30810
30811 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(Val: STChain->getChain())) {
30812 if (Chain->getMemoryVT().isScalableVector())
30813 return false;
30814
30815 // If the chain has more than one use, then we can't reorder the mem ops.
30816 if (!SDValue(Chain, 0)->hasOneUse())
30817 break;
30818 // TODO: Relax for unordered atomics (see D66309)
30819 if (!Chain->isSimple() || Chain->isIndexed())
30820 break;
30821
30822 // Find the base pointer and offset for this memory node.
30823 const BaseIndexOffset Ptr = BaseIndexOffset::match(N: Chain, DAG);
30824 // Check that the base pointer is the same as the original one.
30825 int64_t Offset;
30826 if (!BasePtr.equalBaseIndex(Other: Ptr, DAG, Off&: Offset))
30827 break;
30828 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
30829 // Make sure we don't overlap with other intervals by checking the ones to
30830 // the left or right before inserting.
30831 auto I = Intervals.find(x: Offset);
30832 // If there's a next interval, we should end before it.
30833 if (I != Intervals.end() && I.start() < (Offset + Length))
30834 break;
30835 // If there's a previous interval, we should start after it.
30836 if (I != Intervals.begin() && (--I).stop() <= Offset)
30837 break;
30838 Intervals.insert(a: Offset, b: Offset + Length, y: std::monostate{});
30839
30840 ChainedStores.push_back(Elt: Chain);
30841 STChain = Chain;
30842 }
30843
30844 // If we didn't find a chained store, exit.
30845 if (ChainedStores.empty())
30846 return false;
30847
30848 // Improve all chained stores (St and ChainedStores members) starting from
30849 // where the store chain ended and return single TokenFactor.
30850 SDValue NewChain = STChain->getChain();
30851 SmallVector<SDValue, 8> TFOps;
30852 for (unsigned I = ChainedStores.size(); I;) {
30853 StoreSDNode *S = ChainedStores[--I];
30854 SDValue BetterChain = FindBetterChain(N: S, OldChain: NewChain);
30855 S = cast<StoreSDNode>(Val: DAG.UpdateNodeOperands(
30856 N: S, Op1: BetterChain, Op2: S->getOperand(Num: 1), Op3: S->getOperand(Num: 2), Op4: S->getOperand(Num: 3)));
30857 TFOps.push_back(Elt: SDValue(S, 0));
30858 ChainedStores[I] = S;
30859 }
30860
30861 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
30862 SDValue BetterChain = FindBetterChain(N: St, OldChain: NewChain);
30863 SDValue NewST;
30864 if (St->isTruncatingStore())
30865 NewST = DAG.getTruncStore(Chain: BetterChain, dl: SDLoc(St), Val: St->getValue(),
30866 Ptr: St->getBasePtr(), SVT: St->getMemoryVT(),
30867 MMO: St->getMemOperand());
30868 else
30869 NewST = DAG.getStore(Chain: BetterChain, dl: SDLoc(St), Val: St->getValue(),
30870 Ptr: St->getBasePtr(), MMO: St->getMemOperand());
30871
30872 TFOps.push_back(Elt: NewST);
30873
30874 // If we improved every element of TFOps, then we've lost the dependence on
30875 // NewChain to successors of St and we need to add it back to TFOps. Do so at
30876 // the beginning to keep relative order consistent with FindBetterChains.
30877 auto hasImprovedChain = [&](SDValue ST) -> bool {
30878 return ST->getOperand(Num: 0) != NewChain;
30879 };
30880 bool AddNewChain = llvm::all_of(Range&: TFOps, P: hasImprovedChain);
30881 if (AddNewChain)
30882 TFOps.insert(I: TFOps.begin(), Elt: NewChain);
30883
30884 SDValue TF = DAG.getTokenFactor(DL: SDLoc(STChain), Vals&: TFOps);
30885 CombineTo(N: St, Res: TF);
30886
30887 // Add TF and its operands to the worklist.
30888 AddToWorklist(N: TF.getNode());
30889 for (const SDValue &Op : TF->ops())
30890 AddToWorklist(N: Op.getNode());
30891 AddToWorklist(N: STChain);
30892 return true;
30893}
30894
30895bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
30896 if (OptLevel == CodeGenOptLevel::None)
30897 return false;
30898
30899 const BaseIndexOffset BasePtr = BaseIndexOffset::match(N: St, DAG);
30900
30901 // We must have a base and an offset.
30902 if (!BasePtr.getBase().getNode())
30903 return false;
30904
30905 // Do not handle stores to undef base pointers.
30906 if (BasePtr.getBase().isUndef())
30907 return false;
30908
30909 // Directly improve a chain of disjoint stores starting at St.
30910 if (parallelizeChainedStores(St))
30911 return true;
30912
30913   // Improve St's chain.
30914 SDValue BetterChain = FindBetterChain(N: St, OldChain: St->getChain());
30915 if (St->getChain() != BetterChain) {
30916 replaceStoreChain(ST: St, BetterChain);
30917 return true;
30918 }
30919 return false;
30920}
30921
30922/// This is the entry point for the file.
30923void SelectionDAG::Combine(CombineLevel Level, BatchAAResults *BatchAA,
30924 CodeGenOptLevel OptLevel) {
30925 /// This is the main entry point to this class.
30926 DAGCombiner(*this, BatchAA, OptLevel).Run(AtLevel: Level);
30927}
30928