1 | //===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "HexagonISelLowering.h" |
10 | #include "HexagonRegisterInfo.h" |
11 | #include "HexagonSubtarget.h" |
12 | #include "llvm/ADT/SetVector.h" |
13 | #include "llvm/ADT/SmallVector.h" |
14 | #include "llvm/Analysis/MemoryLocation.h" |
15 | #include "llvm/CodeGen/MachineBasicBlock.h" |
16 | #include "llvm/CodeGen/MachineFunction.h" |
17 | #include "llvm/CodeGen/MachineInstr.h" |
18 | #include "llvm/CodeGen/MachineOperand.h" |
19 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
20 | #include "llvm/CodeGen/TargetInstrInfo.h" |
21 | #include "llvm/IR/IntrinsicsHexagon.h" |
22 | #include "llvm/Support/CommandLine.h" |
23 | |
24 | #include <algorithm> |
25 | #include <string> |
26 | #include <utility> |
27 | |
28 | using namespace llvm; |
29 | |
30 | static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen", |
31 | cl::Hidden, cl::init(Val: 16), |
32 | cl::desc("Lower threshold (in bytes) for widening to HVX vectors")); |
33 | |
34 | static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 }; |
35 | static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 }; |
36 | static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 }; |
37 | static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 }; |
38 | |
39 | static std::tuple<unsigned, unsigned, unsigned> getIEEEProperties(MVT Ty) { |
40 | // For a float scalar type, return (exp-bits, exp-bias, fraction-bits) |
41 | MVT ElemTy = Ty.getScalarType(); |
42 | switch (ElemTy.SimpleTy) { |
43 | case MVT::f16: |
44 | return std::make_tuple(args: 5, args: 15, args: 10); |
45 | case MVT::f32: |
46 | return std::make_tuple(args: 8, args: 127, args: 23); |
47 | case MVT::f64: |
48 | return std::make_tuple(args: 11, args: 1023, args: 52); |
49 | default: |
50 | break; |
51 | } |
52 | llvm_unreachable(("Unexpected type: " + EVT(ElemTy).getEVTString()).c_str()); |
53 | } |
54 | |
55 | void |
56 | HexagonTargetLowering::initializeHVXLowering() { |
57 | if (Subtarget.useHVX64BOps()) { |
58 | addRegisterClass(VT: MVT::v64i8, RC: &Hexagon::HvxVRRegClass); |
59 | addRegisterClass(VT: MVT::v32i16, RC: &Hexagon::HvxVRRegClass); |
60 | addRegisterClass(VT: MVT::v16i32, RC: &Hexagon::HvxVRRegClass); |
61 | addRegisterClass(VT: MVT::v128i8, RC: &Hexagon::HvxWRRegClass); |
62 | addRegisterClass(VT: MVT::v64i16, RC: &Hexagon::HvxWRRegClass); |
63 | addRegisterClass(VT: MVT::v32i32, RC: &Hexagon::HvxWRRegClass); |
64 | // These "short" boolean vector types should be legal because |
65 | // they will appear as results of vector compares. If they were |
66 | // not legal, type legalization would try to make them legal |
67 | // and that would require using operations that do not use or |
68 | // produce such types. That, in turn, would imply using custom |
69 | // nodes, which would be unoptimizable by the DAG combiner. |
70 | // The idea is to rely on target-independent operations as much |
71 | // as possible. |
72 | addRegisterClass(VT: MVT::v16i1, RC: &Hexagon::HvxQRRegClass); |
73 | addRegisterClass(VT: MVT::v32i1, RC: &Hexagon::HvxQRRegClass); |
74 | addRegisterClass(VT: MVT::v64i1, RC: &Hexagon::HvxQRRegClass); |
75 | } else if (Subtarget.useHVX128BOps()) { |
76 | addRegisterClass(VT: MVT::v128i8, RC: &Hexagon::HvxVRRegClass); |
77 | addRegisterClass(VT: MVT::v64i16, RC: &Hexagon::HvxVRRegClass); |
78 | addRegisterClass(VT: MVT::v32i32, RC: &Hexagon::HvxVRRegClass); |
79 | addRegisterClass(VT: MVT::v256i8, RC: &Hexagon::HvxWRRegClass); |
80 | addRegisterClass(VT: MVT::v128i16, RC: &Hexagon::HvxWRRegClass); |
81 | addRegisterClass(VT: MVT::v64i32, RC: &Hexagon::HvxWRRegClass); |
82 | addRegisterClass(VT: MVT::v32i1, RC: &Hexagon::HvxQRRegClass); |
83 | addRegisterClass(VT: MVT::v64i1, RC: &Hexagon::HvxQRRegClass); |
84 | addRegisterClass(VT: MVT::v128i1, RC: &Hexagon::HvxQRRegClass); |
85 | if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) { |
86 | addRegisterClass(VT: MVT::v32f32, RC: &Hexagon::HvxVRRegClass); |
87 | addRegisterClass(VT: MVT::v64f16, RC: &Hexagon::HvxVRRegClass); |
88 | addRegisterClass(VT: MVT::v64f32, RC: &Hexagon::HvxWRRegClass); |
89 | addRegisterClass(VT: MVT::v128f16, RC: &Hexagon::HvxWRRegClass); |
90 | } |
91 | } |
92 | |
93 | // Set up operation actions. |
94 | |
95 | bool Use64b = Subtarget.useHVX64BOps(); |
96 | ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128; |
97 | ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128; |
98 | MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8; |
99 | MVT WordV = Use64b ? MVT::v16i32 : MVT::v32i32; |
100 | MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8; |
101 | |
102 | auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) { |
103 | setOperationAction(Op: Opc, VT: FromTy, Action: Promote); |
104 | AddPromotedToType(Opc, OrigVT: FromTy, DestVT: ToTy); |
105 | }; |
106 | |
107 | // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32). |
108 | // Note: v16i1 -> i16 is handled in type legalization instead of op |
109 | // legalization. |
110 | setOperationAction(Op: ISD::BITCAST, VT: MVT::i16, Action: Custom); |
111 | setOperationAction(Op: ISD::BITCAST, VT: MVT::i32, Action: Custom); |
112 | setOperationAction(Op: ISD::BITCAST, VT: MVT::i64, Action: Custom); |
113 | setOperationAction(Op: ISD::BITCAST, VT: MVT::v16i1, Action: Custom); |
114 | setOperationAction(Op: ISD::BITCAST, VT: MVT::v128i1, Action: Custom); |
115 | setOperationAction(Op: ISD::BITCAST, VT: MVT::i128, Action: Custom); |
116 | setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: ByteV, Action: Legal); |
117 | setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: ByteW, Action: Legal); |
118 | setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom); |
119 | |
120 | if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() && |
121 | Subtarget.useHVXFloatingPoint()) { |
122 | |
123 | static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 }; |
124 | static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 }; |
125 | |
126 | for (MVT T : FloatV) { |
127 | setOperationAction(Op: ISD::FADD, VT: T, Action: Legal); |
128 | setOperationAction(Op: ISD::FSUB, VT: T, Action: Legal); |
129 | setOperationAction(Op: ISD::FMUL, VT: T, Action: Legal); |
130 | setOperationAction(Op: ISD::FMINIMUMNUM, VT: T, Action: Legal); |
131 | setOperationAction(Op: ISD::FMAXIMUMNUM, VT: T, Action: Legal); |
132 | |
133 | setOperationAction(Op: ISD::INSERT_SUBVECTOR, VT: T, Action: Custom); |
134 | setOperationAction(Op: ISD::EXTRACT_SUBVECTOR, VT: T, Action: Custom); |
135 | |
136 | setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Legal); |
137 | setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Legal); |
138 | |
139 | setOperationAction(Op: ISD::MLOAD, VT: T, Action: Custom); |
140 | setOperationAction(Op: ISD::MSTORE, VT: T, Action: Custom); |
141 | // Custom-lower BUILD_VECTOR. The standard (target-independent) |
142 | // handling of it would convert it to a load, which is not always |
143 | // the optimal choice. |
144 | setOperationAction(Op: ISD::BUILD_VECTOR, VT: T, Action: Custom); |
145 | } |
146 | |
147 | |
148 | // BUILD_VECTOR with f16 operands cannot be promoted without |
149 | // promoting the result, so lower the node to vsplat or constant pool |
150 | setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::f16, Action: Custom); |
151 | setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::f16, Action: Custom); |
152 | setOperationAction(Op: ISD::SPLAT_VECTOR, VT: MVT::f16, Action: Custom); |
153 | |
154 | // Vector shuffle is always promoted to ByteV and a bitcast to f16 is |
155 | // generated. |
156 | setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW); |
157 | setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV); |
158 | setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW); |
159 | setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV); |
160 | |
161 | for (MVT P : FloatW) { |
162 | setOperationAction(Op: ISD::LOAD, VT: P, Action: Custom); |
163 | setOperationAction(Op: ISD::STORE, VT: P, Action: Custom); |
164 | setOperationAction(Op: ISD::FADD, VT: P, Action: Custom); |
165 | setOperationAction(Op: ISD::FSUB, VT: P, Action: Custom); |
166 | setOperationAction(Op: ISD::FMUL, VT: P, Action: Custom); |
167 | setOperationAction(Op: ISD::FMINIMUMNUM, VT: P, Action: Custom); |
168 | setOperationAction(Op: ISD::FMAXIMUMNUM, VT: P, Action: Custom); |
169 | setOperationAction(Op: ISD::SETCC, VT: P, Action: Custom); |
170 | setOperationAction(Op: ISD::VSELECT, VT: P, Action: Custom); |
171 | |
172 | // Custom-lower BUILD_VECTOR. The standard (target-independent) |
173 | // handling of it would convert it to a load, which is not always |
174 | // the optimal choice. |
175 | setOperationAction(Op: ISD::BUILD_VECTOR, VT: P, Action: Custom); |
176 | // Make concat-vectors custom to handle concats of more than 2 vectors. |
177 | setOperationAction(Op: ISD::CONCAT_VECTORS, VT: P, Action: Custom); |
178 | |
179 | setOperationAction(Op: ISD::MLOAD, VT: P, Action: Custom); |
180 | setOperationAction(Op: ISD::MSTORE, VT: P, Action: Custom); |
181 | } |
182 | |
183 | if (Subtarget.useHVXQFloatOps()) { |
184 | setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::v64f32, Action: Custom); |
185 | setOperationAction(Op: ISD::FP_ROUND, VT: MVT::v64f16, Action: Legal); |
186 | } else if (Subtarget.useHVXIEEEFPOps()) { |
187 | setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::v64f32, Action: Legal); |
188 | setOperationAction(Op: ISD::FP_ROUND, VT: MVT::v64f16, Action: Legal); |
189 | } |
190 | } |
191 | |
192 | for (MVT T : LegalV) { |
193 | setIndexedLoadAction(IdxModes: ISD::POST_INC, VT: T, Action: Legal); |
194 | setIndexedStoreAction(IdxModes: ISD::POST_INC, VT: T, Action: Legal); |
195 | |
196 | setOperationAction(Op: ISD::ABS, VT: T, Action: Legal); |
197 | setOperationAction(Op: ISD::AND, VT: T, Action: Legal); |
198 | setOperationAction(Op: ISD::OR, VT: T, Action: Legal); |
199 | setOperationAction(Op: ISD::XOR, VT: T, Action: Legal); |
200 | setOperationAction(Op: ISD::ADD, VT: T, Action: Legal); |
201 | setOperationAction(Op: ISD::SUB, VT: T, Action: Legal); |
202 | setOperationAction(Op: ISD::MUL, VT: T, Action: Legal); |
203 | setOperationAction(Op: ISD::CTPOP, VT: T, Action: Legal); |
204 | setOperationAction(Op: ISD::CTLZ, VT: T, Action: Legal); |
205 | setOperationAction(Op: ISD::SELECT, VT: T, Action: Legal); |
206 | setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Legal); |
207 | if (T != ByteV) { |
208 | setOperationAction(Op: ISD::SIGN_EXTEND_VECTOR_INREG, VT: T, Action: Legal); |
209 | setOperationAction(Op: ISD::ZERO_EXTEND_VECTOR_INREG, VT: T, Action: Legal); |
210 | setOperationAction(Op: ISD::BSWAP, VT: T, Action: Legal); |
211 | } |
212 | |
213 | setOperationAction(Op: ISD::SMIN, VT: T, Action: Legal); |
214 | setOperationAction(Op: ISD::SMAX, VT: T, Action: Legal); |
215 | if (T.getScalarType() != MVT::i32) { |
216 | setOperationAction(Op: ISD::UMIN, VT: T, Action: Legal); |
217 | setOperationAction(Op: ISD::UMAX, VT: T, Action: Legal); |
218 | } |
219 | |
220 | setOperationAction(Op: ISD::CTTZ, VT: T, Action: Custom); |
221 | setOperationAction(Op: ISD::LOAD, VT: T, Action: Custom); |
222 | setOperationAction(Op: ISD::MLOAD, VT: T, Action: Custom); |
223 | setOperationAction(Op: ISD::MSTORE, VT: T, Action: Custom); |
224 | if (T.getScalarType() != MVT::i32) { |
225 | setOperationAction(Op: ISD::MULHS, VT: T, Action: Legal); |
226 | setOperationAction(Op: ISD::MULHU, VT: T, Action: Legal); |
227 | } |
228 | |
229 | setOperationAction(Op: ISD::BUILD_VECTOR, VT: T, Action: Custom); |
230 | // Make concat-vectors custom to handle concats of more than 2 vectors. |
231 | setOperationAction(Op: ISD::CONCAT_VECTORS, VT: T, Action: Custom); |
232 | setOperationAction(Op: ISD::INSERT_SUBVECTOR, VT: T, Action: Custom); |
233 | setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: T, Action: Custom); |
234 | setOperationAction(Op: ISD::EXTRACT_SUBVECTOR, VT: T, Action: Custom); |
235 | setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: T, Action: Custom); |
236 | setOperationAction(Op: ISD::ANY_EXTEND, VT: T, Action: Custom); |
237 | setOperationAction(Op: ISD::SIGN_EXTEND, VT: T, Action: Custom); |
238 | setOperationAction(Op: ISD::ZERO_EXTEND, VT: T, Action: Custom); |
239 | setOperationAction(Op: ISD::FSHL, VT: T, Action: Custom); |
240 | setOperationAction(Op: ISD::FSHR, VT: T, Action: Custom); |
241 | if (T != ByteV) { |
242 | setOperationAction(Op: ISD::ANY_EXTEND_VECTOR_INREG, VT: T, Action: Custom); |
243 | // HVX only has shifts of words and halfwords. |
244 | setOperationAction(Op: ISD::SRA, VT: T, Action: Custom); |
245 | setOperationAction(Op: ISD::SHL, VT: T, Action: Custom); |
246 | setOperationAction(Op: ISD::SRL, VT: T, Action: Custom); |
247 | |
248 | // Promote all shuffles to operate on vectors of bytes. |
249 | setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV); |
250 | } |
251 | |
252 | if (Subtarget.useHVXFloatingPoint()) { |
253 | // Same action for both QFloat and IEEE. |
254 | setOperationAction(Op: ISD::SINT_TO_FP, VT: T, Action: Custom); |
255 | setOperationAction(Op: ISD::UINT_TO_FP, VT: T, Action: Custom); |
256 | setOperationAction(Op: ISD::FP_TO_SINT, VT: T, Action: Custom); |
257 | setOperationAction(Op: ISD::FP_TO_UINT, VT: T, Action: Custom); |
258 | } |
259 | |
260 | setCondCodeAction(CCs: ISD::SETNE, VT: T, Action: Expand); |
261 | setCondCodeAction(CCs: ISD::SETLE, VT: T, Action: Expand); |
262 | setCondCodeAction(CCs: ISD::SETGE, VT: T, Action: Expand); |
263 | setCondCodeAction(CCs: ISD::SETLT, VT: T, Action: Expand); |
264 | setCondCodeAction(CCs: ISD::SETULE, VT: T, Action: Expand); |
265 | setCondCodeAction(CCs: ISD::SETUGE, VT: T, Action: Expand); |
266 | setCondCodeAction(CCs: ISD::SETULT, VT: T, Action: Expand); |
267 | } |
268 | |
269 | for (MVT T : LegalW) { |
270 | // Custom-lower BUILD_VECTOR for vector pairs. The standard (target- |
271 | // independent) handling of it would convert it to a load, which is |
272 | // not always the optimal choice. |
273 | setOperationAction(Op: ISD::BUILD_VECTOR, VT: T, Action: Custom); |
274 | // Make concat-vectors custom to handle concats of more than 2 vectors. |
275 | setOperationAction(Op: ISD::CONCAT_VECTORS, VT: T, Action: Custom); |
276 | |
277 | // Custom-lower these operations for pairs. Expand them into a concat |
278 | // of the corresponding operations on individual vectors. |
279 | setOperationAction(Op: ISD::ANY_EXTEND, VT: T, Action: Custom); |
280 | setOperationAction(Op: ISD::SIGN_EXTEND, VT: T, Action: Custom); |
281 | setOperationAction(Op: ISD::ZERO_EXTEND, VT: T, Action: Custom); |
282 | setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: T, Action: Custom); |
283 | setOperationAction(Op: ISD::ANY_EXTEND_VECTOR_INREG, VT: T, Action: Custom); |
284 | setOperationAction(Op: ISD::SIGN_EXTEND_VECTOR_INREG, VT: T, Action: Legal); |
285 | setOperationAction(Op: ISD::ZERO_EXTEND_VECTOR_INREG, VT: T, Action: Legal); |
286 | setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Custom); |
287 | |
288 | setOperationAction(Op: ISD::LOAD, VT: T, Action: Custom); |
289 | setOperationAction(Op: ISD::STORE, VT: T, Action: Custom); |
290 | setOperationAction(Op: ISD::MLOAD, VT: T, Action: Custom); |
291 | setOperationAction(Op: ISD::MSTORE, VT: T, Action: Custom); |
292 | setOperationAction(Op: ISD::ABS, VT: T, Action: Custom); |
293 | setOperationAction(Op: ISD::CTLZ, VT: T, Action: Custom); |
294 | setOperationAction(Op: ISD::CTTZ, VT: T, Action: Custom); |
295 | setOperationAction(Op: ISD::CTPOP, VT: T, Action: Custom); |
296 | |
297 | setOperationAction(Op: ISD::ADD, VT: T, Action: Legal); |
298 | setOperationAction(Op: ISD::SUB, VT: T, Action: Legal); |
299 | setOperationAction(Op: ISD::MUL, VT: T, Action: Custom); |
300 | setOperationAction(Op: ISD::MULHS, VT: T, Action: Custom); |
301 | setOperationAction(Op: ISD::MULHU, VT: T, Action: Custom); |
302 | setOperationAction(Op: ISD::AND, VT: T, Action: Custom); |
303 | setOperationAction(Op: ISD::OR, VT: T, Action: Custom); |
304 | setOperationAction(Op: ISD::XOR, VT: T, Action: Custom); |
305 | setOperationAction(Op: ISD::SETCC, VT: T, Action: Custom); |
306 | setOperationAction(Op: ISD::VSELECT, VT: T, Action: Custom); |
307 | if (T != ByteW) { |
308 | setOperationAction(Op: ISD::SRA, VT: T, Action: Custom); |
309 | setOperationAction(Op: ISD::SHL, VT: T, Action: Custom); |
310 | setOperationAction(Op: ISD::SRL, VT: T, Action: Custom); |
311 | |
312 | // Promote all shuffles to operate on vectors of bytes. |
313 | setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW); |
314 | } |
315 | setOperationAction(Op: ISD::FSHL, VT: T, Action: Custom); |
316 | setOperationAction(Op: ISD::FSHR, VT: T, Action: Custom); |
317 | |
318 | setOperationAction(Op: ISD::SMIN, VT: T, Action: Custom); |
319 | setOperationAction(Op: ISD::SMAX, VT: T, Action: Custom); |
320 | if (T.getScalarType() != MVT::i32) { |
321 | setOperationAction(Op: ISD::UMIN, VT: T, Action: Custom); |
322 | setOperationAction(Op: ISD::UMAX, VT: T, Action: Custom); |
323 | } |
324 | |
325 | if (Subtarget.useHVXFloatingPoint()) { |
326 | // Same action for both QFloat and IEEE. |
327 | setOperationAction(Op: ISD::SINT_TO_FP, VT: T, Action: Custom); |
328 | setOperationAction(Op: ISD::UINT_TO_FP, VT: T, Action: Custom); |
329 | setOperationAction(Op: ISD::FP_TO_SINT, VT: T, Action: Custom); |
330 | setOperationAction(Op: ISD::FP_TO_UINT, VT: T, Action: Custom); |
331 | } |
332 | } |
333 | |
334 | // Legalize all of these to HexagonISD::[SU]MUL_LOHI. |
335 | setOperationAction(Op: ISD::MULHS, VT: WordV, Action: Custom); // -> _LOHI |
336 | setOperationAction(Op: ISD::MULHU, VT: WordV, Action: Custom); // -> _LOHI |
337 | setOperationAction(Op: ISD::SMUL_LOHI, VT: WordV, Action: Custom); |
338 | setOperationAction(Op: ISD::UMUL_LOHI, VT: WordV, Action: Custom); |
339 | |
340 | setCondCodeAction(CCs: ISD::SETNE, VT: MVT::v64f16, Action: Expand); |
341 | setCondCodeAction(CCs: ISD::SETLE, VT: MVT::v64f16, Action: Expand); |
342 | setCondCodeAction(CCs: ISD::SETGE, VT: MVT::v64f16, Action: Expand); |
343 | setCondCodeAction(CCs: ISD::SETLT, VT: MVT::v64f16, Action: Expand); |
344 | setCondCodeAction(CCs: ISD::SETONE, VT: MVT::v64f16, Action: Expand); |
345 | setCondCodeAction(CCs: ISD::SETOLE, VT: MVT::v64f16, Action: Expand); |
346 | setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::v64f16, Action: Expand); |
347 | setCondCodeAction(CCs: ISD::SETOLT, VT: MVT::v64f16, Action: Expand); |
348 | setCondCodeAction(CCs: ISD::SETUNE, VT: MVT::v64f16, Action: Expand); |
349 | setCondCodeAction(CCs: ISD::SETULE, VT: MVT::v64f16, Action: Expand); |
350 | setCondCodeAction(CCs: ISD::SETUGE, VT: MVT::v64f16, Action: Expand); |
351 | setCondCodeAction(CCs: ISD::SETULT, VT: MVT::v64f16, Action: Expand); |
352 | |
353 | setCondCodeAction(CCs: ISD::SETNE, VT: MVT::v32f32, Action: Expand); |
354 | setCondCodeAction(CCs: ISD::SETLE, VT: MVT::v32f32, Action: Expand); |
355 | setCondCodeAction(CCs: ISD::SETGE, VT: MVT::v32f32, Action: Expand); |
356 | setCondCodeAction(CCs: ISD::SETLT, VT: MVT::v32f32, Action: Expand); |
357 | setCondCodeAction(CCs: ISD::SETONE, VT: MVT::v32f32, Action: Expand); |
358 | setCondCodeAction(CCs: ISD::SETOLE, VT: MVT::v32f32, Action: Expand); |
359 | setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::v32f32, Action: Expand); |
360 | setCondCodeAction(CCs: ISD::SETOLT, VT: MVT::v32f32, Action: Expand); |
361 | setCondCodeAction(CCs: ISD::SETUNE, VT: MVT::v32f32, Action: Expand); |
362 | setCondCodeAction(CCs: ISD::SETULE, VT: MVT::v32f32, Action: Expand); |
363 | setCondCodeAction(CCs: ISD::SETUGE, VT: MVT::v32f32, Action: Expand); |
364 | setCondCodeAction(CCs: ISD::SETULT, VT: MVT::v32f32, Action: Expand); |
365 | |
366 | // Boolean vectors. |
367 | |
368 | for (MVT T : LegalW) { |
369 | // Boolean types for vector pairs will overlap with the boolean |
370 | // types for single vectors, e.g. |
371 | // v64i8 -> v64i1 (single) |
372 | // v64i16 -> v64i1 (pair) |
373 | // Set these actions first, and allow the single actions to overwrite |
374 | // any duplicates. |
375 | MVT BoolW = MVT::getVectorVT(VT: MVT::i1, NumElements: T.getVectorNumElements()); |
376 | setOperationAction(Op: ISD::SETCC, VT: BoolW, Action: Custom); |
377 | setOperationAction(Op: ISD::AND, VT: BoolW, Action: Custom); |
378 | setOperationAction(Op: ISD::OR, VT: BoolW, Action: Custom); |
379 | setOperationAction(Op: ISD::XOR, VT: BoolW, Action: Custom); |
380 | // Masked load/store takes a mask that may need splitting. |
381 | setOperationAction(Op: ISD::MLOAD, VT: BoolW, Action: Custom); |
382 | setOperationAction(Op: ISD::MSTORE, VT: BoolW, Action: Custom); |
383 | } |
384 | |
385 | for (MVT T : LegalV) { |
386 | MVT BoolV = MVT::getVectorVT(VT: MVT::i1, NumElements: T.getVectorNumElements()); |
387 | setOperationAction(Op: ISD::BUILD_VECTOR, VT: BoolV, Action: Custom); |
388 | setOperationAction(Op: ISD::CONCAT_VECTORS, VT: BoolV, Action: Custom); |
389 | setOperationAction(Op: ISD::INSERT_SUBVECTOR, VT: BoolV, Action: Custom); |
390 | setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: BoolV, Action: Custom); |
391 | setOperationAction(Op: ISD::EXTRACT_SUBVECTOR, VT: BoolV, Action: Custom); |
392 | setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: BoolV, Action: Custom); |
393 | setOperationAction(Op: ISD::SELECT, VT: BoolV, Action: Custom); |
394 | setOperationAction(Op: ISD::AND, VT: BoolV, Action: Legal); |
395 | setOperationAction(Op: ISD::OR, VT: BoolV, Action: Legal); |
396 | setOperationAction(Op: ISD::XOR, VT: BoolV, Action: Legal); |
397 | } |
398 | |
399 | if (Use64b) { |
400 | for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32}) |
401 | setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: T, Action: Legal); |
402 | } else { |
403 | for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32}) |
404 | setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: T, Action: Legal); |
405 | } |
406 | |
407 | // Handle store widening for short vectors. |
408 | unsigned HwLen = Subtarget.getVectorLength(); |
409 | for (MVT ElemTy : Subtarget.getHVXElementTypes()) { |
410 | if (ElemTy == MVT::i1) |
411 | continue; |
412 | int ElemWidth = ElemTy.getFixedSizeInBits(); |
413 | int MaxElems = (8*HwLen) / ElemWidth; |
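// Consider every power-of-2 element count that is shorter than a full
// HVX vector; these are the "short" vector types that may get widened.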
414 | for (int N = 2; N < MaxElems; N *= 2) { |
415 | MVT VecTy = MVT::getVectorVT(VT: ElemTy, NumElements: N); |
416 | auto Action = getPreferredVectorAction(VT: VecTy); |
417 | if (Action == TargetLoweringBase::TypeWidenVector) { |
418 | setOperationAction(Op: ISD::LOAD, VT: VecTy, Action: Custom); |
419 | setOperationAction(Op: ISD::STORE, VT: VecTy, Action: Custom); |
420 | setOperationAction(Op: ISD::SETCC, VT: VecTy, Action: Custom); |
421 | setOperationAction(Op: ISD::TRUNCATE, VT: VecTy, Action: Custom); |
422 | setOperationAction(Op: ISD::ANY_EXTEND, VT: VecTy, Action: Custom); |
423 | setOperationAction(Op: ISD::SIGN_EXTEND, VT: VecTy, Action: Custom); |
424 | setOperationAction(Op: ISD::ZERO_EXTEND, VT: VecTy, Action: Custom); |
425 | if (Subtarget.useHVXFloatingPoint()) { |
426 | setOperationAction(Op: ISD::FP_TO_SINT, VT: VecTy, Action: Custom); |
427 | setOperationAction(Op: ISD::FP_TO_UINT, VT: VecTy, Action: Custom); |
428 | setOperationAction(Op: ISD::SINT_TO_FP, VT: VecTy, Action: Custom); |
429 | setOperationAction(Op: ISD::UINT_TO_FP, VT: VecTy, Action: Custom); |
430 | } |
431 | |
432 | MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: N); |
433 | if (!isTypeLegal(VT: BoolTy)) |
434 | setOperationAction(Op: ISD::SETCC, VT: BoolTy, Action: Custom); |
435 | } |
436 | } |
437 | } |
438 | |
439 | setTargetDAGCombine({ISD::CONCAT_VECTORS, ISD::TRUNCATE, ISD::VSELECT}); |
440 | } |
441 | |
442 | unsigned |
443 | HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const { |
444 | MVT ElemTy = VecTy.getVectorElementType(); |
445 | unsigned VecLen = VecTy.getVectorNumElements(); |
446 | unsigned HwLen = Subtarget.getVectorLength(); |
447 | |
448 | // Split vectors of i1 that exceed byte vector length. |
449 | if (ElemTy == MVT::i1 && VecLen > HwLen) |
450 | return TargetLoweringBase::TypeSplitVector; |
451 | |
452 | ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes(); |
453 | // For shorter vectors of i1, widen them if any of the corresponding |
454 | // vectors of integers needs to be widened. |
455 | if (ElemTy == MVT::i1) { |
456 | for (MVT T : Tys) { |
457 | assert(T != MVT::i1); |
458 | auto A = getPreferredHvxVectorAction(VecTy: MVT::getVectorVT(VT: T, NumElements: VecLen)); |
459 | if (A != ~0u) |
460 | return A; |
461 | } |
462 | return ~0u; |
463 | } |
464 | |
465 | // If the size of VecTy is at least half of the vector length, |
466 | // widen the vector. Note: the threshold was not selected in |
467 | // any scientific way. |
468 | if (llvm::is_contained(Range&: Tys, Element: ElemTy)) { |
469 | unsigned VecWidth = VecTy.getSizeInBits(); |
470 | unsigned HwWidth = 8*HwLen; |
471 | if (VecWidth > 2*HwWidth) |
472 | return TargetLoweringBase::TypeSplitVector; |
473 | |
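// If an explicit -hexagon-hvx-widen=<bytes> threshold was given, widen
// anything at least that wide, in addition to the default heuristic below.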
474 | bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0; |
475 | if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth) |
476 | return TargetLoweringBase::TypeWidenVector; |
477 | if (VecWidth >= HwWidth/2 && VecWidth < HwWidth) |
478 | return TargetLoweringBase::TypeWidenVector; |
479 | } |
480 | |
481 | // Defer to default. |
482 | return ~0u; |
483 | } |
484 | |
485 | unsigned |
486 | HexagonTargetLowering::getCustomHvxOperationAction(SDNode &Op) const { |
487 | unsigned Opc = Op.getOpcode(); |
488 | switch (Opc) { |
489 | case HexagonISD::SMUL_LOHI: |
490 | case HexagonISD::UMUL_LOHI: |
491 | case HexagonISD::USMUL_LOHI: |
492 | return TargetLoweringBase::Custom; |
493 | } |
494 | return TargetLoweringBase::Legal; |
495 | } |
496 | |
497 | SDValue |
498 | HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops, |
499 | const SDLoc &dl, SelectionDAG &DAG) const { |
500 | SmallVector<SDValue,4> IntOps; |
501 | IntOps.push_back(Elt: DAG.getConstant(Val: IntId, DL: dl, VT: MVT::i32)); |
502 | append_range(C&: IntOps, R&: Ops); |
503 | return DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL: dl, VT: ResTy, Ops: IntOps); |
504 | } |
505 | |
506 | MVT |
507 | HexagonTargetLowering::typeJoin(const TypePair &Tys) const { |
508 | assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType()); |
509 | |
510 | MVT ElemTy = Tys.first.getVectorElementType(); |
511 | return MVT::getVectorVT(VT: ElemTy, NumElements: Tys.first.getVectorNumElements() + |
512 | Tys.second.getVectorNumElements()); |
513 | } |
514 | |
515 | HexagonTargetLowering::TypePair |
516 | HexagonTargetLowering::typeSplit(MVT VecTy) const { |
517 | assert(VecTy.isVector()); |
518 | unsigned NumElem = VecTy.getVectorNumElements(); |
519 | assert((NumElem % 2) == 0 && "Expecting even-sized vector type"); |
520 | MVT HalfTy = MVT::getVectorVT(VT: VecTy.getVectorElementType(), NumElements: NumElem/2); |
521 | return { HalfTy, HalfTy }; |
522 | } |
523 | |
524 | MVT |
525 | HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const { |
526 | MVT ElemTy = VecTy.getVectorElementType(); |
527 | MVT NewElemTy = MVT::getIntegerVT(BitWidth: ElemTy.getSizeInBits() * Factor); |
528 | return MVT::getVectorVT(VT: NewElemTy, NumElements: VecTy.getVectorNumElements()); |
529 | } |
530 | |
531 | MVT |
532 | HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const { |
533 | MVT ElemTy = VecTy.getVectorElementType(); |
534 | MVT NewElemTy = MVT::getIntegerVT(BitWidth: ElemTy.getSizeInBits() / Factor); |
535 | return MVT::getVectorVT(VT: NewElemTy, NumElements: VecTy.getVectorNumElements()); |
536 | } |
537 | |
538 | SDValue |
539 | HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy, |
540 | SelectionDAG &DAG) const { |
541 | if (ty(Op: Vec).getVectorElementType() == ElemTy) |
542 | return Vec; |
543 | MVT CastTy = tyVector(Ty: Vec.getValueType().getSimpleVT(), ElemTy); |
544 | return DAG.getBitcast(VT: CastTy, V: Vec); |
545 | } |
546 | |
547 | SDValue |
548 | HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl, |
549 | SelectionDAG &DAG) const { |
550 | return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: typeJoin(Tys: ty(Ops)), |
551 | N1: Ops.first, N2: Ops.second); |
552 | } |
553 | |
554 | HexagonTargetLowering::VectorPair |
555 | HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl, |
556 | SelectionDAG &DAG) const { |
557 | TypePair Tys = typeSplit(VecTy: ty(Op: Vec)); |
558 | if (Vec.getOpcode() == HexagonISD::QCAT) |
559 | return VectorPair(Vec.getOperand(i: 0), Vec.getOperand(i: 1)); |
560 | return DAG.SplitVector(N: Vec, DL: dl, LoVT: Tys.first, HiVT: Tys.second); |
561 | } |
562 | |
563 | bool |
564 | HexagonTargetLowering::isHvxSingleTy(MVT Ty) const { |
565 | return Subtarget.isHVXVectorType(VecTy: Ty) && |
566 | Ty.getSizeInBits() == 8 * Subtarget.getVectorLength(); |
567 | } |
568 | |
569 | bool |
570 | HexagonTargetLowering::isHvxPairTy(MVT Ty) const { |
571 | return Subtarget.isHVXVectorType(VecTy: Ty) && |
572 | Ty.getSizeInBits() == 16 * Subtarget.getVectorLength(); |
573 | } |
574 | |
575 | bool |
576 | HexagonTargetLowering::isHvxBoolTy(MVT Ty) const { |
577 | return Subtarget.isHVXVectorType(VecTy: Ty, IncludeBool: true) && |
578 | Ty.getVectorElementType() == MVT::i1; |
579 | } |
580 | |
581 | bool HexagonTargetLowering::allowsHvxMemoryAccess( |
582 | MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const { |
583 | // Bool vectors are excluded by default, but make it explicit to |
584 | // emphasize that bool vectors cannot be loaded or stored. |
585 | // Also, disallow double vector stores (to prevent unnecessary |
586 | // store widening in DAG combiner). |
587 | if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength()) |
588 | return false; |
589 | if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false)) |
590 | return false; |
591 | if (Fast) |
592 | *Fast = 1; |
593 | return true; |
594 | } |
595 | |
596 | bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses( |
597 | MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const { |
598 | if (!Subtarget.isHVXVectorType(VecTy)) |
599 | return false; |
600 | // XXX Should this be false? vmemu is a bit slower than vmem. |
601 | if (Fast) |
602 | *Fast = 1; |
603 | return true; |
604 | } |
605 | |
606 | void HexagonTargetLowering::AdjustHvxInstrPostInstrSelection( |
607 | MachineInstr &MI, SDNode *Node) const { |
608 | unsigned Opc = MI.getOpcode(); |
609 | const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); |
610 | MachineBasicBlock &MB = *MI.getParent(); |
611 | MachineFunction &MF = *MB.getParent(); |
612 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
613 | DebugLoc DL = MI.getDebugLoc(); |
614 | auto At = MI.getIterator(); |
615 | |
616 | switch (Opc) { |
617 | case Hexagon::PS_vsplatib: |
618 | if (Subtarget.useHVXV62Ops()) { |
619 | // SplatV = A2_tfrsi #imm |
620 | // OutV = V6_lvsplatb SplatV |
621 | Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass); |
622 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV) |
623 | .add(MO: MI.getOperand(i: 1)); |
624 | Register OutV = MI.getOperand(i: 0).getReg(); |
625 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatb), DestReg: OutV) |
626 | .addReg(RegNo: SplatV); |
627 | } else { |
628 | // SplatV = A2_tfrsi #imm:#imm:#imm:#imm |
629 | // OutV = V6_lvsplatw SplatV |
630 | Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass); |
631 | const MachineOperand &InpOp = MI.getOperand(i: 1); |
632 | assert(InpOp.isImm()); |
633 | uint32_t V = InpOp.getImm() & 0xFF; |
634 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV) |
635 | .addImm(Val: V << 24 | V << 16 | V << 8 | V); |
636 | Register OutV = MI.getOperand(i: 0).getReg(); |
637 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatw), DestReg: OutV).addReg(RegNo: SplatV); |
638 | } |
639 | MB.erase(I: At); |
640 | break; |
641 | case Hexagon::PS_vsplatrb: |
642 | if (Subtarget.useHVXV62Ops()) { |
643 | // OutV = V6_lvsplatb Inp |
644 | Register OutV = MI.getOperand(i: 0).getReg(); |
645 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatb), DestReg: OutV) |
646 | .add(MO: MI.getOperand(i: 1)); |
647 | } else { |
648 | Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass); |
649 | const MachineOperand &InpOp = MI.getOperand(i: 1); |
650 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::S2_vsplatrb), DestReg: SplatV) |
651 | .addReg(RegNo: InpOp.getReg(), flags: 0, SubReg: InpOp.getSubReg()); |
652 | Register OutV = MI.getOperand(i: 0).getReg(); |
653 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatw), DestReg: OutV) |
654 | .addReg(RegNo: SplatV); |
655 | } |
656 | MB.erase(I: At); |
657 | break; |
658 | case Hexagon::PS_vsplatih: |
659 | if (Subtarget.useHVXV62Ops()) { |
660 | // SplatV = A2_tfrsi #imm |
661 | // OutV = V6_lvsplath SplatV |
662 | Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass); |
663 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV) |
664 | .add(MO: MI.getOperand(i: 1)); |
665 | Register OutV = MI.getOperand(i: 0).getReg(); |
666 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplath), DestReg: OutV) |
667 | .addReg(RegNo: SplatV); |
668 | } else { |
669 | // SplatV = A2_tfrsi #imm:#imm |
670 | // OutV = V6_lvsplatw SplatV |
671 | Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass); |
672 | const MachineOperand &InpOp = MI.getOperand(i: 1); |
673 | assert(InpOp.isImm()); |
674 | uint32_t V = InpOp.getImm() & 0xFFFF; |
675 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV) |
676 | .addImm(Val: V << 16 | V); |
677 | Register OutV = MI.getOperand(i: 0).getReg(); |
678 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatw), DestReg: OutV).addReg(RegNo: SplatV); |
679 | } |
680 | MB.erase(I: At); |
681 | break; |
682 | case Hexagon::PS_vsplatrh: |
683 | if (Subtarget.useHVXV62Ops()) { |
684 | // OutV = V6_lvsplath Inp |
685 | Register OutV = MI.getOperand(i: 0).getReg(); |
686 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplath), DestReg: OutV) |
687 | .add(MO: MI.getOperand(i: 1)); |
688 | } else { |
689 | // SplatV = A2_combine_ll Inp, Inp |
690 | // OutV = V6_lvsplatw SplatV |
691 | Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass); |
692 | const MachineOperand &InpOp = MI.getOperand(i: 1); |
693 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_combine_ll), DestReg: SplatV) |
694 | .addReg(RegNo: InpOp.getReg(), flags: 0, SubReg: InpOp.getSubReg()) |
695 | .addReg(RegNo: InpOp.getReg(), flags: 0, SubReg: InpOp.getSubReg()); |
696 | Register OutV = MI.getOperand(i: 0).getReg(); |
697 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatw), DestReg: OutV).addReg(RegNo: SplatV); |
698 | } |
699 | MB.erase(I: At); |
700 | break; |
701 | case Hexagon::PS_vsplatiw: |
702 | case Hexagon::PS_vsplatrw: |
703 | if (Opc == Hexagon::PS_vsplatiw) { |
704 | // SplatV = A2_tfrsi #imm |
705 | Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass); |
706 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV) |
707 | .add(MO: MI.getOperand(i: 1)); |
708 | MI.getOperand(i: 1).ChangeToRegister(Reg: SplatV, isDef: false); |
709 | } |
710 | // OutV = V6_lvsplatw SplatV/Inp |
711 | MI.setDesc(TII.get(Opcode: Hexagon::V6_lvsplatw)); |
712 | break; |
713 | } |
714 | } |
715 | |
716 | SDValue |
717 | HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy, |
718 | SelectionDAG &DAG) const { |
719 | if (ElemIdx.getValueType().getSimpleVT() != MVT::i32) |
720 | ElemIdx = DAG.getBitcast(VT: MVT::i32, V: ElemIdx); |
721 | |
722 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
723 | if (ElemWidth == 8) |
724 | return ElemIdx; |
725 | |
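// Convert the element index to a byte index by shifting left by
// log2(bytes per element); ElemWidth is a power of 2.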
726 | unsigned L = Log2_32(Value: ElemWidth/8); |
727 | const SDLoc &dl(ElemIdx); |
728 | return DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: MVT::i32, |
729 | Ops: {ElemIdx, DAG.getConstant(Val: L, DL: dl, VT: MVT::i32)}); |
730 | } |
731 | |
732 | SDValue |
733 | HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy, |
734 | SelectionDAG &DAG) const { |
735 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
736 | assert(ElemWidth >= 8 && ElemWidth <= 32); |
737 | if (ElemWidth == 32) |
738 | return Idx; |
739 | |
740 | if (ty(Op: Idx) != MVT::i32) |
741 | Idx = DAG.getBitcast(VT: MVT::i32, V: Idx); |
742 | const SDLoc &dl(Idx); |
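// The position of the element within its containing 32-bit word is given
// by the low log2(32/ElemWidth) bits of the index.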
743 | SDValue Mask = DAG.getConstant(Val: 32/ElemWidth - 1, DL: dl, VT: MVT::i32); |
744 | SDValue SubIdx = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, Ops: {Idx, Mask}); |
745 | return SubIdx; |
746 | } |
747 | |
748 | SDValue |
749 | HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0, |
750 | SDValue Op1, ArrayRef<int> Mask, |
751 | SelectionDAG &DAG) const { |
752 | MVT OpTy = ty(Op: Op0); |
753 | assert(OpTy == ty(Op1)); |
754 | |
755 | MVT ElemTy = OpTy.getVectorElementType(); |
756 | if (ElemTy == MVT::i8) |
757 | return DAG.getVectorShuffle(VT: OpTy, dl, N1: Op0, N2: Op1, Mask); |
758 | assert(ElemTy.getSizeInBits() >= 8); |
759 | |
760 | MVT ResTy = tyVector(Ty: OpTy, ElemTy: MVT::i8); |
761 | unsigned ElemSize = ElemTy.getSizeInBits() / 8; |
762 | |
763 | SmallVector<int,128> ByteMask; |
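// Expand each index in the element-granularity mask into ElemSize
// consecutive byte indices (keeping -1 for undef lanes).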
764 | for (int M : Mask) { |
765 | if (M < 0) { |
766 | for (unsigned I = 0; I != ElemSize; ++I) |
767 | ByteMask.push_back(Elt: -1); |
768 | } else { |
769 | int NewM = M*ElemSize; |
770 | for (unsigned I = 0; I != ElemSize; ++I) |
771 | ByteMask.push_back(Elt: NewM+I); |
772 | } |
773 | } |
774 | assert(ResTy.getVectorNumElements() == ByteMask.size()); |
775 | return DAG.getVectorShuffle(VT: ResTy, dl, N1: opCastElem(Vec: Op0, ElemTy: MVT::i8, DAG), |
776 | N2: opCastElem(Vec: Op1, ElemTy: MVT::i8, DAG), Mask: ByteMask); |
777 | } |
778 | |
779 | SDValue |
780 | HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values, |
781 | const SDLoc &dl, MVT VecTy, |
782 | SelectionDAG &DAG) const { |
783 | unsigned VecLen = Values.size(); |
784 | MachineFunction &MF = DAG.getMachineFunction(); |
785 | MVT ElemTy = VecTy.getVectorElementType(); |
786 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
787 | unsigned HwLen = Subtarget.getVectorLength(); |
788 | |
789 | unsigned ElemSize = ElemWidth / 8; |
790 | assert(ElemSize*VecLen == HwLen); |
791 | SmallVector<SDValue,32> Words; |
792 | |
793 | if (VecTy.getVectorElementType() != MVT::i32 && |
794 | !(Subtarget.useHVXFloatingPoint() && |
795 | VecTy.getVectorElementType() == MVT::f32)) { |
796 | assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size"); |
797 | unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2; |
798 | MVT PartVT = MVT::getVectorVT(VT: VecTy.getVectorElementType(), NumElements: OpsPerWord); |
799 | for (unsigned i = 0; i != VecLen; i += OpsPerWord) { |
800 | SDValue W = buildVector32(Elem: Values.slice(N: i, M: OpsPerWord), dl, VecTy: PartVT, DAG); |
801 | Words.push_back(Elt: DAG.getBitcast(VT: MVT::i32, V: W)); |
802 | } |
803 | } else { |
804 | for (SDValue V : Values) |
805 | Words.push_back(Elt: DAG.getBitcast(VT: MVT::i32, V)); |
806 | } |
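// Check whether all defined values are the same; undefs are ignored. If so,
// report the common value (or Values[0] if everything was undef) in SplatV.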
807 | auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) { |
808 | unsigned NumValues = Values.size(); |
809 | assert(NumValues > 0); |
810 | bool IsUndef = true; |
811 | for (unsigned i = 0; i != NumValues; ++i) { |
812 | if (Values[i].isUndef()) |
813 | continue; |
814 | IsUndef = false; |
815 | if (!SplatV.getNode()) |
816 | SplatV = Values[i]; |
817 | else if (SplatV != Values[i]) |
818 | return false; |
819 | } |
820 | if (IsUndef) |
821 | SplatV = Values[0]; |
822 | return true; |
823 | }; |
824 | |
825 | unsigned NumWords = Words.size(); |
826 | SDValue SplatV; |
827 | bool IsSplat = isSplat(Words, SplatV); |
828 | if (IsSplat && isUndef(Op: SplatV)) |
829 | return DAG.getUNDEF(VT: VecTy); |
830 | if (IsSplat) { |
831 | assert(SplatV.getNode()); |
832 | if (isNullConstant(V: SplatV)) |
833 | return getZero(dl, Ty: VecTy, DAG); |
834 | MVT WordTy = MVT::getVectorVT(VT: MVT::i32, NumElements: HwLen/4); |
835 | SDValue S = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: WordTy, Operand: SplatV); |
836 | return DAG.getBitcast(VT: VecTy, V: S); |
837 | } |
838 | |
839 | // Delay recognizing constant vectors until here, so that we can generate |
840 | // a vsplat. |
841 | SmallVector<ConstantInt*, 128> Consts(VecLen); |
842 | bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts); |
843 | if (AllConst) { |
844 | ArrayRef<Constant*> Tmp((Constant**)Consts.begin(), |
845 | (Constant**)Consts.end()); |
846 | Constant *CV = ConstantVector::get(V: Tmp); |
847 | Align Alignment(HwLen); |
848 | SDValue CP = |
849 | LowerConstantPool(Op: DAG.getConstantPool(C: CV, VT: VecTy, Align: Alignment), DAG); |
850 | return DAG.getLoad(VT: VecTy, dl, Chain: DAG.getEntryNode(), Ptr: CP, |
851 | PtrInfo: MachinePointerInfo::getConstantPool(MF), Alignment); |
852 | } |
853 | |
854 | // A special case is a situation where the vector is built entirely from |
855 | // elements extracted from another vector. This could be done via a shuffle |
856 | // more efficiently, but typically, the size of the source vector will not |
857 | // match the size of the vector being built (which precludes the use of a |
858 | // shuffle directly). |
859 | // This only handles a single source vector, and the vector being built |
860 | // should be of a sub-vector type of the source vector type. |
861 | auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec, |
862 | SmallVectorImpl<int> &SrcIdx) { |
863 | SDValue Vec; |
864 | for (SDValue V : Values) { |
865 | if (isUndef(Op: V)) { |
866 | SrcIdx.push_back(Elt: -1); |
867 | continue; |
868 | } |
869 | if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) |
870 | return false; |
871 | // All extracts should come from the same vector. |
872 | SDValue T = V.getOperand(i: 0); |
873 | if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode()) |
874 | return false; |
875 | Vec = T; |
876 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: V.getOperand(i: 1)); |
877 | if (C == nullptr) |
878 | return false; |
879 | int I = C->getSExtValue(); |
880 | assert(I >= 0 && "Negative element index"); |
881 | SrcIdx.push_back(Elt: I); |
882 | } |
883 | SrcVec = Vec; |
884 | return true; |
885 | }; |
886 | |
887 | SmallVector<int,128> ExtIdx; |
888 | SDValue ExtVec; |
889 | if (IsBuildFromExtracts(ExtVec, ExtIdx)) { |
890 | MVT ExtTy = ty(Op: ExtVec); |
891 | unsigned ExtLen = ExtTy.getVectorNumElements(); |
892 | if (ExtLen == VecLen || ExtLen == 2*VecLen) { |
893 | // Construct a new shuffle mask that will produce a vector with the same |
894 | // number of elements as the input vector, and such that the vector we |
895 | // want will be the initial subvector of it. |
896 | SmallVector<int,128> Mask; |
897 | BitVector Used(ExtLen); |
898 | |
899 | for (int M : ExtIdx) { |
900 | Mask.push_back(Elt: M); |
901 | if (M >= 0) |
902 | Used.set(M); |
903 | } |
904 | // Fill the rest of the mask with the unused elements of ExtVec in hopes |
905 | // that it will result in a permutation of ExtVec's elements. It's still |
906 | // fine if it doesn't (e.g. if undefs are present, or elements are |
907 | // repeated), but permutations can always be done efficiently via vdelta |
908 | // and vrdelta. |
909 | for (unsigned I = 0; I != ExtLen; ++I) { |
910 | if (Mask.size() == ExtLen) |
911 | break; |
912 | if (!Used.test(Idx: I)) |
913 | Mask.push_back(Elt: I); |
914 | } |
915 | |
916 | SDValue S = DAG.getVectorShuffle(VT: ExtTy, dl, N1: ExtVec, |
917 | N2: DAG.getUNDEF(VT: ExtTy), Mask); |
918 | return ExtLen == VecLen ? S : LoHalf(V: S, DAG); |
919 | } |
920 | } |
921 | |
922 | // Find most common element to initialize vector with. This is to avoid |
923 | // unnecessary vinsert/valign for cases where the same value is present |
924 | // many times. Creates a histogram of the vector's elements to find the |
925 | // most common element n. |
926 | assert(4*Words.size() == Subtarget.getVectorLength()); |
927 | int VecHist[32]; |
928 | int n = 0; |
929 | for (unsigned i = 0; i != NumWords; ++i) { |
930 | VecHist[i] = 0; |
931 | if (Words[i].isUndef()) |
932 | continue; |
933 | for (unsigned j = i; j != NumWords; ++j) |
934 | if (Words[i] == Words[j]) |
935 | VecHist[i]++; |
936 | |
937 | if (VecHist[i] > VecHist[n]) |
938 | n = i; |
939 | } |
940 | |
941 | SDValue HalfV = getZero(dl, Ty: VecTy, DAG); |
942 | if (VecHist[n] > 1) { |
943 | SDValue SplatV = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: VecTy, Operand: Words[n]); |
944 | HalfV = DAG.getNode(Opcode: HexagonISD::VALIGN, DL: dl, VT: VecTy, |
945 | Ops: {HalfV, SplatV, DAG.getConstant(Val: HwLen/2, DL: dl, VT: MVT::i32)}); |
946 | } |
947 | SDValue HalfV0 = HalfV; |
948 | SDValue HalfV1 = HalfV; |
949 | |
950 | // Construct two halves in parallel, then or them together. Rn and Rm count |
951 | // number of rotations needed before the next element. One last rotation is |
952 | // performed post-loop to position the last element. |
953 | int Rn = 0, Rm = 0; |
954 | SDValue Sn, Sm; |
955 | SDValue N = HalfV0; |
956 | SDValue M = HalfV1; |
957 | for (unsigned i = 0; i != NumWords/2; ++i) { |
958 | // Rotate by element count since last insertion. |
959 | if (Words[i] != Words[n] || VecHist[n] <= 1) { |
960 | Sn = DAG.getConstant(Val: Rn, DL: dl, VT: MVT::i32); |
961 | HalfV0 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {N, Sn}); |
962 | N = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: VecTy, |
963 | Ops: {HalfV0, Words[i]}); |
964 | Rn = 0; |
965 | } |
966 | if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) { |
967 | Sm = DAG.getConstant(Val: Rm, DL: dl, VT: MVT::i32); |
968 | HalfV1 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {M, Sm}); |
969 | M = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: VecTy, |
970 | Ops: {HalfV1, Words[i+NumWords/2]}); |
971 | Rm = 0; |
972 | } |
973 | Rn += 4; |
974 | Rm += 4; |
975 | } |
976 | // Perform last rotation. |
977 | Sn = DAG.getConstant(Val: Rn+HwLen/2, DL: dl, VT: MVT::i32); |
978 | Sm = DAG.getConstant(Val: Rm, DL: dl, VT: MVT::i32); |
979 | HalfV0 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {N, Sn}); |
980 | HalfV1 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {M, Sm}); |
981 | |
982 | SDValue T0 = DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: MVT::i32), V: HalfV0); |
983 | SDValue T1 = DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: MVT::i32), V: HalfV1); |
984 | |
985 | SDValue DstV = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ty(Op: T0), Ops: {T0, T1}); |
986 | |
987 | SDValue OutV = |
988 | DAG.getBitcast(VT: tyVector(Ty: ty(Op: DstV), ElemTy: VecTy.getVectorElementType()), V: DstV); |
989 | return OutV; |
990 | } |
991 | |
992 | SDValue |
993 | HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl, |
994 | unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const { |
995 | MVT PredTy = ty(Op: PredV); |
996 | unsigned HwLen = Subtarget.getVectorLength(); |
997 | MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen); |
998 | |
999 | if (Subtarget.isHVXVectorType(VecTy: PredTy, IncludeBool: true)) { |
1000 | // Move the vector predicate SubV to a vector register, and scale it |
1001 | // down to match the representation (bytes per type element) that VecV |
1002 | // uses. The scaling down will pick every 2nd or 4th (every Scale-th |
1003 | // in general) element and put them at the front of the resulting |
1004 | // vector. This subvector will then be inserted into the Q2V of VecV. |
1005 | // To avoid having an operation that generates an illegal type (short |
1006 | // vector), generate a full size vector. |
1007 | // |
1008 | SDValue T = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: PredV); |
1009 | SmallVector<int,128> Mask(HwLen); |
1010 | // Scale = BitBytes(PredV) / Given BitBytes. |
1011 | unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes); |
1012 | unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes; |
1013 | |
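// Build a shuffle mask that gathers every Scale-th byte of the expanded
// predicate into the first BlockLen bytes of the result; the remaining
// bytes are filled with the other input bytes in BlockLen-sized groups.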
1014 | for (unsigned i = 0; i != HwLen; ++i) { |
1015 | unsigned Num = i % Scale; |
1016 | unsigned Off = i / Scale; |
1017 | Mask[BlockLen*Num + Off] = i; |
1018 | } |
1019 | SDValue S = DAG.getVectorShuffle(VT: ByteTy, dl, N1: T, N2: DAG.getUNDEF(VT: ByteTy), Mask); |
1020 | if (!ZeroFill) |
1021 | return S; |
1022 | // Fill the bytes beyond BlockLen with 0s. |
1023 | // V6_pred_scalar2 cannot fill the entire predicate, so it only works |
1024 | // when BlockLen < HwLen. |
1025 | assert(BlockLen < HwLen && "vsetq(v1) prerequisite"); |
1026 | MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen); |
1027 | SDValue Q = getInstr(MachineOpc: Hexagon::V6_pred_scalar2, dl, Ty: BoolTy, |
1028 | Ops: {DAG.getConstant(Val: BlockLen, DL: dl, VT: MVT::i32)}, DAG); |
1029 | SDValue M = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: Q); |
1030 | return DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ByteTy, N1: S, N2: M); |
1031 | } |
1032 | |
1033 | // Make sure that this is a valid scalar predicate. |
1034 | assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1); |
1035 | |
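// Number of bytes in the 64-bit P2D expansion that correspond to a single
// predicate element before any widening.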
1036 | unsigned Bytes = 8 / PredTy.getVectorNumElements(); |
1037 | SmallVector<SDValue,4> Words[2]; |
1038 | unsigned IdxW = 0; |
1039 | |
1040 | SDValue W0 = isUndef(Op: PredV) |
1041 | ? DAG.getUNDEF(VT: MVT::i64) |
1042 | : DAG.getNode(Opcode: HexagonISD::P2D, DL: dl, VT: MVT::i64, Operand: PredV); |
1043 | Words[IdxW].push_back(Elt: HiHalf(V: W0, DAG)); |
1044 | Words[IdxW].push_back(Elt: LoHalf(V: W0, DAG)); |
1045 | |
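// Repeatedly double the number of bytes covered by each predicate element
// until it reaches BitBytes: below word size expand via expandPredicate,
// from a word up simply duplicate each word.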
1046 | while (Bytes < BitBytes) { |
1047 | IdxW ^= 1; |
1048 | Words[IdxW].clear(); |
1049 | |
1050 | if (Bytes < 4) { |
1051 | for (const SDValue &W : Words[IdxW ^ 1]) { |
1052 | SDValue T = expandPredicate(Vec32: W, dl, DAG); |
1053 | Words[IdxW].push_back(Elt: HiHalf(V: T, DAG)); |
1054 | Words[IdxW].push_back(Elt: LoHalf(V: T, DAG)); |
1055 | } |
1056 | } else { |
1057 | for (const SDValue &W : Words[IdxW ^ 1]) { |
1058 | Words[IdxW].push_back(Elt: W); |
1059 | Words[IdxW].push_back(Elt: W); |
1060 | } |
1061 | } |
1062 | Bytes *= 2; |
1063 | } |
1064 | |
1065 | assert(Bytes == BitBytes); |
1066 | |
1067 | SDValue Vec = ZeroFill ? getZero(dl, Ty: ByteTy, DAG) : DAG.getUNDEF(VT: ByteTy); |
1068 | SDValue S4 = DAG.getConstant(Val: HwLen-4, DL: dl, VT: MVT::i32); |
1069 | for (const SDValue &W : Words[IdxW]) { |
1070 | Vec = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: Vec, N2: S4); |
1071 | Vec = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: ByteTy, N1: Vec, N2: W); |
1072 | } |
1073 | |
1074 | return Vec; |
1075 | } |
1076 | |
1077 | SDValue |
1078 | HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values, |
1079 | const SDLoc &dl, MVT VecTy, |
1080 | SelectionDAG &DAG) const { |
1081 | // Construct a vector V of bytes, such that a comparison V >u 0 would |
1082 | // produce the required vector predicate. |
1083 | unsigned VecLen = Values.size(); |
1084 | unsigned HwLen = Subtarget.getVectorLength(); |
1085 | assert(VecLen <= HwLen || VecLen == 8*HwLen); |
1086 | SmallVector<SDValue,128> Bytes; |
1087 | bool AllT = true, AllF = true; |
1088 | |
1089 | auto IsTrue = [] (SDValue V) { |
1090 | if (const auto *N = dyn_cast<ConstantSDNode>(Val: V.getNode())) |
1091 | return !N->isZero(); |
1092 | return false; |
1093 | }; |
1094 | auto IsFalse = [] (SDValue V) { |
1095 | if (const auto *N = dyn_cast<ConstantSDNode>(Val: V.getNode())) |
1096 | return N->isZero(); |
1097 | return false; |
1098 | }; |
1099 | |
1100 | if (VecLen <= HwLen) { |
1101 | // In the hardware, each bit of a vector predicate corresponds to a byte |
1102 | // of a vector register. Calculate how many bytes a bit of VecTy |
1103 | // corresponds to. |
1104 | assert(HwLen % VecLen == 0); |
1105 | unsigned BitBytes = HwLen / VecLen; |
1106 | for (SDValue V : Values) { |
1107 | AllT &= IsTrue(V); |
1108 | AllF &= IsFalse(V); |
1109 | |
1110 | SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(Op: V, DL: dl, VT: MVT::i8) |
1111 | : DAG.getUNDEF(VT: MVT::i8); |
1112 | for (unsigned B = 0; B != BitBytes; ++B) |
1113 | Bytes.push_back(Elt: Ext); |
1114 | } |
1115 | } else { |
1116 | // There are as many i1 values as there are bits in a vector register. |
1117 | // Divide the values into groups of 8 and check that each group consists |
1118 | // of the same value (ignoring undefs). |
1119 | for (unsigned I = 0; I != VecLen; I += 8) { |
1120 | unsigned B = 0; |
1121 | // Find the first non-undef value in this group. |
1122 | for (; B != 8; ++B) { |
1123 | if (!Values[I+B].isUndef()) |
1124 | break; |
1125 | } |
1126 | SDValue F = Values[I+B]; |
1127 | AllT &= IsTrue(F); |
1128 | AllF &= IsFalse(F); |
1129 | |
1130 | SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(Op: F, DL: dl, VT: MVT::i8) |
1131 | : DAG.getUNDEF(VT: MVT::i8); |
1132 | Bytes.push_back(Elt: Ext); |
1133 | // Verify that the rest of values in the group are the same as the |
1134 | // first. |
1135 | for (; B != 8; ++B) |
1136 | assert(Values[I+B].isUndef() || Values[I+B] == F); |
1137 | } |
1138 | } |
1139 | |
1140 | if (AllT) |
1141 | return DAG.getNode(Opcode: HexagonISD::QTRUE, DL: dl, VT: VecTy); |
1142 | if (AllF) |
1143 | return DAG.getNode(Opcode: HexagonISD::QFALSE, DL: dl, VT: VecTy); |
1144 | |
1145 | MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen); |
1146 | SDValue ByteVec = buildHvxVectorReg(Values: Bytes, dl, VecTy: ByteTy, DAG); |
1147 | return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: VecTy, Operand: ByteVec); |
1148 | } |
1149 | |
1150 | SDValue |
1151 | HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV, |
1152 | const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { |
1153 | MVT ElemTy = ty(Op: VecV).getVectorElementType(); |
1154 | |
1155 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
1156 | assert(ElemWidth >= 8 && ElemWidth <= 32); |
1157 | (void)ElemWidth; |
1158 | |
1159 | SDValue ByteIdx = convertToByteIndex(ElemIdx: IdxV, ElemTy, DAG); |
1160 | SDValue ExWord = DAG.getNode(Opcode: HexagonISD::VEXTRACTW, DL: dl, VT: MVT::i32, |
1161 | Ops: {VecV, ByteIdx}); |
1162 | if (ElemTy == MVT::i32) |
1163 | return ExWord; |
1164 | |
1165 | // Have an extracted word, need to extract the smaller element out of it. |
1166 | // 1. Extract the bits of (the original) IdxV that correspond to the index |
1167 | // of the desired element in the 32-bit word. |
1168 | SDValue SubIdx = getIndexInWord32(Idx: IdxV, ElemTy, DAG); |
1169 | // 2. Extract the element from the word. |
1170 | SDValue ExVec = DAG.getBitcast(VT: tyVector(Ty: ty(Op: ExWord), ElemTy), V: ExWord); |
1171 | return extractVector(VecV: ExVec, IdxV: SubIdx, dl, ValTy: ElemTy, ResTy: MVT::i32, DAG); |
1172 | } |
1173 | |
1174 | SDValue |
1175 | HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV, |
1176 | const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { |
1177 | // Implement other return types if necessary. |
1178 | assert(ResTy == MVT::i1); |
1179 | |
1180 | unsigned HwLen = Subtarget.getVectorLength(); |
1181 | MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen); |
1182 | SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV); |
1183 | |
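// Each bit of the predicate covers Scale bytes of its Q2V expansion, so
// scale the element index to a byte index before extracting.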
1184 | unsigned Scale = HwLen / ty(Op: VecV).getVectorNumElements(); |
1185 | SDValue ScV = DAG.getConstant(Val: Scale, DL: dl, VT: MVT::i32); |
1186 | IdxV = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV, N2: ScV); |
1187 | |
1188 | SDValue ExtB = extractHvxElementReg(VecV: ByteVec, IdxV, dl, ResTy: MVT::i32, DAG); |
1189 | SDValue Zero = DAG.getTargetConstant(Val: 0, DL: dl, VT: MVT::i32); |
1190 | return getInstr(MachineOpc: Hexagon::C2_cmpgtui, dl, Ty: MVT::i1, Ops: {ExtB, Zero}, DAG); |
1191 | } |
1192 | |
1193 | SDValue |
1194 | HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV, |
1195 | SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const { |
1196 | MVT ElemTy = ty(Op: VecV).getVectorElementType(); |
1197 | |
1198 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
1199 | assert(ElemWidth >= 8 && ElemWidth <= 32); |
1200 | (void)ElemWidth; |
1201 | |
1202 | auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV, |
1203 | SDValue ByteIdxV) { |
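// A sketch of the idiom used here: rotate the vector right so that the
// word-aligned target byte offset lands at position 0, overwrite word 0
// with VINSERTW0, then rotate back by HwLen minus that offset. E.g. for
// ByteIdxV == 20 (assuming a 128-byte vector): VROR by 20, insert, then
// VROR by 108.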
1204 | MVT VecTy = ty(Op: VecV); |
1205 | unsigned HwLen = Subtarget.getVectorLength(); |
1206 | SDValue MaskV = |
1207 | DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, |
1208 | Ops: {ByteIdxV, DAG.getSignedConstant(Val: -4, DL: dl, VT: MVT::i32)}); |
1209 | SDValue RotV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {VecV, MaskV}); |
1210 | SDValue InsV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: VecTy, Ops: {RotV, ValV}); |
1211 | SDValue SubV = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32, |
1212 | Ops: {DAG.getConstant(Val: HwLen, DL: dl, VT: MVT::i32), MaskV}); |
1213 | SDValue TorV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {InsV, SubV}); |
1214 | return TorV; |
1215 | }; |
1216 | |
1217 | SDValue ByteIdx = convertToByteIndex(ElemIdx: IdxV, ElemTy, DAG); |
1218 | if (ElemTy == MVT::i32) |
1219 | return InsertWord(VecV, ValV, ByteIdx); |
1220 | |
1221 | // If this is not inserting a 32-bit word, convert it into such a thing. |
1222 | // 1. Extract the existing word from the target vector. |
1223 | SDValue WordIdx = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i32, |
1224 | Ops: {ByteIdx, DAG.getConstant(Val: 2, DL: dl, VT: MVT::i32)}); |
1225 | SDValue Ext = extractHvxElementReg(VecV: opCastElem(Vec: VecV, ElemTy: MVT::i32, DAG), IdxV: WordIdx, |
1226 | dl, ResTy: MVT::i32, DAG); |
1227 | |
1228 | // 2. Treating the extracted word as a 32-bit vector, insert the given |
1229 | // value into it. |
1230 | SDValue SubIdx = getIndexInWord32(Idx: IdxV, ElemTy, DAG); |
1231 | MVT SubVecTy = tyVector(Ty: ty(Op: Ext), ElemTy); |
1232 | SDValue Ins = insertVector(VecV: DAG.getBitcast(VT: SubVecTy, V: Ext), |
1233 | ValV, IdxV: SubIdx, dl, ValTy: ElemTy, DAG); |
1234 | |
1235 | // 3. Insert the 32-bit word back into the original vector. |
1236 | return InsertWord(VecV, Ins, ByteIdx); |
1237 | } |
1238 | |
1239 | SDValue |
1240 | HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV, |
1241 | SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const { |
1242 | unsigned HwLen = Subtarget.getVectorLength(); |
1243 | MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen); |
1244 | SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV); |
1245 | |
1246 | unsigned Scale = HwLen / ty(Op: VecV).getVectorNumElements(); |
1247 | SDValue ScV = DAG.getConstant(Val: Scale, DL: dl, VT: MVT::i32); |
1248 | IdxV = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV, N2: ScV); |
1249 | ValV = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MVT::i32, Operand: ValV); |
1250 | |
1251 | SDValue InsV = insertHvxElementReg(VecV: ByteVec, IdxV, ValV, dl, DAG); |
1252 | return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ty(Op: VecV), Operand: InsV); |
1253 | } |
1254 | |
1255 | SDValue |
HexagonTargetLowering::extractHvxSubvectorReg(SDValue OrigOp, SDValue VecV,
1257 | SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { |
1258 | MVT VecTy = ty(Op: VecV); |
1259 | unsigned HwLen = Subtarget.getVectorLength(); |
1260 | unsigned Idx = IdxV.getNode()->getAsZExtVal(); |
1261 | MVT ElemTy = VecTy.getVectorElementType(); |
1262 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
1263 | |
1264 | // If the source vector is a vector pair, get the single vector containing |
1265 | // the subvector of interest. The subvector will never overlap two single |
1266 | // vectors. |
1267 | if (isHvxPairTy(Ty: VecTy)) { |
1268 | unsigned SubIdx = Hexagon::vsub_lo; |
1269 | if (Idx * ElemWidth >= 8 * HwLen) { |
1270 | SubIdx = Hexagon::vsub_hi; |
1271 | Idx -= VecTy.getVectorNumElements() / 2; |
1272 | } |
1273 | |
1274 | VecTy = typeSplit(VecTy).first; |
1275 | VecV = DAG.getTargetExtractSubreg(SRIdx: SubIdx, DL: dl, VT: VecTy, Operand: VecV); |
1276 | if (VecTy == ResTy) |
1277 | return VecV; |
1278 | } |
1279 | |
1280 | // The only meaningful subvectors of a single HVX vector are those that |
1281 | // fit in a scalar register. |
1282 | assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64); |
1283 | |
1284 | MVT WordTy = tyVector(Ty: VecTy, ElemTy: MVT::i32); |
1285 | SDValue WordVec = DAG.getBitcast(VT: WordTy, V: VecV); |
1286 | unsigned WordIdx = (Idx*ElemWidth) / 32; |
1287 | |
1288 | SDValue W0Idx = DAG.getConstant(Val: WordIdx, DL: dl, VT: MVT::i32); |
1289 | SDValue W0 = extractHvxElementReg(VecV: WordVec, IdxV: W0Idx, dl, ResTy: MVT::i32, DAG); |
1290 | if (ResTy.getSizeInBits() == 32) |
1291 | return DAG.getBitcast(VT: ResTy, V: W0); |
1292 | |
1293 | SDValue W1Idx = DAG.getConstant(Val: WordIdx+1, DL: dl, VT: MVT::i32); |
1294 | SDValue W1 = extractHvxElementReg(VecV: WordVec, IdxV: W1Idx, dl, ResTy: MVT::i32, DAG); |
1295 | SDValue WW = getCombine(Hi: W1, Lo: W0, dl, ResTy: MVT::i64, DAG); |
1296 | return DAG.getBitcast(VT: ResTy, V: WW); |
1297 | } |
1298 | |
1299 | SDValue |
HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV,
1301 | const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { |
1302 | MVT VecTy = ty(Op: VecV); |
1303 | unsigned HwLen = Subtarget.getVectorLength(); |
1304 | MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen); |
1305 | SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV); |
1306 | // IdxV is required to be a constant. |
1307 | unsigned Idx = IdxV.getNode()->getAsZExtVal(); |
1308 | |
1309 | unsigned ResLen = ResTy.getVectorNumElements(); |
1310 | unsigned BitBytes = HwLen / VecTy.getVectorNumElements(); |
1311 | unsigned Offset = Idx * BitBytes; |
1312 | SDValue Undef = DAG.getUNDEF(VT: ByteTy); |
1313 | SmallVector<int,128> Mask; |
1314 | |
1315 | if (Subtarget.isHVXVectorType(VecTy: ResTy, IncludeBool: true)) { |
1316 | // Converting between two vector predicates. Since the result is shorter |
1317 | // than the source, it will correspond to a vector predicate with the |
1318 | // relevant bits replicated. The replication count is the ratio of the |
1319 | // source and target vector lengths. |
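// E.g. (a sketch, assuming HwLen == 128): extracting v32i1 from v64i1 at
// Idx == 32 gives BitBytes == 2, Offset == 64, Rep == 2, and the mask
// 64,64,65,65,...,127,127, so each of the high 64 bytes is duplicated to
// fill the 4-bytes-per-element layout of the result.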
1320 | unsigned Rep = VecTy.getVectorNumElements() / ResLen; |
1321 | assert(isPowerOf2_32(Rep) && HwLen % Rep == 0); |
1322 | for (unsigned i = 0; i != HwLen/Rep; ++i) { |
1323 | for (unsigned j = 0; j != Rep; ++j) |
1324 | Mask.push_back(Elt: i + Offset); |
1325 | } |
1326 | SDValue ShuffV = DAG.getVectorShuffle(VT: ByteTy, dl, N1: ByteVec, N2: Undef, Mask); |
1327 | return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ResTy, Operand: ShuffV); |
1328 | } |
1329 | |
1330 | // Converting between a vector predicate and a scalar predicate. In the |
1331 | // vector predicate, a group of BitBytes bits will correspond to a single |
1332 | // i1 element of the source vector type. Those bits will all have the same |
1333 | // value. The same will be true for ByteVec, where each byte corresponds |
1334 | // to a bit in the vector predicate. |
1335 | // The algorithm is to traverse the ByteVec, going over the i1 values from |
1336 | // the source vector, and generate the corresponding representation in an |
1337 | // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the |
1338 | // elements so that the interesting 8 bytes will be in the low end of the |
1339 | // vector. |
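// E.g. (a sketch, assuming HwLen == 128, VecTy == v32i1, ResTy == v8i1,
// Idx == 8): BitBytes == 4, Offset == 32, Rep == 1, and the mask selects
// bytes 32,36,...,60 into the low 8 lanes (repeated to fill the register);
// the two low words are then combined into a v8i8 and compared against 0.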
1340 | unsigned Rep = 8 / ResLen; |
// Make sure the output fills the entire vector register, so repeat the
1342 | // 8-byte groups as many times as necessary. |
1343 | for (unsigned r = 0; r != HwLen/ResLen; ++r) { |
1344 | // This will generate the indexes of the 8 interesting bytes. |
1345 | for (unsigned i = 0; i != ResLen; ++i) { |
1346 | for (unsigned j = 0; j != Rep; ++j) |
1347 | Mask.push_back(Elt: Offset + i*BitBytes); |
1348 | } |
1349 | } |
1350 | |
1351 | SDValue Zero = getZero(dl, Ty: MVT::i32, DAG); |
1352 | SDValue ShuffV = DAG.getVectorShuffle(VT: ByteTy, dl, N1: ByteVec, N2: Undef, Mask); |
1353 | // Combine the two low words from ShuffV into a v8i8, and byte-compare |
1354 | // them against 0. |
1355 | SDValue W0 = DAG.getNode(Opcode: HexagonISD::VEXTRACTW, DL: dl, VT: MVT::i32, Ops: {ShuffV, Zero}); |
1356 | SDValue W1 = DAG.getNode(Opcode: HexagonISD::VEXTRACTW, DL: dl, VT: MVT::i32, |
1357 | Ops: {ShuffV, DAG.getConstant(Val: 4, DL: dl, VT: MVT::i32)}); |
1358 | SDValue Vec64 = getCombine(Hi: W1, Lo: W0, dl, ResTy: MVT::v8i8, DAG); |
1359 | return getInstr(MachineOpc: Hexagon::A4_vcmpbgtui, dl, Ty: ResTy, |
1360 | Ops: {Vec64, DAG.getTargetConstant(Val: 0, DL: dl, VT: MVT::i32)}, DAG); |
1361 | } |
1362 | |
1363 | SDValue |
1364 | HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV, |
1365 | SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const { |
1366 | MVT VecTy = ty(Op: VecV); |
1367 | MVT SubTy = ty(Op: SubV); |
1368 | unsigned HwLen = Subtarget.getVectorLength(); |
1369 | MVT ElemTy = VecTy.getVectorElementType(); |
1370 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
1371 | |
1372 | bool IsPair = isHvxPairTy(Ty: VecTy); |
1373 | MVT SingleTy = MVT::getVectorVT(VT: ElemTy, NumElements: (8*HwLen)/ElemWidth); |
1374 | // The two single vectors that VecV consists of, if it's a pair. |
1375 | SDValue V0, V1; |
1376 | SDValue SingleV = VecV; |
1377 | SDValue PickHi; |
1378 | |
1379 | if (IsPair) { |
1380 | V0 = LoHalf(V: VecV, DAG); |
1381 | V1 = HiHalf(V: VecV, DAG); |
1382 | |
1383 | SDValue HalfV = DAG.getConstant(Val: SingleTy.getVectorNumElements(), |
1384 | DL: dl, VT: MVT::i32); |
1385 | PickHi = DAG.getSetCC(DL: dl, VT: MVT::i1, LHS: IdxV, RHS: HalfV, Cond: ISD::SETUGT); |
1386 | if (isHvxSingleTy(Ty: SubTy)) { |
1387 | if (const auto *CN = dyn_cast<const ConstantSDNode>(Val: IdxV.getNode())) { |
1388 | unsigned Idx = CN->getZExtValue(); |
1389 | assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2); |
1390 | unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi; |
1391 | return DAG.getTargetInsertSubreg(SRIdx: SubIdx, DL: dl, VT: VecTy, Operand: VecV, Subreg: SubV); |
1392 | } |
1393 | // If IdxV is not a constant, generate the two variants: with the |
1394 | // SubV as the high and as the low subregister, and select the right |
1395 | // pair based on the IdxV. |
1396 | SDValue InLo = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {SubV, V1}); |
1397 | SDValue InHi = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {V0, SubV}); |
1398 | return DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: VecTy, N1: PickHi, N2: InHi, N3: InLo); |
1399 | } |
1400 | // The subvector being inserted must be entirely contained in one of |
1401 | // the vectors V0 or V1. Set SingleV to the correct one, and update |
1402 | // IdxV to be the index relative to the beginning of that vector. |
1403 | SDValue S = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32, N1: IdxV, N2: HalfV); |
1404 | IdxV = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: MVT::i32, N1: PickHi, N2: S, N3: IdxV); |
1405 | SingleV = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: SingleTy, N1: PickHi, N2: V1, N3: V0); |
1406 | } |
1407 | |
1408 | // The only meaningful subvectors of a single HVX vector are those that |
1409 | // fit in a scalar register. |
1410 | assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64); |
1411 | // Convert IdxV to be index in bytes. |
1412 | auto *IdxN = dyn_cast<ConstantSDNode>(Val: IdxV.getNode()); |
1413 | if (!IdxN || !IdxN->isZero()) { |
1414 | IdxV = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV, |
1415 | N2: DAG.getConstant(Val: ElemWidth/8, DL: dl, VT: MVT::i32)); |
1416 | SingleV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: SingleTy, N1: SingleV, N2: IdxV); |
1417 | } |
1418 | // When inserting a single word, the rotation back to the original position |
1419 | // would be by HwLen-Idx, but if two words are inserted, it will need to be |
1420 | // by (HwLen-4)-Idx. |
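// E.g. (a sketch, HwLen == 128, inserting a 64-bit subvector at byte
// offset 16): rotate right by 16, insert word 0, rotate by 4, insert
// word 1, then rotate by (128-4)-16 == 108; the rotations sum to 128,
// i.e. back to the original position.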
1421 | unsigned RolBase = HwLen; |
1422 | if (SubTy.getSizeInBits() == 32) { |
1423 | SDValue V = DAG.getBitcast(VT: MVT::i32, V: SubV); |
1424 | SingleV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: SingleTy, N1: SingleV, N2: V); |
1425 | } else { |
1426 | SDValue V = DAG.getBitcast(VT: MVT::i64, V: SubV); |
1427 | SDValue R0 = LoHalf(V, DAG); |
1428 | SDValue R1 = HiHalf(V, DAG); |
1429 | SingleV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: SingleTy, N1: SingleV, N2: R0); |
1430 | SingleV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: SingleTy, N1: SingleV, |
1431 | N2: DAG.getConstant(Val: 4, DL: dl, VT: MVT::i32)); |
1432 | SingleV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: SingleTy, N1: SingleV, N2: R1); |
1433 | RolBase = HwLen-4; |
1434 | } |
1435 | // If the vector wasn't ror'ed, don't ror it back. |
1436 | if (RolBase != 4 || !IdxN || !IdxN->isZero()) { |
1437 | SDValue RolV = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32, |
1438 | N1: DAG.getConstant(Val: RolBase, DL: dl, VT: MVT::i32), N2: IdxV); |
1439 | SingleV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: SingleTy, N1: SingleV, N2: RolV); |
1440 | } |
1441 | |
1442 | if (IsPair) { |
1443 | SDValue InLo = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {SingleV, V1}); |
1444 | SDValue InHi = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {V0, SingleV}); |
1445 | return DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: VecTy, N1: PickHi, N2: InHi, N3: InLo); |
1446 | } |
1447 | return SingleV; |
1448 | } |
1449 | |
1450 | SDValue |
1451 | HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV, |
1452 | SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const { |
1453 | MVT VecTy = ty(Op: VecV); |
1454 | MVT SubTy = ty(Op: SubV); |
1455 | assert(Subtarget.isHVXVectorType(VecTy, true)); |
1456 | // VecV is an HVX vector predicate. SubV may be either an HVX vector |
1457 | // predicate as well, or it can be a scalar predicate. |
1458 | |
1459 | unsigned VecLen = VecTy.getVectorNumElements(); |
1460 | unsigned HwLen = Subtarget.getVectorLength(); |
1461 | assert(HwLen % VecLen == 0 && "Unexpected vector type" ); |
1462 | |
1463 | unsigned Scale = VecLen / SubTy.getVectorNumElements(); |
1464 | unsigned BitBytes = HwLen / VecLen; |
1465 | unsigned BlockLen = HwLen / Scale; |
1466 | |
1467 | MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen); |
1468 | SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV); |
1469 | SDValue ByteSub = createHvxPrefixPred(PredV: SubV, dl, BitBytes, ZeroFill: false, DAG); |
1470 | SDValue ByteIdx; |
1471 | |
1472 | auto *IdxN = dyn_cast<ConstantSDNode>(Val: IdxV.getNode()); |
1473 | if (!IdxN || !IdxN->isZero()) { |
1474 | ByteIdx = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV, |
1475 | N2: DAG.getConstant(Val: BitBytes, DL: dl, VT: MVT::i32)); |
1476 | ByteVec = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: ByteVec, N2: ByteIdx); |
1477 | } |
1478 | |
1479 | // ByteVec is the target vector VecV rotated in such a way that the |
1480 | // subvector should be inserted at index 0. Generate a predicate mask |
1481 | // and use vmux to do the insertion. |
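// E.g. (a sketch, HwLen == 128, inserting v16i1 into v32i1): BitBytes == 4
// and BlockLen == 64, so the mask covers the first 64 byte lanes and vmux
// takes those from ByteSub and the remaining 64 from the rotated ByteVec.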
1482 | assert(BlockLen < HwLen && "vsetq(v1) prerequisite" ); |
1483 | MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen); |
1484 | SDValue Q = getInstr(MachineOpc: Hexagon::V6_pred_scalar2, dl, Ty: BoolTy, |
1485 | Ops: {DAG.getConstant(Val: BlockLen, DL: dl, VT: MVT::i32)}, DAG); |
1486 | ByteVec = getInstr(MachineOpc: Hexagon::V6_vmux, dl, Ty: ByteTy, Ops: {Q, ByteSub, ByteVec}, DAG); |
1487 | // Rotate ByteVec back, and convert to a vector predicate. |
1488 | if (!IdxN || !IdxN->isZero()) { |
1489 | SDValue HwLenV = DAG.getConstant(Val: HwLen, DL: dl, VT: MVT::i32); |
1490 | SDValue ByteXdi = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32, N1: HwLenV, N2: ByteIdx); |
1491 | ByteVec = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: ByteVec, N2: ByteXdi); |
1492 | } |
1493 | return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: VecTy, Operand: ByteVec); |
1494 | } |
1495 | |
1496 | SDValue |
1497 | HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl, |
1498 | MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const { |
1499 | // Sign- and any-extending of a vector predicate to a vector register is |
1500 | // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and |
1501 | // a vector of 1s (where the 1s are of type matching the vector type). |
1502 | assert(Subtarget.isHVXVectorType(ResTy)); |
1503 | if (!ZeroExt) |
1504 | return DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ResTy, Operand: VecV); |
1505 | |
1506 | assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements()); |
1507 | SDValue True = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ResTy, |
1508 | Operand: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32)); |
1509 | SDValue False = getZero(dl, Ty: ResTy, DAG); |
1510 | return DAG.getSelect(DL: dl, VT: ResTy, Cond: VecV, LHS: True, RHS: False); |
1511 | } |
1512 | |
1513 | SDValue |
1514 | HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl, |
1515 | MVT ResTy, SelectionDAG &DAG) const { |
1516 | // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1] |
1517 | // (i.e. the entire predicate register) to bits [0..HwLen-1] of a |
1518 | // vector register. The remaining bits of the vector register are |
1519 | // unspecified. |
1520 | |
1521 | MachineFunction &MF = DAG.getMachineFunction(); |
1522 | unsigned HwLen = Subtarget.getVectorLength(); |
1523 | MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen); |
1524 | MVT PredTy = ty(Op: VecQ); |
1525 | unsigned PredLen = PredTy.getVectorNumElements(); |
1526 | assert(HwLen % PredLen == 0); |
1527 | MVT VecTy = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: 8*HwLen/PredLen), NumElements: PredLen); |
1528 | |
1529 | Type *Int8Ty = Type::getInt8Ty(C&: *DAG.getContext()); |
1530 | SmallVector<Constant*, 128> Tmp; |
1531 | // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,... |
1532 | // These are bytes with the LSB rotated left with respect to their index. |
1533 | for (unsigned i = 0; i != HwLen/8; ++i) { |
1534 | for (unsigned j = 0; j != 8; ++j) |
1535 | Tmp.push_back(Elt: ConstantInt::get(Ty: Int8Ty, V: 1ull << j)); |
1536 | } |
1537 | Constant *CV = ConstantVector::get(V: Tmp); |
1538 | Align Alignment(HwLen); |
1539 | SDValue CP = |
1540 | LowerConstantPool(Op: DAG.getConstantPool(C: CV, VT: ByteTy, Align: Alignment), DAG); |
1541 | SDValue Bytes = |
1542 | DAG.getLoad(VT: ByteTy, dl, Chain: DAG.getEntryNode(), Ptr: CP, |
1543 | PtrInfo: MachinePointerInfo::getConstantPool(MF), Alignment); |
1544 | |
1545 | // Select the bytes that correspond to true bits in the vector predicate. |
1546 | SDValue Sel = DAG.getSelect(DL: dl, VT: VecTy, Cond: VecQ, LHS: DAG.getBitcast(VT: VecTy, V: Bytes), |
1547 | RHS: getZero(dl, Ty: VecTy, DAG)); |
1548 | // Calculate the OR of all bytes in each group of 8. That will compress |
1549 | // all the individual bits into a single byte. |
1550 | // First, OR groups of 4, via vrmpy with 0x01010101. |
1551 | SDValue All1 = |
1552 | DAG.getSplatBuildVector(VT: MVT::v4i8, DL: dl, Op: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32)); |
1553 | SDValue Vrmpy = getInstr(MachineOpc: Hexagon::V6_vrmpyub, dl, Ty: ByteTy, Ops: {Sel, All1}, DAG); |
1554 | // Then rotate the accumulated vector by 4 bytes, and do the final OR. |
1555 | SDValue Rot = getInstr(MachineOpc: Hexagon::V6_valignbi, dl, Ty: ByteTy, |
1556 | Ops: {Vrmpy, Vrmpy, DAG.getTargetConstant(Val: 4, DL: dl, VT: MVT::i32)}, DAG); |
1557 | SDValue Vor = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ByteTy, Ops: {Vrmpy, Rot}); |
1558 | |
1559 | // Pick every 8th byte and coalesce them at the beginning of the output. |
1560 | // For symmetry, coalesce every 1+8th byte after that, then every 2+8th |
1561 | // byte and so on. |
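// E.g. (a sketch, assuming HwLen == 128): the mask is 0,8,...,120, then
// 1,9,...,121, and so on, so the 16 compressed bytes (sitting at byte
// offsets 0,8,...,120 of Vor) end up in the first 16 lanes of the result.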
1562 | SmallVector<int,128> Mask; |
1563 | for (unsigned i = 0; i != HwLen; ++i) |
1564 | Mask.push_back(Elt: (8*i) % HwLen + i/(HwLen/8)); |
1565 | SDValue Collect = |
1566 | DAG.getVectorShuffle(VT: ByteTy, dl, N1: Vor, N2: DAG.getUNDEF(VT: ByteTy), Mask); |
1567 | return DAG.getBitcast(VT: ResTy, V: Collect); |
1568 | } |
1569 | |
1570 | SDValue |
1571 | HexagonTargetLowering::resizeToWidth(SDValue VecV, MVT ResTy, bool Signed, |
1572 | const SDLoc &dl, SelectionDAG &DAG) const { |
1573 | // Take a vector and resize the element type to match the given type. |
1574 | MVT InpTy = ty(Op: VecV); |
1575 | if (InpTy == ResTy) |
1576 | return VecV; |
1577 | |
1578 | unsigned InpWidth = InpTy.getSizeInBits(); |
1579 | unsigned ResWidth = ResTy.getSizeInBits(); |
1580 | |
1581 | if (InpTy.isFloatingPoint()) { |
1582 | return InpWidth < ResWidth |
1583 | ? DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: ResTy, Operand: VecV) |
1584 | : DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: ResTy, N1: VecV, |
1585 | N2: DAG.getTargetConstant(Val: 0, DL: dl, VT: MVT::i32)); |
1586 | } |
1587 | |
1588 | assert(InpTy.isInteger()); |
1589 | |
1590 | if (InpWidth < ResWidth) { |
1591 | unsigned ExtOpc = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; |
1592 | return DAG.getNode(Opcode: ExtOpc, DL: dl, VT: ResTy, Operand: VecV); |
1593 | } else { |
1594 | unsigned NarOpc = Signed ? HexagonISD::SSAT : HexagonISD::USAT; |
1595 | return DAG.getNode(Opcode: NarOpc, DL: dl, VT: ResTy, N1: VecV, N2: DAG.getValueType(ResTy)); |
1596 | } |
1597 | } |
1598 | |
1599 | SDValue |
HexagonTargetLowering::extractSubvector(SDValue Vec, MVT SubTy, unsigned SubIdx,
1601 | SelectionDAG &DAG) const { |
1602 | assert(ty(Vec).getSizeInBits() % SubTy.getSizeInBits() == 0); |
1603 | |
1604 | const SDLoc &dl(Vec); |
1605 | unsigned ElemIdx = SubIdx * SubTy.getVectorNumElements(); |
1606 | return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL: dl, VT: SubTy, |
1607 | Ops: {Vec, DAG.getConstant(Val: ElemIdx, DL: dl, VT: MVT::i32)}); |
1608 | } |
1609 | |
1610 | SDValue |
1611 | HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) |
1612 | const { |
1613 | const SDLoc &dl(Op); |
1614 | MVT VecTy = ty(Op); |
1615 | |
1616 | unsigned Size = Op.getNumOperands(); |
1617 | SmallVector<SDValue,128> Ops; |
1618 | for (unsigned i = 0; i != Size; ++i) |
1619 | Ops.push_back(Elt: Op.getOperand(i)); |
1620 | |
1621 | if (VecTy.getVectorElementType() == MVT::i1) |
1622 | return buildHvxVectorPred(Values: Ops, dl, VecTy, DAG); |
1623 | |
// For an MVT::f16 BUILD_VECTOR, since MVT::f16 is not a legal type,
// bitcast the operands to i16, build the vector with i16 elements,
// and bitcast the result back to f16.
1627 | if (VecTy.getVectorElementType() == MVT::f16) { |
1628 | SmallVector<SDValue,64> NewOps; |
1629 | for (unsigned i = 0; i != Size; i++) |
1630 | NewOps.push_back(Elt: DAG.getBitcast(VT: MVT::i16, V: Ops[i])); |
1631 | |
1632 | SDValue T0 = DAG.getNode(Opcode: ISD::BUILD_VECTOR, DL: dl, |
1633 | VT: tyVector(Ty: VecTy, ElemTy: MVT::i16), Ops: NewOps); |
1634 | return DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: MVT::f16), V: T0); |
1635 | } |
1636 | |
1637 | // First, split the BUILD_VECTOR for vector pairs. We could generate |
1638 | // some pairs directly (via splat), but splats should be generated |
1639 | // by the combiner prior to getting here. |
1640 | if (VecTy.getSizeInBits() == 16 * Subtarget.getVectorLength()) { |
1641 | ArrayRef<SDValue> A(Ops); |
1642 | MVT SingleTy = typeSplit(VecTy).first; |
1643 | SDValue V0 = buildHvxVectorReg(Values: A.take_front(N: Size / 2), dl, VecTy: SingleTy, DAG); |
1644 | SDValue V1 = buildHvxVectorReg(Values: A.drop_front(N: Size / 2), dl, VecTy: SingleTy, DAG); |
1645 | return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, N1: V0, N2: V1); |
1646 | } |
1647 | |
1648 | return buildHvxVectorReg(Values: Ops, dl, VecTy, DAG); |
1649 | } |
1650 | |
1651 | SDValue |
1652 | HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG) |
1653 | const { |
1654 | const SDLoc &dl(Op); |
1655 | MVT VecTy = ty(Op); |
1656 | MVT ArgTy = ty(Op: Op.getOperand(i: 0)); |
1657 | |
1658 | if (ArgTy == MVT::f16) { |
1659 | MVT SplatTy = MVT::getVectorVT(VT: MVT::i16, NumElements: VecTy.getVectorNumElements()); |
1660 | SDValue ToInt16 = DAG.getBitcast(VT: MVT::i16, V: Op.getOperand(i: 0)); |
1661 | SDValue ToInt32 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: MVT::i32, Operand: ToInt16); |
1662 | SDValue Splat = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: SplatTy, Operand: ToInt32); |
1663 | return DAG.getBitcast(VT: VecTy, V: Splat); |
1664 | } |
1665 | |
1666 | return SDValue(); |
1667 | } |
1668 | |
1669 | SDValue |
1670 | HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG) |
1671 | const { |
1672 | // Vector concatenation of two integer (non-bool) vectors does not need |
1673 | // special lowering. Custom-lower concats of bool vectors and expand |
1674 | // concats of more than 2 vectors. |
1675 | MVT VecTy = ty(Op); |
1676 | const SDLoc &dl(Op); |
1677 | unsigned NumOp = Op.getNumOperands(); |
1678 | if (VecTy.getVectorElementType() != MVT::i1) { |
1679 | if (NumOp == 2) |
1680 | return Op; |
1681 | // Expand the other cases into a build-vector. |
1682 | SmallVector<SDValue,8> Elems; |
1683 | for (SDValue V : Op.getNode()->ops()) |
1684 | DAG.ExtractVectorElements(Op: V, Args&: Elems); |
1685 | // A vector of i16 will be broken up into a build_vector of i16's. |
1686 | // This is a problem, since at the time of operation legalization, |
1687 | // all operations are expected to be type-legalized, and i16 is not |
1688 | // a legal type. If any of the extracted elements is not of a valid |
1689 | // type, sign-extend it to a valid one. |
1690 | for (unsigned i = 0, e = Elems.size(); i != e; ++i) { |
1691 | SDValue V = Elems[i]; |
1692 | MVT Ty = ty(Op: V); |
1693 | if (!isTypeLegal(VT: Ty)) { |
1694 | MVT NTy = typeLegalize(Ty, DAG); |
1695 | if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { |
1696 | Elems[i] = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT: NTy, |
1697 | N1: DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: NTy, |
1698 | N1: V.getOperand(i: 0), N2: V.getOperand(i: 1)), |
1699 | N2: DAG.getValueType(Ty)); |
1700 | continue; |
1701 | } |
1702 | // A few less complicated cases. |
1703 | switch (V.getOpcode()) { |
1704 | case ISD::Constant: |
1705 | Elems[i] = DAG.getSExtOrTrunc(Op: V, DL: dl, VT: NTy); |
1706 | break; |
1707 | case ISD::UNDEF: |
1708 | Elems[i] = DAG.getUNDEF(VT: NTy); |
1709 | break; |
1710 | case ISD::TRUNCATE: |
1711 | Elems[i] = V.getOperand(i: 0); |
1712 | break; |
1713 | default: |
1714 | llvm_unreachable("Unexpected vector element" ); |
1715 | } |
1716 | } |
1717 | } |
1718 | return DAG.getBuildVector(VT: VecTy, DL: dl, Ops: Elems); |
1719 | } |
1720 | |
1721 | assert(VecTy.getVectorElementType() == MVT::i1); |
1722 | unsigned HwLen = Subtarget.getVectorLength(); |
1723 | assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0); |
1724 | |
1725 | SDValue Op0 = Op.getOperand(i: 0); |
1726 | |
1727 | // If the operands are HVX types (i.e. not scalar predicates), then |
1728 | // defer the concatenation, and create QCAT instead. |
1729 | if (Subtarget.isHVXVectorType(VecTy: ty(Op: Op0), IncludeBool: true)) { |
1730 | if (NumOp == 2) |
1731 | return DAG.getNode(Opcode: HexagonISD::QCAT, DL: dl, VT: VecTy, N1: Op0, N2: Op.getOperand(i: 1)); |
1732 | |
1733 | ArrayRef<SDUse> U(Op.getNode()->ops()); |
1734 | SmallVector<SDValue, 4> SV(U); |
1735 | ArrayRef<SDValue> Ops(SV); |
1736 | |
1737 | MVT HalfTy = typeSplit(VecTy).first; |
1738 | SDValue V0 = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: HalfTy, |
1739 | Ops: Ops.take_front(N: NumOp/2)); |
1740 | SDValue V1 = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: HalfTy, |
1741 | Ops: Ops.take_back(N: NumOp/2)); |
1742 | return DAG.getNode(Opcode: HexagonISD::QCAT, DL: dl, VT: VecTy, N1: V0, N2: V1); |
1743 | } |
1744 | |
1745 | // Count how many bytes (in a vector register) each bit in VecTy |
1746 | // corresponds to. |
1747 | unsigned BitBytes = HwLen / VecTy.getVectorNumElements(); |
1748 | |
1749 | SmallVector<SDValue,8> Prefixes; |
1750 | for (SDValue V : Op.getNode()->op_values()) { |
1751 | SDValue P = createHvxPrefixPred(PredV: V, dl, BitBytes, ZeroFill: true, DAG); |
1752 | Prefixes.push_back(Elt: P); |
1753 | } |
1754 | |
1755 | unsigned InpLen = ty(Op: Op.getOperand(i: 0)).getVectorNumElements(); |
1756 | MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen); |
1757 | SDValue S = DAG.getConstant(Val: HwLen - InpLen*BitBytes, DL: dl, VT: MVT::i32); |
1758 | SDValue Res = getZero(dl, Ty: ByteTy, DAG); |
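// Stitch the prefixes together from the last operand to the first: each
// VROR by S (equivalent to a rotate left by the prefix size) moves what
// has been accumulated so far up by one prefix, so operand 0 ends up at
// byte offset 0, operand 1 right after it, and so on.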
1759 | for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) { |
1760 | Res = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: Res, N2: S); |
1761 | Res = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ByteTy, N1: Res, N2: Prefixes[e-i-1]); |
1762 | } |
1763 | return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: VecTy, Operand: Res); |
1764 | } |
1765 | |
1766 | SDValue |
HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
1768 | const { |
1769 | // Change the type of the extracted element to i32. |
1770 | SDValue VecV = Op.getOperand(i: 0); |
1771 | MVT ElemTy = ty(Op: VecV).getVectorElementType(); |
1772 | const SDLoc &dl(Op); |
1773 | SDValue IdxV = Op.getOperand(i: 1); |
1774 | if (ElemTy == MVT::i1) |
1775 | return extractHvxElementPred(VecV, IdxV, dl, ResTy: ty(Op), DAG); |
1776 | |
1777 | return extractHvxElementReg(VecV, IdxV, dl, ResTy: ty(Op), DAG); |
1778 | } |
1779 | |
1780 | SDValue |
1781 | HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) |
1782 | const { |
1783 | const SDLoc &dl(Op); |
1784 | MVT VecTy = ty(Op); |
1785 | SDValue VecV = Op.getOperand(i: 0); |
1786 | SDValue ValV = Op.getOperand(i: 1); |
1787 | SDValue IdxV = Op.getOperand(i: 2); |
1788 | MVT ElemTy = ty(Op: VecV).getVectorElementType(); |
1789 | if (ElemTy == MVT::i1) |
1790 | return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG); |
1791 | |
1792 | if (ElemTy == MVT::f16) { |
1793 | SDValue T0 = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: dl, |
1794 | VT: tyVector(Ty: VecTy, ElemTy: MVT::i16), |
1795 | N1: DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: MVT::i16), V: VecV), |
1796 | N2: DAG.getBitcast(VT: MVT::i16, V: ValV), N3: IdxV); |
1797 | return DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: MVT::f16), V: T0); |
1798 | } |
1799 | |
1800 | return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG); |
1801 | } |
1802 | |
1803 | SDValue |
HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
1805 | const { |
1806 | SDValue SrcV = Op.getOperand(i: 0); |
1807 | MVT SrcTy = ty(Op: SrcV); |
1808 | MVT DstTy = ty(Op); |
1809 | SDValue IdxV = Op.getOperand(i: 1); |
1810 | unsigned Idx = IdxV.getNode()->getAsZExtVal(); |
1811 | assert(Idx % DstTy.getVectorNumElements() == 0); |
1812 | (void)Idx; |
1813 | const SDLoc &dl(Op); |
1814 | |
1815 | MVT ElemTy = SrcTy.getVectorElementType(); |
1816 | if (ElemTy == MVT::i1) |
1817 | return extractHvxSubvectorPred(VecV: SrcV, IdxV, dl, ResTy: DstTy, DAG); |
1818 | |
1819 | return extractHvxSubvectorReg(OrigOp: Op, VecV: SrcV, IdxV, dl, ResTy: DstTy, DAG); |
1820 | } |
1821 | |
1822 | SDValue |
1823 | HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG) |
1824 | const { |
1825 | // Idx does not need to be a constant. |
1826 | SDValue VecV = Op.getOperand(i: 0); |
1827 | SDValue ValV = Op.getOperand(i: 1); |
1828 | SDValue IdxV = Op.getOperand(i: 2); |
1829 | |
1830 | const SDLoc &dl(Op); |
1831 | MVT VecTy = ty(Op: VecV); |
1832 | MVT ElemTy = VecTy.getVectorElementType(); |
1833 | if (ElemTy == MVT::i1) |
1834 | return insertHvxSubvectorPred(VecV, SubV: ValV, IdxV, dl, DAG); |
1835 | |
1836 | return insertHvxSubvectorReg(VecV, SubV: ValV, IdxV, dl, DAG); |
1837 | } |
1838 | |
1839 | SDValue |
1840 | HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const { |
1841 | // Lower any-extends of boolean vectors to sign-extends, since they |
1842 | // translate directly to Q2V. Zero-extending could also be done equally |
1843 | // fast, but Q2V is used/recognized in more places. |
1844 | // For all other vectors, use zero-extend. |
1845 | MVT ResTy = ty(Op); |
1846 | SDValue InpV = Op.getOperand(i: 0); |
1847 | MVT ElemTy = ty(Op: InpV).getVectorElementType(); |
1848 | if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(VecTy: ResTy)) |
1849 | return LowerHvxSignExt(Op, DAG); |
1850 | return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SDLoc(Op), VT: ResTy, Operand: InpV); |
1851 | } |
1852 | |
1853 | SDValue |
1854 | HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const { |
1855 | MVT ResTy = ty(Op); |
1856 | SDValue InpV = Op.getOperand(i: 0); |
1857 | MVT ElemTy = ty(Op: InpV).getVectorElementType(); |
1858 | if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(VecTy: ResTy)) |
1859 | return extendHvxVectorPred(VecV: InpV, dl: SDLoc(Op), ResTy: ty(Op), ZeroExt: false, DAG); |
1860 | return Op; |
1861 | } |
1862 | |
1863 | SDValue |
1864 | HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const { |
1865 | MVT ResTy = ty(Op); |
1866 | SDValue InpV = Op.getOperand(i: 0); |
1867 | MVT ElemTy = ty(Op: InpV).getVectorElementType(); |
1868 | if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(VecTy: ResTy)) |
1869 | return extendHvxVectorPred(VecV: InpV, dl: SDLoc(Op), ResTy: ty(Op), ZeroExt: true, DAG); |
1870 | return Op; |
1871 | } |
1872 | |
1873 | SDValue |
1874 | HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const { |
1875 | // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight): |
1876 | // cttz(x) = bitwidth(x) - ctlz(~x & (x-1)) |
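// E.g. x = 0b0100 (as i8): ~x & (x-1) == 0b0011, ctlz of that is 6, and
// cttz(x) == 8 - 6 == 2.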
1877 | const SDLoc &dl(Op); |
1878 | MVT ResTy = ty(Op); |
1879 | SDValue InpV = Op.getOperand(i: 0); |
1880 | assert(ResTy == ty(InpV)); |
1881 | |
1882 | // Calculate the vectors of 1 and bitwidth(x). |
1883 | MVT ElemTy = ty(Op: InpV).getVectorElementType(); |
1884 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
1885 | |
1886 | SDValue Vec1 = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ResTy, |
1887 | Operand: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32)); |
1888 | SDValue VecW = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ResTy, |
1889 | Operand: DAG.getConstant(Val: ElemWidth, DL: dl, VT: MVT::i32)); |
1890 | SDValue VecN1 = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ResTy, |
1891 | Operand: DAG.getAllOnesConstant(DL: dl, VT: MVT::i32)); |
1892 | |
1893 | // Do not use DAG.getNOT, because that would create BUILD_VECTOR with |
1894 | // a BITCAST. Here we can skip the BITCAST (so we don't have to handle |
1895 | // it separately in custom combine or selection). |
1896 | SDValue A = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ResTy, |
1897 | Ops: {DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {InpV, VecN1}), |
1898 | DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {InpV, Vec1})}); |
1899 | return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, |
1900 | Ops: {VecW, DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT: ResTy, Operand: A)}); |
1901 | } |
1902 | |
1903 | SDValue |
1904 | HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const { |
1905 | const SDLoc &dl(Op); |
1906 | MVT ResTy = ty(Op); |
1907 | assert(ResTy.getVectorElementType() == MVT::i32); |
1908 | |
1909 | SDValue Vs = Op.getOperand(i: 0); |
1910 | SDValue Vt = Op.getOperand(i: 1); |
1911 | |
1912 | SDVTList ResTys = DAG.getVTList(VT1: ResTy, VT2: ResTy); |
1913 | unsigned Opc = Op.getOpcode(); |
1914 | |
1915 | // On HVX v62+ producing the full product is cheap, so legalize MULH to LOHI. |
1916 | if (Opc == ISD::MULHU) |
1917 | return DAG.getNode(Opcode: HexagonISD::UMUL_LOHI, DL: dl, VTList: ResTys, Ops: {Vs, Vt}).getValue(R: 1); |
1918 | if (Opc == ISD::MULHS) |
1919 | return DAG.getNode(Opcode: HexagonISD::SMUL_LOHI, DL: dl, VTList: ResTys, Ops: {Vs, Vt}).getValue(R: 1); |
1920 | |
1921 | #ifndef NDEBUG |
1922 | Op.dump(&DAG); |
1923 | #endif |
1924 | llvm_unreachable("Unexpected mulh operation" ); |
1925 | } |
1926 | |
1927 | SDValue |
1928 | HexagonTargetLowering::LowerHvxMulLoHi(SDValue Op, SelectionDAG &DAG) const { |
1929 | const SDLoc &dl(Op); |
1930 | unsigned Opc = Op.getOpcode(); |
1931 | SDValue Vu = Op.getOperand(i: 0); |
1932 | SDValue Vv = Op.getOperand(i: 1); |
1933 | |
1934 | // If the HI part is not used, convert it to a regular MUL. |
1935 | if (auto HiVal = Op.getValue(R: 1); HiVal.use_empty()) { |
1936 | // Need to preserve the types and the number of values. |
1937 | SDValue Hi = DAG.getUNDEF(VT: ty(Op: HiVal)); |
1938 | SDValue Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: ty(Op), Ops: {Vu, Vv}); |
1939 | return DAG.getMergeValues(Ops: {Lo, Hi}, dl); |
1940 | } |
1941 | |
1942 | bool SignedVu = Opc == HexagonISD::SMUL_LOHI; |
1943 | bool SignedVv = Opc == HexagonISD::SMUL_LOHI || Opc == HexagonISD::USMUL_LOHI; |
1944 | |
1945 | // Legal on HVX v62+, but lower it here because patterns can't handle multi- |
1946 | // valued nodes. |
1947 | if (Subtarget.useHVXV62Ops()) |
1948 | return emitHvxMulLoHiV62(A: Vu, SignedA: SignedVu, B: Vv, SignedB: SignedVv, dl, DAG); |
1949 | |
1950 | if (Opc == HexagonISD::SMUL_LOHI) { |
// Direct MULHS expansion is cheaper than doing the whole SMUL_LOHI;
// for the other signedness combinations LOHI is cheaper.
1953 | if (auto LoVal = Op.getValue(R: 0); LoVal.use_empty()) { |
1954 | SDValue Hi = emitHvxMulHsV60(A: Vu, B: Vv, dl, DAG); |
1955 | SDValue Lo = DAG.getUNDEF(VT: ty(Op: LoVal)); |
1956 | return DAG.getMergeValues(Ops: {Lo, Hi}, dl); |
1957 | } |
1958 | } |
1959 | |
1960 | return emitHvxMulLoHiV60(A: Vu, SignedA: SignedVu, B: Vv, SignedB: SignedVv, dl, DAG); |
1961 | } |
1962 | |
1963 | SDValue |
1964 | HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const { |
1965 | SDValue Val = Op.getOperand(i: 0); |
1966 | MVT ResTy = ty(Op); |
1967 | MVT ValTy = ty(Op: Val); |
1968 | const SDLoc &dl(Op); |
1969 | |
1970 | if (isHvxBoolTy(Ty: ValTy) && ResTy.isScalarInteger()) { |
1971 | unsigned HwLen = Subtarget.getVectorLength(); |
1972 | MVT WordTy = MVT::getVectorVT(VT: MVT::i32, NumElements: HwLen/4); |
1973 | SDValue VQ = compressHvxPred(VecQ: Val, dl, ResTy: WordTy, DAG); |
1974 | unsigned BitWidth = ResTy.getSizeInBits(); |
1975 | |
1976 | if (BitWidth < 64) { |
1977 | SDValue W0 = extractHvxElementReg(VecV: VQ, IdxV: DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32), |
1978 | dl, ResTy: MVT::i32, DAG); |
1979 | if (BitWidth == 32) |
1980 | return W0; |
1981 | assert(BitWidth < 32u); |
1982 | return DAG.getZExtOrTrunc(Op: W0, DL: dl, VT: ResTy); |
1983 | } |
1984 | |
1985 | // The result is >= 64 bits. The only options are 64 or 128. |
1986 | assert(BitWidth == 64 || BitWidth == 128); |
1987 | SmallVector<SDValue,4> Words; |
1988 | for (unsigned i = 0; i != BitWidth/32; ++i) { |
1989 | SDValue W = extractHvxElementReg( |
1990 | VecV: VQ, IdxV: DAG.getConstant(Val: i, DL: dl, VT: MVT::i32), dl, ResTy: MVT::i32, DAG); |
1991 | Words.push_back(Elt: W); |
1992 | } |
1993 | SmallVector<SDValue,2> Combines; |
1994 | assert(Words.size() % 2 == 0); |
1995 | for (unsigned i = 0, e = Words.size(); i < e; i += 2) { |
1996 | SDValue C = getCombine(Hi: Words[i+1], Lo: Words[i], dl, ResTy: MVT::i64, DAG); |
1997 | Combines.push_back(Elt: C); |
1998 | } |
1999 | |
2000 | if (BitWidth == 64) |
2001 | return Combines[0]; |
2002 | |
2003 | return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: ResTy, Ops: Combines); |
2004 | } |
2005 | if (isHvxBoolTy(Ty: ResTy) && ValTy.isScalarInteger()) { |
2006 | // Handle bitcast from i128 -> v128i1 and i64 -> v64i1. |
2007 | unsigned BitWidth = ValTy.getSizeInBits(); |
2008 | unsigned HwLen = Subtarget.getVectorLength(); |
2009 | assert(BitWidth == HwLen); |
2010 | |
2011 | MVT ValAsVecTy = MVT::getVectorVT(VT: MVT::i8, NumElements: BitWidth / 8); |
2012 | SDValue ValAsVec = DAG.getBitcast(VT: ValAsVecTy, V: Val); |
2013 | // Splat each byte of Val 8 times. |
2014 | // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8] |
// where b0, b1, ..., b15 are the least to most significant bytes of Val.
2016 | SmallVector<SDValue, 128> Bytes; |
2017 | // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,... |
2018 | // These are bytes with the LSB rotated left with respect to their index. |
2019 | SmallVector<SDValue, 128> Tmp; |
2020 | for (unsigned I = 0; I != HwLen / 8; ++I) { |
2021 | SDValue Idx = DAG.getConstant(Val: I, DL: dl, VT: MVT::i32); |
2022 | SDValue Byte = |
2023 | DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: MVT::i8, N1: ValAsVec, N2: Idx); |
2024 | for (unsigned J = 0; J != 8; ++J) { |
2025 | Bytes.push_back(Elt: Byte); |
2026 | Tmp.push_back(Elt: DAG.getConstant(Val: 1ull << J, DL: dl, VT: MVT::i8)); |
2027 | } |
2028 | } |
2029 | |
2030 | MVT ConstantVecTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen); |
2031 | SDValue ConstantVec = DAG.getBuildVector(VT: ConstantVecTy, DL: dl, Ops: Tmp); |
2032 | SDValue I2V = buildHvxVectorReg(Values: Bytes, dl, VecTy: ConstantVecTy, DAG); |
2033 | |
// After the AND, each byte of I2V is nonzero iff the corresponding bit of Val is set.
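// E.g. bit 10 of Val lives in byte b1: lane 10 of I2V holds b1, and
// AND-ing it with Tmp[10] == 0x04 leaves a nonzero byte exactly when
// bit 10 is set.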
2035 | I2V = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ConstantVecTy, Ops: {I2V, ConstantVec}); |
2036 | return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ResTy, Operand: I2V); |
2037 | } |
2038 | |
2039 | return Op; |
2040 | } |
2041 | |
2042 | SDValue |
2043 | HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const { |
2044 | // Sign- and zero-extends are legal. |
2045 | assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG); |
2046 | return DAG.getNode(Opcode: ISD::ZERO_EXTEND_VECTOR_INREG, DL: SDLoc(Op), VT: ty(Op), |
2047 | Operand: Op.getOperand(i: 0)); |
2048 | } |
2049 | |
2050 | SDValue |
2051 | HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const { |
2052 | MVT ResTy = ty(Op); |
2053 | if (ResTy.getVectorElementType() != MVT::i1) |
2054 | return Op; |
2055 | |
2056 | const SDLoc &dl(Op); |
2057 | unsigned HwLen = Subtarget.getVectorLength(); |
2058 | unsigned VecLen = ResTy.getVectorNumElements(); |
2059 | assert(HwLen % VecLen == 0); |
2060 | unsigned ElemSize = HwLen / VecLen; |
2061 | |
2062 | MVT VecTy = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: ElemSize * 8), NumElements: VecLen); |
2063 | SDValue S = |
2064 | DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: VecTy, N1: Op.getOperand(i: 0), |
2065 | N2: DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: VecTy, Operand: Op.getOperand(i: 1)), |
2066 | N3: DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: VecTy, Operand: Op.getOperand(i: 2))); |
2067 | return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ResTy, Operand: S); |
2068 | } |
2069 | |
2070 | SDValue |
2071 | HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const { |
2072 | if (SDValue S = getVectorShiftByInt(Op, DAG)) |
2073 | return S; |
2074 | return Op; |
2075 | } |
2076 | |
2077 | SDValue |
2078 | HexagonTargetLowering::LowerHvxFunnelShift(SDValue Op, |
2079 | SelectionDAG &DAG) const { |
2080 | unsigned Opc = Op.getOpcode(); |
2081 | assert(Opc == ISD::FSHL || Opc == ISD::FSHR); |
2082 | |
2083 | // Make sure the shift amount is within the range of the bitwidth |
2084 | // of the element type. |
2085 | SDValue A = Op.getOperand(i: 0); |
2086 | SDValue B = Op.getOperand(i: 1); |
2087 | SDValue S = Op.getOperand(i: 2); |
2088 | |
2089 | MVT InpTy = ty(Op: A); |
2090 | MVT ElemTy = InpTy.getVectorElementType(); |
2091 | |
2092 | const SDLoc &dl(Op); |
2093 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
2094 | bool IsLeft = Opc == ISD::FSHL; |
2095 | |
2096 | // The expansion into regular shifts produces worse code for i8 and for |
2097 | // right shift of i32 on v65+. |
2098 | bool UseShifts = ElemTy != MVT::i8; |
2099 | if (Subtarget.useHVXV65Ops() && ElemTy == MVT::i32) |
2100 | UseShifts = false; |
2101 | |
2102 | if (SDValue SplatV = getSplatValue(Op: S, DAG); SplatV && UseShifts) { |
2103 | // If this is a funnel shift by a scalar, lower it into regular shifts. |
2104 | SDValue Mask = DAG.getConstant(Val: ElemWidth - 1, DL: dl, VT: MVT::i32); |
2105 | SDValue ModS = |
2106 | DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, |
2107 | Ops: {DAG.getZExtOrTrunc(Op: SplatV, DL: dl, VT: MVT::i32), Mask}); |
2108 | SDValue NegS = |
2109 | DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32, |
2110 | Ops: {DAG.getConstant(Val: ElemWidth, DL: dl, VT: MVT::i32), ModS}); |
2111 | SDValue IsZero = |
2112 | DAG.getSetCC(DL: dl, VT: MVT::i1, LHS: ModS, RHS: getZero(dl, Ty: MVT::i32, DAG), Cond: ISD::SETEQ); |
// FSHL A, B  =>  (A << ModS) | (B >> NegS)
// FSHR A, B  =>  (A << NegS) | (B >> ModS)
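// E.g. for i16 elements and a splat shift amount of 3: ModS == 3 and
// NegS == 13, so FSHL yields (A << 3) | (B >> 13).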
2115 | SDValue Part1 = |
2116 | DAG.getNode(Opcode: HexagonISD::VASL, DL: dl, VT: InpTy, Ops: {A, IsLeft ? ModS : NegS}); |
2117 | SDValue Part2 = |
2118 | DAG.getNode(Opcode: HexagonISD::VLSR, DL: dl, VT: InpTy, Ops: {B, IsLeft ? NegS : ModS}); |
2119 | SDValue Or = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: InpTy, Ops: {Part1, Part2}); |
2120 | // If the shift amount was 0, pick A or B, depending on the direction. |
2121 | // The opposite shift will also be by 0, so the "Or" will be incorrect. |
2122 | return DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: InpTy, Ops: {IsZero, (IsLeft ? A : B), Or}); |
2123 | } |
2124 | |
2125 | SDValue Mask = DAG.getSplatBuildVector( |
2126 | VT: InpTy, DL: dl, Op: DAG.getConstant(Val: ElemWidth - 1, DL: dl, VT: ElemTy)); |
2127 | |
2128 | unsigned MOpc = Opc == ISD::FSHL ? HexagonISD::MFSHL : HexagonISD::MFSHR; |
2129 | return DAG.getNode(Opcode: MOpc, DL: dl, VT: ty(Op), |
2130 | Ops: {A, B, DAG.getNode(Opcode: ISD::AND, DL: dl, VT: InpTy, Ops: {S, Mask})}); |
2131 | } |
2132 | |
2133 | SDValue |
2134 | HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const { |
2135 | const SDLoc &dl(Op); |
2136 | unsigned IntNo = Op.getConstantOperandVal(i: 0); |
2137 | SmallVector<SDValue> Ops(Op->ops()); |
2138 | |
2139 | auto Swap = [&](SDValue P) { |
2140 | return DAG.getMergeValues(Ops: {P.getValue(R: 1), P.getValue(R: 0)}, dl); |
2141 | }; |
2142 | |
2143 | switch (IntNo) { |
2144 | case Intrinsic::hexagon_V6_pred_typecast: |
2145 | case Intrinsic::hexagon_V6_pred_typecast_128B: { |
2146 | MVT ResTy = ty(Op), InpTy = ty(Op: Ops[1]); |
2147 | if (isHvxBoolTy(Ty: ResTy) && isHvxBoolTy(Ty: InpTy)) { |
2148 | if (ResTy == InpTy) |
2149 | return Ops[1]; |
2150 | return DAG.getNode(Opcode: HexagonISD::TYPECAST, DL: dl, VT: ResTy, Operand: Ops[1]); |
2151 | } |
2152 | break; |
2153 | } |
2154 | case Intrinsic::hexagon_V6_vmpyss_parts: |
2155 | case Intrinsic::hexagon_V6_vmpyss_parts_128B: |
2156 | return Swap(DAG.getNode(Opcode: HexagonISD::SMUL_LOHI, DL: dl, VTList: Op->getVTList(), |
2157 | Ops: {Ops[1], Ops[2]})); |
2158 | case Intrinsic::hexagon_V6_vmpyuu_parts: |
2159 | case Intrinsic::hexagon_V6_vmpyuu_parts_128B: |
2160 | return Swap(DAG.getNode(Opcode: HexagonISD::UMUL_LOHI, DL: dl, VTList: Op->getVTList(), |
2161 | Ops: {Ops[1], Ops[2]})); |
2162 | case Intrinsic::hexagon_V6_vmpyus_parts: |
2163 | case Intrinsic::hexagon_V6_vmpyus_parts_128B: { |
2164 | return Swap(DAG.getNode(Opcode: HexagonISD::USMUL_LOHI, DL: dl, VTList: Op->getVTList(), |
2165 | Ops: {Ops[1], Ops[2]})); |
2166 | } |
2167 | } // switch |
2168 | |
2169 | return Op; |
2170 | } |
2171 | |
2172 | SDValue |
2173 | HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const { |
2174 | const SDLoc &dl(Op); |
2175 | unsigned HwLen = Subtarget.getVectorLength(); |
2176 | MachineFunction &MF = DAG.getMachineFunction(); |
2177 | auto *MaskN = cast<MaskedLoadStoreSDNode>(Val: Op.getNode()); |
2178 | SDValue Mask = MaskN->getMask(); |
2179 | SDValue Chain = MaskN->getChain(); |
2180 | SDValue Base = MaskN->getBasePtr(); |
2181 | auto *MemOp = MF.getMachineMemOperand(MMO: MaskN->getMemOperand(), Offset: 0, Size: HwLen); |
2182 | |
2183 | unsigned Opc = Op->getOpcode(); |
2184 | assert(Opc == ISD::MLOAD || Opc == ISD::MSTORE); |
2185 | |
2186 | if (Opc == ISD::MLOAD) { |
2187 | MVT ValTy = ty(Op); |
2188 | SDValue Load = DAG.getLoad(VT: ValTy, dl, Chain, Ptr: Base, MMO: MemOp); |
2189 | SDValue Thru = cast<MaskedLoadSDNode>(Val: MaskN)->getPassThru(); |
2190 | if (isUndef(Op: Thru)) |
2191 | return Load; |
2192 | SDValue VSel = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ValTy, N1: Mask, N2: Load, N3: Thru); |
2193 | return DAG.getMergeValues(Ops: {VSel, Load.getValue(R: 1)}, dl); |
2194 | } |
2195 | |
2196 | // MSTORE |
2197 | // HVX only has aligned masked stores. |
2198 | |
2199 | // TODO: Fold negations of the mask into the store. |
2200 | unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai; |
2201 | SDValue Value = cast<MaskedStoreSDNode>(Val: MaskN)->getValue(); |
2202 | SDValue Offset0 = DAG.getTargetConstant(Val: 0, DL: dl, VT: ty(Op: Base)); |
2203 | |
2204 | if (MaskN->getAlign().value() % HwLen == 0) { |
2205 | SDValue Store = getInstr(MachineOpc: StoreOpc, dl, Ty: MVT::Other, |
2206 | Ops: {Mask, Base, Offset0, Value, Chain}, DAG); |
2207 | DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Store.getNode()), NewMemRefs: {MemOp}); |
2208 | return Store; |
2209 | } |
2210 | |
2211 | // Unaligned case. |
2212 | auto StoreAlign = [&](SDValue V, SDValue A) { |
2213 | SDValue Z = getZero(dl, Ty: ty(Op: V), DAG); |
2214 | // TODO: use funnel shifts? |
2215 | // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the |
2216 | // upper half. |
2217 | SDValue LoV = getInstr(MachineOpc: Hexagon::V6_vlalignb, dl, Ty: ty(Op: V), Ops: {V, Z, A}, DAG); |
2218 | SDValue HiV = getInstr(MachineOpc: Hexagon::V6_vlalignb, dl, Ty: ty(Op: V), Ops: {Z, V, A}, DAG); |
2219 | return std::make_pair(x&: LoV, y&: HiV); |
2220 | }; |
2221 | |
2222 | MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen); |
2223 | MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen); |
2224 | SDValue MaskV = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: Mask); |
2225 | VectorPair Tmp = StoreAlign(MaskV, Base); |
2226 | VectorPair MaskU = {DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: BoolTy, Operand: Tmp.first), |
2227 | DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: BoolTy, Operand: Tmp.second)}; |
2228 | VectorPair ValueU = StoreAlign(Value, Base); |
2229 | |
2230 | SDValue Offset1 = DAG.getTargetConstant(Val: HwLen, DL: dl, VT: MVT::i32); |
2231 | SDValue StoreLo = |
2232 | getInstr(MachineOpc: StoreOpc, dl, Ty: MVT::Other, |
2233 | Ops: {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG); |
2234 | SDValue StoreHi = |
2235 | getInstr(MachineOpc: StoreOpc, dl, Ty: MVT::Other, |
2236 | Ops: {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG); |
2237 | DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: StoreLo.getNode()), NewMemRefs: {MemOp}); |
2238 | DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: StoreHi.getNode()), NewMemRefs: {MemOp}); |
2239 | return DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: {StoreLo, StoreHi}); |
2240 | } |
2241 | |
2242 | SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op, |
2243 | SelectionDAG &DAG) const { |
2244 | // This conversion only applies to QFloat. IEEE extension from f16 to f32 |
2245 | // is legal (done via a pattern). |
2246 | assert(Subtarget.useHVXQFloatOps()); |
2247 | |
2248 | assert(Op->getOpcode() == ISD::FP_EXTEND); |
2249 | |
2250 | MVT VecTy = ty(Op); |
2251 | MVT ArgTy = ty(Op: Op.getOperand(i: 0)); |
2252 | const SDLoc &dl(Op); |
2253 | assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16); |
2254 | |
2255 | SDValue F16Vec = Op.getOperand(i: 0); |
2256 | |
2257 | APFloat FloatVal = APFloat(1.0f); |
2258 | bool Ignored; |
2259 | FloatVal.convert(ToSemantics: APFloat::IEEEhalf(), RM: APFloat::rmNearestTiesToEven, losesInfo: &Ignored); |
2260 | SDValue Fp16Ones = DAG.getConstantFP(Val: FloatVal, DL: dl, VT: ArgTy); |
2261 | SDValue VmpyVec = |
2262 | getInstr(MachineOpc: Hexagon::V6_vmpy_qf32_hf, dl, Ty: VecTy, Ops: {F16Vec, Fp16Ones}, DAG); |
2263 | |
2264 | MVT HalfTy = typeSplit(VecTy).first; |
2265 | VectorPair Pair = opSplit(Vec: VmpyVec, dl, DAG); |
2266 | SDValue LoVec = |
2267 | getInstr(MachineOpc: Hexagon::V6_vconv_sf_qf32, dl, Ty: HalfTy, Ops: {Pair.first}, DAG); |
2268 | SDValue HiVec = |
2269 | getInstr(MachineOpc: Hexagon::V6_vconv_sf_qf32, dl, Ty: HalfTy, Ops: {Pair.second}, DAG); |
2270 | |
2271 | SDValue ShuffVec = |
2272 | getInstr(MachineOpc: Hexagon::V6_vshuffvdd, dl, Ty: VecTy, |
2273 | Ops: {HiVec, LoVec, DAG.getSignedConstant(Val: -4, DL: dl, VT: MVT::i32)}, DAG); |
2274 | |
2275 | return ShuffVec; |
2276 | } |
2277 | |
2278 | SDValue |
2279 | HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const { |
2280 | // Catch invalid conversion ops (just in case). |
2281 | assert(Op.getOpcode() == ISD::FP_TO_SINT || |
2282 | Op.getOpcode() == ISD::FP_TO_UINT); |
2283 | |
2284 | MVT ResTy = ty(Op); |
2285 | MVT FpTy = ty(Op: Op.getOperand(i: 0)).getVectorElementType(); |
2286 | MVT IntTy = ResTy.getVectorElementType(); |
2287 | |
2288 | if (Subtarget.useHVXIEEEFPOps()) { |
2289 | // There are only conversions from f16. |
2290 | if (FpTy == MVT::f16) { |
2291 | // Other int types aren't legal in HVX, so we shouldn't see them here. |
2292 | assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32); |
2293 | // Conversions to i8 and i16 are legal. |
2294 | if (IntTy == MVT::i8 || IntTy == MVT::i16) |
2295 | return Op; |
2296 | } |
2297 | } |
2298 | |
2299 | if (IntTy.getSizeInBits() != FpTy.getSizeInBits()) |
2300 | return EqualizeFpIntConversion(Op, DAG); |
2301 | |
2302 | return ExpandHvxFpToInt(Op, DAG); |
2303 | } |
2304 | |
2305 | SDValue |
2306 | HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const { |
2307 | // Catch invalid conversion ops (just in case). |
2308 | assert(Op.getOpcode() == ISD::SINT_TO_FP || |
2309 | Op.getOpcode() == ISD::UINT_TO_FP); |
2310 | |
2311 | MVT ResTy = ty(Op); |
2312 | MVT IntTy = ty(Op: Op.getOperand(i: 0)).getVectorElementType(); |
2313 | MVT FpTy = ResTy.getVectorElementType(); |
2314 | |
2315 | if (Subtarget.useHVXIEEEFPOps()) { |
2316 | // There are only conversions to f16. |
2317 | if (FpTy == MVT::f16) { |
2318 | // Other int types aren't legal in HVX, so we shouldn't see them here. |
2319 | assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32); |
2320 | // i8, i16 -> f16 is legal. |
2321 | if (IntTy == MVT::i8 || IntTy == MVT::i16) |
2322 | return Op; |
2323 | } |
2324 | } |
2325 | |
2326 | if (IntTy.getSizeInBits() != FpTy.getSizeInBits()) |
2327 | return EqualizeFpIntConversion(Op, DAG); |
2328 | |
2329 | return ExpandHvxIntToFp(Op, DAG); |
2330 | } |
2331 | |
2332 | HexagonTargetLowering::TypePair |
2333 | HexagonTargetLowering::typeExtendToWider(MVT Ty0, MVT Ty1) const { |
2334 | // Compare the widths of elements of the two types, and extend the narrower |
// type to match the width of the wider type. For vector types, apply this
2336 | // to the element type. |
2337 | assert(Ty0.isVector() == Ty1.isVector()); |
2338 | |
2339 | MVT ElemTy0 = Ty0.getScalarType(); |
2340 | MVT ElemTy1 = Ty1.getScalarType(); |
2341 | |
2342 | unsigned Width0 = ElemTy0.getSizeInBits(); |
2343 | unsigned Width1 = ElemTy1.getSizeInBits(); |
2344 | unsigned MaxWidth = std::max(a: Width0, b: Width1); |
2345 | |
2346 | auto getScalarWithWidth = [](MVT ScalarTy, unsigned Width) { |
2347 | if (ScalarTy.isInteger()) |
2348 | return MVT::getIntegerVT(BitWidth: Width); |
2349 | assert(ScalarTy.isFloatingPoint()); |
2350 | return MVT::getFloatingPointVT(BitWidth: Width); |
2351 | }; |
2352 | |
2353 | MVT WideETy0 = getScalarWithWidth(ElemTy0, MaxWidth); |
2354 | MVT WideETy1 = getScalarWithWidth(ElemTy1, MaxWidth); |
2355 | |
2356 | if (!Ty0.isVector()) { |
2357 | // Both types are scalars. |
2358 | return {WideETy0, WideETy1}; |
2359 | } |
2360 | |
2361 | // Vector types. |
2362 | unsigned NumElem = Ty0.getVectorNumElements(); |
2363 | assert(NumElem == Ty1.getVectorNumElements()); |
2364 | |
2365 | return {MVT::getVectorVT(VT: WideETy0, NumElements: NumElem), |
2366 | MVT::getVectorVT(VT: WideETy1, NumElements: NumElem)}; |
2367 | } |
2368 | |
2369 | HexagonTargetLowering::TypePair |
2370 | HexagonTargetLowering::typeWidenToWider(MVT Ty0, MVT Ty1) const { |
2371 | // Compare the numbers of elements of two vector types, and widen the |
2372 | // narrower one to match the number of elements in the wider one. |
2373 | assert(Ty0.isVector() && Ty1.isVector()); |
2374 | |
2375 | unsigned Len0 = Ty0.getVectorNumElements(); |
2376 | unsigned Len1 = Ty1.getVectorNumElements(); |
2377 | if (Len0 == Len1) |
2378 | return {Ty0, Ty1}; |
2379 | |
2380 | unsigned MaxLen = std::max(a: Len0, b: Len1); |
2381 | return {MVT::getVectorVT(VT: Ty0.getVectorElementType(), NumElements: MaxLen), |
2382 | MVT::getVectorVT(VT: Ty1.getVectorElementType(), NumElements: MaxLen)}; |
2383 | } |
2384 | |
2385 | MVT |
2386 | HexagonTargetLowering::typeLegalize(MVT Ty, SelectionDAG &DAG) const { |
2387 | EVT LegalTy = getTypeToTransformTo(Context&: *DAG.getContext(), VT: Ty); |
2388 | assert(LegalTy.isSimple()); |
2389 | return LegalTy.getSimpleVT(); |
2390 | } |
2391 | |
2392 | MVT |
2393 | HexagonTargetLowering::typeWidenToHvx(MVT Ty) const { |
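  // Return the type obtained by widening Ty to the full HVX register width,
  // keeping the element type. For example (illustrative), with 128-byte
  // vectors, v16i32 (512 bits) widens to v32i32 (1024 bits).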
2394 | unsigned HwWidth = 8 * Subtarget.getVectorLength(); |
2395 | assert(Ty.getSizeInBits() <= HwWidth); |
2396 | if (Ty.getSizeInBits() == HwWidth) |
2397 | return Ty; |
2398 | |
2399 | MVT ElemTy = Ty.getScalarType(); |
2400 | return MVT::getVectorVT(VT: ElemTy, NumElements: HwWidth / ElemTy.getSizeInBits()); |
2401 | } |
2402 | |
2403 | HexagonTargetLowering::VectorPair |
2404 | HexagonTargetLowering::emitHvxAddWithOverflow(SDValue A, SDValue B, |
2405 | const SDLoc &dl, bool Signed, SelectionDAG &DAG) const { |
2406 | // Compute A+B, return {A+B, O}, where O = vector predicate indicating |
2407 | // whether an overflow has occurred. |
2408 | MVT ResTy = ty(Op: A); |
2409 | assert(ResTy == ty(B)); |
2410 | MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: ResTy.getVectorNumElements()); |
2411 | |
2412 | if (!Signed) { |
2413 | // V62+ has V6_vaddcarry, but it requires input predicate, so it doesn't |
2414 | // save any instructions. |
2415 | SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ResTy, Ops: {A, B}); |
2416 | SDValue Ovf = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Add, RHS: A, Cond: ISD::SETULT); |
2417 | return {Add, Ovf}; |
2418 | } |
2419 | |
  // Signed overflow has happened if
  // (A and B have the same sign) and (A+B has a different sign from either),
  // i.e. the sign bit of (~A xor B) & ((A+B) xor B) is set.
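  // A scalar sketch of the check below (illustrative only):
  //   int32_t add = a + b;
  //   bool ovf = ((~a ^ b) & (add ^ b)) < 0;   // MSB set => signed overflow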
2423 | SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ResTy, Ops: {A, B}); |
2424 | SDValue NotA = |
2425 | DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {A, DAG.getAllOnesConstant(DL: dl, VT: ResTy)}); |
2426 | SDValue Xor0 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {NotA, B}); |
2427 | SDValue Xor1 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {Add, B}); |
2428 | SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ResTy, Ops: {Xor0, Xor1}); |
2429 | SDValue MSB = |
2430 | DAG.getSetCC(DL: dl, VT: PredTy, LHS: And, RHS: getZero(dl, Ty: ResTy, DAG), Cond: ISD::SETLT); |
2431 | return {Add, MSB}; |
2432 | } |
2433 | |
2434 | HexagonTargetLowering::VectorPair |
2435 | HexagonTargetLowering::emitHvxShiftRightRnd(SDValue Val, unsigned Amt, |
2436 | bool Signed, SelectionDAG &DAG) const { |
2437 | // Shift Val right by Amt bits, round the result to the nearest integer, |
  // breaking ties by rounding halves to the even integer.
2439 | |
2440 | const SDLoc &dl(Val); |
2441 | MVT ValTy = ty(Op: Val); |
2442 | |
2443 | // This should also work for signed integers. |
2444 | // |
2445 | // uint tmp0 = inp + ((1 << (Amt-1)) - 1); |
2446 | // bool ovf = (inp > tmp0); |
  //   bool rup = (inp & (1 << Amt)) != 0;   // bit Amt: LSB of the result
2448 | // |
2449 | // uint tmp1 = inp >> (Amt-1); // tmp1 == tmp2 iff |
2450 | // uint tmp2 = tmp0 >> (Amt-1); // the Amt-1 lower bits were all 0 |
2451 | // uint tmp3 = tmp2 + rup; |
2452 | // uint frac = (tmp1 != tmp2) ? tmp2 >> 1 : tmp3 >> 1; |
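  //
  // Worked example (illustrative): Amt = 3, inp = 20 (binary 10100), i.e.
  // 20/8 = 2.5:
  //   tmp0 = 20 + 3 = 23,  rup = bit 3 of 20 = 0,
  //   tmp1 = 20 >> 2 = 5,  tmp2 = 23 >> 2 = 5,  tmp3 = 5 + 0 = 5,
  //   tmp1 == tmp2, so frac = tmp3 >> 1 = 2 (the 2.5 tie rounds to even).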
2453 | unsigned ElemWidth = ValTy.getVectorElementType().getSizeInBits(); |
2454 | MVT ElemTy = MVT::getIntegerVT(BitWidth: ElemWidth); |
2455 | MVT IntTy = tyVector(Ty: ValTy, ElemTy); |
2456 | MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: IntTy.getVectorNumElements()); |
2457 | unsigned ShRight = Signed ? ISD::SRA : ISD::SRL; |
2458 | |
2459 | SDValue Inp = DAG.getBitcast(VT: IntTy, V: Val); |
2460 | SDValue LowBits = DAG.getConstant(Val: (1ull << (Amt - 1)) - 1, DL: dl, VT: IntTy); |
2461 | |
2462 | SDValue AmtP1 = DAG.getConstant(Val: 1ull << Amt, DL: dl, VT: IntTy); |
2463 | SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntTy, Ops: {Inp, AmtP1}); |
2464 | SDValue Zero = getZero(dl, Ty: IntTy, DAG); |
2465 | SDValue Bit = DAG.getSetCC(DL: dl, VT: PredTy, LHS: And, RHS: Zero, Cond: ISD::SETNE); |
2466 | SDValue Rup = DAG.getZExtOrTrunc(Op: Bit, DL: dl, VT: IntTy); |
2467 | auto [Tmp0, Ovf] = emitHvxAddWithOverflow(A: Inp, B: LowBits, dl, Signed, DAG); |
2468 | |
2469 | SDValue AmtM1 = DAG.getConstant(Val: Amt - 1, DL: dl, VT: IntTy); |
2470 | SDValue Tmp1 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, N1: Inp, N2: AmtM1); |
2471 | SDValue Tmp2 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, N1: Tmp0, N2: AmtM1); |
2472 | SDValue Tmp3 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: IntTy, N1: Tmp2, N2: Rup); |
2473 | |
2474 | SDValue Eq = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Tmp1, RHS: Tmp2, Cond: ISD::SETEQ); |
2475 | SDValue One = DAG.getConstant(Val: 1, DL: dl, VT: IntTy); |
2476 | SDValue Tmp4 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, Ops: {Tmp2, One}); |
2477 | SDValue Tmp5 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, Ops: {Tmp3, One}); |
2478 | SDValue Mux = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: IntTy, Ops: {Eq, Tmp5, Tmp4}); |
2479 | return {Mux, Ovf}; |
2480 | } |
2481 | |
2482 | SDValue |
2483 | HexagonTargetLowering::emitHvxMulHsV60(SDValue A, SDValue B, const SDLoc &dl, |
2484 | SelectionDAG &DAG) const { |
2485 | MVT VecTy = ty(Op: A); |
2486 | MVT PairTy = typeJoin(Tys: {VecTy, VecTy}); |
2487 | assert(VecTy.getVectorElementType() == MVT::i32); |
2488 | |
2489 | SDValue S16 = DAG.getConstant(Val: 16, DL: dl, VT: MVT::i32); |
2490 | |
2491 | // mulhs(A,B) = |
2492 | // = [(Hi(A)*2^16 + Lo(A)) *s (Hi(B)*2^16 + Lo(B))] >> 32 |
2493 | // = [Hi(A)*2^16 *s Hi(B)*2^16 + Hi(A) *su Lo(B)*2^16 |
2494 | // + Lo(A) *us (Hi(B)*2^16 + Lo(B))] >> 32 |
2495 | // = [Hi(A) *s Hi(B)*2^32 + Hi(A) *su Lo(B)*2^16 + Lo(A) *us B] >> 32 |
2496 | // The low half of Lo(A)*Lo(B) will be discarded (it's not added to |
2497 | // anything, so it cannot produce any carry over to higher bits), |
2498 | // so everything in [] can be shifted by 16 without loss of precision. |
2499 | // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + Lo(A)*B >> 16] >> 16 |
2500 | // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + V6_vmpyewuh(A,B)] >> 16 |
  // The final additions must properly propagate any carry-out bits.
2503 | // |
2504 | // Hi(B) Lo(B) |
2505 | // Hi(A) Lo(A) |
2506 | // -------------- |
2507 | // Lo(B)*Lo(A) | T0 = V6_vmpyewuh(B,A) does this, |
2508 | // Hi(B)*Lo(A) | + dropping the low 16 bits |
2509 | // Hi(A)*Lo(B) | T2 |
2510 | // Hi(B)*Hi(A) |
2511 | |
2512 | SDValue T0 = getInstr(MachineOpc: Hexagon::V6_vmpyewuh, dl, Ty: VecTy, Ops: {B, A}, DAG); |
2513 | // T1 = get Hi(A) into low halves. |
2514 | SDValue T1 = getInstr(MachineOpc: Hexagon::V6_vasrw, dl, Ty: VecTy, Ops: {A, S16}, DAG); |
2515 | // P0 = interleaved T1.h*B.uh (full precision product) |
2516 | SDValue P0 = getInstr(MachineOpc: Hexagon::V6_vmpyhus, dl, Ty: PairTy, Ops: {T1, B}, DAG); |
2517 | // T2 = T1.even(h) * B.even(uh), i.e. Hi(A)*Lo(B) |
2518 | SDValue T2 = LoHalf(V: P0, DAG); |
2519 | // We need to add T0+T2, recording the carry-out, which will be 1<<16 |
2520 | // added to the final sum. |
2521 | // P1 = interleaved even/odd 32-bit (unsigned) sums of 16-bit halves |
2522 | SDValue P1 = getInstr(MachineOpc: Hexagon::V6_vadduhw, dl, Ty: PairTy, Ops: {T0, T2}, DAG); |
2523 | // P2 = interleaved even/odd 32-bit (signed) sums of 16-bit halves |
2524 | SDValue P2 = getInstr(MachineOpc: Hexagon::V6_vaddhw, dl, Ty: PairTy, Ops: {T0, T2}, DAG); |
2525 | // T3 = full-precision(T0+T2) >> 16 |
2526 | // The low halves are added-unsigned, the high ones are added-signed. |
2527 | SDValue T3 = getInstr(MachineOpc: Hexagon::V6_vasrw_acc, dl, Ty: VecTy, |
2528 | Ops: {HiHalf(V: P2, DAG), LoHalf(V: P1, DAG), S16}, DAG); |
2529 | SDValue T4 = getInstr(MachineOpc: Hexagon::V6_vasrw, dl, Ty: VecTy, Ops: {B, S16}, DAG); |
2530 | // P3 = interleaved Hi(B)*Hi(A) (full precision), |
2531 | // which is now Lo(T1)*Lo(T4), so we want to keep the even product. |
2532 | SDValue P3 = getInstr(MachineOpc: Hexagon::V6_vmpyhv, dl, Ty: PairTy, Ops: {T1, T4}, DAG); |
2533 | SDValue T5 = LoHalf(V: P3, DAG); |
2534 | // Add: |
2535 | SDValue T6 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: VecTy, Ops: {T3, T5}); |
2536 | return T6; |
2537 | } |
2538 | |
2539 | SDValue |
2540 | HexagonTargetLowering::emitHvxMulLoHiV60(SDValue A, bool SignedA, SDValue B, |
2541 | bool SignedB, const SDLoc &dl, |
2542 | SelectionDAG &DAG) const { |
2543 | MVT VecTy = ty(Op: A); |
2544 | MVT PairTy = typeJoin(Tys: {VecTy, VecTy}); |
2545 | assert(VecTy.getVectorElementType() == MVT::i32); |
2546 | |
2547 | SDValue S16 = DAG.getConstant(Val: 16, DL: dl, VT: MVT::i32); |
2548 | |
2549 | if (SignedA && !SignedB) { |
2550 | // Make A:unsigned, B:signed. |
2551 | std::swap(a&: A, b&: B); |
2552 | std::swap(a&: SignedA, b&: SignedB); |
2553 | } |
2554 | |
2555 | // Do halfword-wise multiplications for unsigned*unsigned product, then |
2556 | // add corrections for signed and unsigned*signed. |
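  // With A = Ahi*2^16 + Alo and B = Bhi*2^16 + Blo (halves treated as
  // unsigned), the 64-bit product decomposes as (illustrative):
  //   A*B = Ahi*Bhi*2^32 + (Ahi*Blo + Alo*Bhi)*2^16 + Alo*Blo
  // P0 below supplies Alo*Blo and Ahi*Bhi, P1 supplies the two cross terms.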
2557 | |
2558 | SDValue Lo, Hi; |
2559 | |
2560 | // P0:lo = (uu) products of low halves of A and B, |
2561 | // P0:hi = (uu) products of high halves. |
2562 | SDValue P0 = getInstr(MachineOpc: Hexagon::V6_vmpyuhv, dl, Ty: PairTy, Ops: {A, B}, DAG); |
2563 | |
2564 | // Swap low/high halves in B |
2565 | SDValue T0 = getInstr(MachineOpc: Hexagon::V6_lvsplatw, dl, Ty: VecTy, |
2566 | Ops: {DAG.getConstant(Val: 0x02020202, DL: dl, VT: MVT::i32)}, DAG); |
2567 | SDValue T1 = getInstr(MachineOpc: Hexagon::V6_vdelta, dl, Ty: VecTy, Ops: {B, T0}, DAG); |
2568 | // P1 = products of even/odd halfwords. |
2569 | // P1:lo = (uu) products of even(A.uh) * odd(B.uh) |
2570 | // P1:hi = (uu) products of odd(A.uh) * even(B.uh) |
2571 | SDValue P1 = getInstr(MachineOpc: Hexagon::V6_vmpyuhv, dl, Ty: PairTy, Ops: {A, T1}, DAG); |
2572 | |
2573 | // P2:lo = low halves of P1:lo + P1:hi, |
2574 | // P2:hi = high halves of P1:lo + P1:hi. |
2575 | SDValue P2 = getInstr(MachineOpc: Hexagon::V6_vadduhw, dl, Ty: PairTy, |
2576 | Ops: {HiHalf(V: P1, DAG), LoHalf(V: P1, DAG)}, DAG); |
2577 | // Still need to add the high halves of P0:lo to P2:lo |
2578 | SDValue T2 = |
2579 | getInstr(MachineOpc: Hexagon::V6_vlsrw, dl, Ty: VecTy, Ops: {LoHalf(V: P0, DAG), S16}, DAG); |
2580 | SDValue T3 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: VecTy, Ops: {LoHalf(V: P2, DAG), T2}); |
2581 | |
2582 | // The high halves of T3 will contribute to the HI part of LOHI. |
2583 | SDValue T4 = getInstr(MachineOpc: Hexagon::V6_vasrw_acc, dl, Ty: VecTy, |
2584 | Ops: {HiHalf(V: P2, DAG), T3, S16}, DAG); |
2585 | |
2586 | // The low halves of P2 need to be added to high halves of the LO part. |
2587 | Lo = getInstr(MachineOpc: Hexagon::V6_vaslw_acc, dl, Ty: VecTy, |
2588 | Ops: {LoHalf(V: P0, DAG), LoHalf(V: P2, DAG), S16}, DAG); |
2589 | Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: VecTy, Ops: {HiHalf(V: P0, DAG), T4}); |
2590 | |
2591 | if (SignedA) { |
2592 | assert(SignedB && "Signed A and unsigned B should have been inverted" ); |
2593 | |
2594 | MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: VecTy.getVectorNumElements()); |
2595 | SDValue Zero = getZero(dl, Ty: VecTy, DAG); |
2596 | SDValue Q0 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: A, RHS: Zero, Cond: ISD::SETLT); |
2597 | SDValue Q1 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: B, RHS: Zero, Cond: ISD::SETLT); |
2598 | SDValue X0 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: VecTy, Ops: {Q0, B, Zero}); |
2599 | SDValue X1 = getInstr(MachineOpc: Hexagon::V6_vaddwq, dl, Ty: VecTy, Ops: {Q1, X0, A}, DAG); |
2600 | Hi = getInstr(MachineOpc: Hexagon::V6_vsubw, dl, Ty: VecTy, Ops: {Hi, X1}, DAG); |
2601 | } else if (SignedB) { |
2602 | // Same correction as for mulhus: |
2603 | // mulhus(A.uw,B.w) = mulhu(A.uw,B.uw) - (A.w if B < 0) |
2604 | MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: VecTy.getVectorNumElements()); |
2605 | SDValue Zero = getZero(dl, Ty: VecTy, DAG); |
2606 | SDValue Q1 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: B, RHS: Zero, Cond: ISD::SETLT); |
2607 | Hi = getInstr(MachineOpc: Hexagon::V6_vsubwq, dl, Ty: VecTy, Ops: {Q1, Hi, A}, DAG); |
2608 | } else { |
2609 | assert(!SignedA && !SignedB); |
2610 | } |
2611 | |
2612 | return DAG.getMergeValues(Ops: {Lo, Hi}, dl); |
2613 | } |
2614 | |
2615 | SDValue |
2616 | HexagonTargetLowering::emitHvxMulLoHiV62(SDValue A, bool SignedA, |
2617 | SDValue B, bool SignedB, |
2618 | const SDLoc &dl, |
2619 | SelectionDAG &DAG) const { |
2620 | MVT VecTy = ty(Op: A); |
2621 | MVT PairTy = typeJoin(Tys: {VecTy, VecTy}); |
2622 | assert(VecTy.getVectorElementType() == MVT::i32); |
2623 | |
2624 | if (SignedA && !SignedB) { |
2625 | // Make A:unsigned, B:signed. |
2626 | std::swap(a&: A, b&: B); |
2627 | std::swap(a&: SignedA, b&: SignedB); |
2628 | } |
2629 | |
2630 | // Do S*S first, then make corrections for U*S or U*U if needed. |
2631 | SDValue P0 = getInstr(MachineOpc: Hexagon::V6_vmpyewuh_64, dl, Ty: PairTy, Ops: {A, B}, DAG); |
2632 | SDValue P1 = |
2633 | getInstr(MachineOpc: Hexagon::V6_vmpyowh_64_acc, dl, Ty: PairTy, Ops: {P0, A, B}, DAG); |
2634 | SDValue Lo = LoHalf(V: P1, DAG); |
2635 | SDValue Hi = HiHalf(V: P1, DAG); |
2636 | |
2637 | if (!SignedB) { |
2638 | assert(!SignedA && "Signed A and unsigned B should have been inverted" ); |
2639 | SDValue Zero = getZero(dl, Ty: VecTy, DAG); |
2640 | MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: VecTy.getVectorNumElements()); |
2641 | |
2642 | // Mulhu(X, Y) = Mulhs(X, Y) + (X, if Y < 0) + (Y, if X < 0). |
2643 | // def: Pat<(VecI32 (mulhu HVI32:$A, HVI32:$B)), |
2644 | // (V6_vaddw (HiHalf (Muls64O $A, $B)), |
2645 | // (V6_vaddwq (V6_vgtw (V6_vd0), $B), |
2646 | // (V6_vandvqv (V6_vgtw (V6_vd0), $A), $B), |
2647 | // $A))>; |
2648 | SDValue Q0 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: A, RHS: Zero, Cond: ISD::SETLT); |
2649 | SDValue Q1 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: B, RHS: Zero, Cond: ISD::SETLT); |
2650 | SDValue T0 = getInstr(MachineOpc: Hexagon::V6_vandvqv, dl, Ty: VecTy, Ops: {Q0, B}, DAG); |
2651 | SDValue T1 = getInstr(MachineOpc: Hexagon::V6_vaddwq, dl, Ty: VecTy, Ops: {Q1, T0, A}, DAG); |
2652 | Hi = getInstr(MachineOpc: Hexagon::V6_vaddw, dl, Ty: VecTy, Ops: {Hi, T1}, DAG); |
2653 | } else if (!SignedA) { |
2654 | SDValue Zero = getZero(dl, Ty: VecTy, DAG); |
2655 | MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: VecTy.getVectorNumElements()); |
2656 | |
2657 | // Mulhus(unsigned X, signed Y) = Mulhs(X, Y) + (Y, if X < 0). |
2658 | // def: Pat<(VecI32 (HexagonMULHUS HVI32:$A, HVI32:$B)), |
2659 | // (V6_vaddwq (V6_vgtw (V6_vd0), $A), |
2660 | // (HiHalf (Muls64O $A, $B)), |
2661 | // $B)>; |
2662 | SDValue Q0 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: A, RHS: Zero, Cond: ISD::SETLT); |
2663 | Hi = getInstr(MachineOpc: Hexagon::V6_vaddwq, dl, Ty: VecTy, Ops: {Q0, Hi, B}, DAG); |
2664 | } |
2665 | |
2666 | return DAG.getMergeValues(Ops: {Lo, Hi}, dl); |
2667 | } |
2668 | |
2669 | SDValue |
2670 | HexagonTargetLowering::EqualizeFpIntConversion(SDValue Op, SelectionDAG &DAG) |
2671 | const { |
2672 | // Rewrite conversion between integer and floating-point in such a way that |
2673 | // the integer type is extended/narrowed to match the bitwidth of the |
2674 | // floating-point type, combined with additional integer-integer extensions |
2675 | // or narrowings to match the original input/result types. |
2676 | // E.g. f32 -> i8 ==> f32 -> i32 -> i8 |
2677 | // |
2678 | // The input/result types are not required to be legal, but if they are |
2679 | // legal, this function should not introduce illegal types. |
2680 | |
2681 | unsigned Opc = Op.getOpcode(); |
2682 | assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT || |
2683 | Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP); |
2684 | |
2685 | SDValue Inp = Op.getOperand(i: 0); |
2686 | MVT InpTy = ty(Op: Inp); |
2687 | MVT ResTy = ty(Op); |
2688 | |
2689 | if (InpTy == ResTy) |
2690 | return Op; |
2691 | |
2692 | const SDLoc &dl(Op); |
2693 | bool Signed = Opc == ISD::FP_TO_SINT || Opc == ISD::SINT_TO_FP; |
2694 | |
2695 | auto [WInpTy, WResTy] = typeExtendToWider(Ty0: InpTy, Ty1: ResTy); |
2696 | SDValue WInp = resizeToWidth(VecV: Inp, ResTy: WInpTy, Signed, dl, DAG); |
2697 | SDValue Conv = DAG.getNode(Opcode: Opc, DL: dl, VT: WResTy, Operand: WInp); |
2698 | SDValue Res = resizeToWidth(VecV: Conv, ResTy, Signed, dl, DAG); |
2699 | return Res; |
2700 | } |
2701 | |
2702 | SDValue |
2703 | HexagonTargetLowering::ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const { |
2704 | unsigned Opc = Op.getOpcode(); |
2705 | assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT); |
2706 | |
2707 | const SDLoc &dl(Op); |
2708 | SDValue Op0 = Op.getOperand(i: 0); |
2709 | MVT InpTy = ty(Op: Op0); |
2710 | MVT ResTy = ty(Op); |
2711 | assert(InpTy.changeTypeToInteger() == ResTy); |
2712 | |
2713 | // int32_t conv_f32_to_i32(uint32_t inp) { |
2714 | // // s | exp8 | frac23 |
2715 | // |
2716 | // int neg = (int32_t)inp < 0; |
2717 | // |
2718 | // // "expm1" is the actual exponent minus 1: instead of "bias", subtract |
2719 | // // "bias+1". When the encoded exp is "all-1" (i.e. inf/nan), this will |
2720 | // // produce a large positive "expm1", which will result in max u/int. |
  //   // In all IEEE formats, the bias is the largest positive number that
  //   // can be represented in ExpWidth-1 bits (i.e. 011..1).
2723 | // int32_t expm1 = (inp << 1) - 0x80000000; |
2724 | // expm1 >>= 24; |
2725 | // |
2726 | // // Always insert the "implicit 1". Subnormal numbers will become 0 |
2727 | // // regardless. |
2728 | // uint32_t frac = (inp << 8) | 0x80000000; |
2729 | // |
2730 | // // "frac" is the fraction part represented as Q1.31. If it was |
2731 | // // interpreted as uint32_t, it would be the fraction part multiplied |
2732 | // // by 2^31. |
2733 | // |
2734 | // // Calculate the amount of right shift, since shifting further to the |
2735 | // // left would lose significant bits. Limit it to 32, because we want |
2736 | // // shifts by 32+ to produce 0, whereas V6_vlsrwv treats the shift |
2737 | // // amount as a 6-bit signed value (so 33 is same as -31, i.e. shift |
2738 | // // left by 31). "rsh" can be negative. |
2739 | // int32_t rsh = min(31 - (expm1 + 1), 32); |
2740 | // |
2741 | // frac >>= rsh; // rsh == 32 will produce 0 |
2742 | // |
  //   // Everything up to this point is the same for conversion to signed
  //   // or unsigned integer.
2745 | // |
2746 | // if (neg) // Only for signed int |
2747 | // frac = -frac; // |
2748 | // if (rsh <= 0 && neg) // bound = neg ? 0x80000000 : 0x7fffffff |
2749 | // frac = 0x80000000; // frac = rsh <= 0 ? bound : frac |
2750 | // if (rsh <= 0 && !neg) // |
2751 | // frac = 0x7fffffff; // |
2752 | // |
2753 | // if (neg) // Only for unsigned int |
2754 | // frac = 0; // |
2755 | // if (rsh < 0 && !neg) // frac = rsh < 0 ? 0x7fffffff : frac; |
2756 | // frac = 0x7fffffff; // frac = neg ? 0 : frac; |
2757 | // |
2758 | // return frac; |
2759 | // } |
2760 | |
2761 | MVT PredTy = MVT::getVectorVT(VT: MVT::i1, EC: ResTy.getVectorElementCount()); |
2762 | |
2763 | // Zero = V6_vd0(); |
2764 | // Neg = V6_vgtw(Zero, Inp); |
2765 | // One = V6_lvsplatw(1); |
2766 | // M80 = V6_lvsplatw(0x80000000); |
2767 | // Exp00 = V6_vaslwv(Inp, One); |
2768 | // Exp01 = V6_vsubw(Exp00, M80); |
2769 | // ExpM1 = V6_vasrw(Exp01, 24); |
2770 | // Frc00 = V6_vaslw(Inp, 8); |
2771 | // Frc01 = V6_vor(Frc00, M80); |
2772 | // Rsh00 = V6_vsubw(V6_lvsplatw(30), ExpM1); |
2773 | // Rsh01 = V6_vminw(Rsh00, V6_lvsplatw(32)); |
2774 | // Frc02 = V6_vlsrwv(Frc01, Rsh01); |
2775 | |
2776 | // if signed int: |
2777 | // Bnd = V6_vmux(Neg, M80, V6_lvsplatw(0x7fffffff)) |
2778 | // Pos = V6_vgtw(Rsh01, Zero); |
2779 | // Frc13 = V6_vsubw(Zero, Frc02); |
2780 | // Frc14 = V6_vmux(Neg, Frc13, Frc02); |
2781 | // Int = V6_vmux(Pos, Frc14, Bnd); |
2782 | // |
2783 | // if unsigned int: |
2784 | // Rsn = V6_vgtw(Zero, Rsh01) |
2785 | // Frc23 = V6_vmux(Rsn, V6_lvsplatw(0x7fffffff), Frc02) |
2786 | // Int = V6_vmux(Neg, Zero, Frc23) |
2787 | |
2788 | auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(Ty: InpTy); |
2789 | unsigned ElemWidth = 1 + ExpWidth + FracWidth; |
2790 | assert((1ull << (ExpWidth - 1)) == (1 + ExpBias)); |
2791 | |
2792 | SDValue Inp = DAG.getBitcast(VT: ResTy, V: Op0); |
2793 | SDValue Zero = getZero(dl, Ty: ResTy, DAG); |
2794 | SDValue Neg = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Inp, RHS: Zero, Cond: ISD::SETLT); |
2795 | SDValue M80 = DAG.getConstant(Val: 1ull << (ElemWidth - 1), DL: dl, VT: ResTy); |
2796 | SDValue M7F = DAG.getConstant(Val: (1ull << (ElemWidth - 1)) - 1, DL: dl, VT: ResTy); |
2797 | SDValue One = DAG.getConstant(Val: 1, DL: dl, VT: ResTy); |
2798 | SDValue Exp00 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: ResTy, Ops: {Inp, One}); |
2799 | SDValue Exp01 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {Exp00, M80}); |
2800 | SDValue MNE = DAG.getConstant(Val: ElemWidth - ExpWidth, DL: dl, VT: ResTy); |
2801 | SDValue ExpM1 = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: ResTy, Ops: {Exp01, MNE}); |
2802 | |
2803 | SDValue ExpW = DAG.getConstant(Val: ExpWidth, DL: dl, VT: ResTy); |
2804 | SDValue Frc00 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: ResTy, Ops: {Inp, ExpW}); |
2805 | SDValue Frc01 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ResTy, Ops: {Frc00, M80}); |
2806 | |
2807 | SDValue MN2 = DAG.getConstant(Val: ElemWidth - 2, DL: dl, VT: ResTy); |
2808 | SDValue Rsh00 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {MN2, ExpM1}); |
2809 | SDValue MW = DAG.getConstant(Val: ElemWidth, DL: dl, VT: ResTy); |
2810 | SDValue Rsh01 = DAG.getNode(Opcode: ISD::SMIN, DL: dl, VT: ResTy, Ops: {Rsh00, MW}); |
2811 | SDValue Frc02 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ResTy, Ops: {Frc01, Rsh01}); |
2812 | |
2813 | SDValue Int; |
2814 | |
2815 | if (Opc == ISD::FP_TO_SINT) { |
2816 | SDValue Bnd = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, Ops: {Neg, M80, M7F}); |
2817 | SDValue Pos = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Rsh01, RHS: Zero, Cond: ISD::SETGT); |
2818 | SDValue Frc13 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {Zero, Frc02}); |
2819 | SDValue Frc14 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, Ops: {Neg, Frc13, Frc02}); |
2820 | Int = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, Ops: {Pos, Frc14, Bnd}); |
2821 | } else { |
2822 | assert(Opc == ISD::FP_TO_UINT); |
2823 | SDValue Rsn = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Rsh01, RHS: Zero, Cond: ISD::SETLT); |
2824 | SDValue Frc23 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, N1: Rsn, N2: M7F, N3: Frc02); |
2825 | Int = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, N1: Neg, N2: Zero, N3: Frc23); |
2826 | } |
2827 | |
2828 | return Int; |
2829 | } |
2830 | |
2831 | SDValue |
2832 | HexagonTargetLowering::ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const { |
2833 | unsigned Opc = Op.getOpcode(); |
2834 | assert(Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP); |
2835 | |
2836 | const SDLoc &dl(Op); |
2837 | SDValue Op0 = Op.getOperand(i: 0); |
2838 | MVT InpTy = ty(Op: Op0); |
2839 | MVT ResTy = ty(Op); |
2840 | assert(ResTy.changeTypeToInteger() == InpTy); |
2841 | |
2842 | // uint32_t vnoc1_rnd(int32_t w) { |
2843 | // int32_t iszero = w == 0; |
2844 | // int32_t isneg = w < 0; |
2845 | // uint32_t u = __builtin_HEXAGON_A2_abs(w); |
2846 | // |
2847 | // uint32_t norm_left = __builtin_HEXAGON_S2_cl0(u) + 1; |
2848 | // uint32_t frac0 = (uint64_t)u << norm_left; |
2849 | // |
2850 | // // Rounding: |
2851 | // uint32_t frac1 = frac0 + ((1 << 8) - 1); |
2852 | // uint32_t renorm = (frac0 > frac1); |
2853 | // uint32_t rup = (int)(frac0 << 22) < 0; |
2854 | // |
2855 | // uint32_t frac2 = frac0 >> 8; |
2856 | // uint32_t frac3 = frac1 >> 8; |
2857 | // uint32_t frac = (frac2 != frac3) ? frac3 >> 1 : (frac3 + rup) >> 1; |
2858 | // |
2859 | // int32_t exp = 32 - norm_left + renorm + 127; |
2860 | // exp <<= 23; |
2861 | // |
2862 | // uint32_t sign = 0x80000000 * isneg; |
2863 | // uint32_t f = sign | exp | frac; |
2864 | // return iszero ? 0 : f; |
2865 | // } |
2866 | |
2867 | MVT PredTy = MVT::getVectorVT(VT: MVT::i1, EC: InpTy.getVectorElementCount()); |
2868 | bool Signed = Opc == ISD::SINT_TO_FP; |
2869 | |
2870 | auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(Ty: ResTy); |
2871 | unsigned ElemWidth = 1 + ExpWidth + FracWidth; |
2872 | |
2873 | SDValue Zero = getZero(dl, Ty: InpTy, DAG); |
2874 | SDValue One = DAG.getConstant(Val: 1, DL: dl, VT: InpTy); |
2875 | SDValue IsZero = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Op0, RHS: Zero, Cond: ISD::SETEQ); |
2876 | SDValue Abs = Signed ? DAG.getNode(Opcode: ISD::ABS, DL: dl, VT: InpTy, Operand: Op0) : Op0; |
2877 | SDValue Clz = DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT: InpTy, Operand: Abs); |
2878 | SDValue NLeft = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: InpTy, Ops: {Clz, One}); |
2879 | SDValue Frac0 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: InpTy, Ops: {Abs, NLeft}); |
2880 | |
2881 | auto [Frac, Ovf] = emitHvxShiftRightRnd(Val: Frac0, Amt: ExpWidth + 1, Signed: false, DAG); |
2882 | if (Signed) { |
2883 | SDValue IsNeg = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Op0, RHS: Zero, Cond: ISD::SETLT); |
2884 | SDValue M80 = DAG.getConstant(Val: 1ull << (ElemWidth - 1), DL: dl, VT: InpTy); |
2885 | SDValue Sign = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: InpTy, Ops: {IsNeg, M80, Zero}); |
2886 | Frac = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: InpTy, Ops: {Sign, Frac}); |
2887 | } |
2888 | |
2889 | SDValue Rnrm = DAG.getZExtOrTrunc(Op: Ovf, DL: dl, VT: InpTy); |
2890 | SDValue Exp0 = DAG.getConstant(Val: ElemWidth + ExpBias, DL: dl, VT: InpTy); |
2891 | SDValue Exp1 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: InpTy, Ops: {Rnrm, Exp0}); |
2892 | SDValue Exp2 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: InpTy, Ops: {Exp1, NLeft}); |
2893 | SDValue Exp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: InpTy, |
2894 | Ops: {Exp2, DAG.getConstant(Val: FracWidth, DL: dl, VT: InpTy)}); |
2895 | SDValue Flt0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: InpTy, Ops: {Frac, Exp3}); |
2896 | SDValue Flt1 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: InpTy, Ops: {IsZero, Zero, Flt0}); |
2897 | SDValue Flt = DAG.getBitcast(VT: ResTy, V: Flt1); |
2898 | |
2899 | return Flt; |
2900 | } |
2901 | |
2902 | SDValue |
2903 | HexagonTargetLowering::CreateTLWrapper(SDValue Op, SelectionDAG &DAG) const { |
2904 | unsigned Opc = Op.getOpcode(); |
2905 | unsigned TLOpc; |
2906 | switch (Opc) { |
2907 | case ISD::ANY_EXTEND: |
2908 | case ISD::SIGN_EXTEND: |
2909 | case ISD::ZERO_EXTEND: |
2910 | TLOpc = HexagonISD::TL_EXTEND; |
2911 | break; |
2912 | case ISD::TRUNCATE: |
2913 | TLOpc = HexagonISD::TL_TRUNCATE; |
    break;
  default:
#ifndef NDEBUG
2916 | Op.dump(&DAG); |
2917 | #endif |
2918 | llvm_unreachable("Unexpected operator" ); |
2919 | } |
2920 | |
2921 | const SDLoc &dl(Op); |
2922 | return DAG.getNode(Opcode: TLOpc, DL: dl, VT: ty(Op), N1: Op.getOperand(i: 0), |
2923 | N2: DAG.getUNDEF(VT: MVT::i128), // illegal type |
2924 | N3: DAG.getConstant(Val: Opc, DL: dl, VT: MVT::i32)); |
2925 | } |
2926 | |
2927 | SDValue |
2928 | HexagonTargetLowering::RemoveTLWrapper(SDValue Op, SelectionDAG &DAG) const { |
2929 | assert(Op.getOpcode() == HexagonISD::TL_EXTEND || |
2930 | Op.getOpcode() == HexagonISD::TL_TRUNCATE); |
2931 | unsigned Opc = Op.getConstantOperandVal(i: 2); |
2932 | return DAG.getNode(Opcode: Opc, DL: SDLoc(Op), VT: ty(Op), Operand: Op.getOperand(i: 0)); |
2933 | } |
2934 | |
2935 | HexagonTargetLowering::VectorPair |
2936 | HexagonTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const { |
2937 | assert(!Op.isMachineOpcode()); |
2938 | SmallVector<SDValue, 2> OpsL, OpsH; |
2939 | const SDLoc &dl(Op); |
2940 | |
2941 | auto SplitVTNode = [&DAG, this](const VTSDNode *N) { |
2942 | MVT Ty = typeSplit(VecTy: N->getVT().getSimpleVT()).first; |
2943 | SDValue TV = DAG.getValueType(Ty); |
2944 | return std::make_pair(x&: TV, y&: TV); |
2945 | }; |
2946 | |
2947 | for (SDValue A : Op.getNode()->ops()) { |
2948 | auto [Lo, Hi] = |
2949 | ty(Op: A).isVector() ? opSplit(Vec: A, dl, DAG) : std::make_pair(x&: A, y&: A); |
2950 | // Special case for type operand. |
2951 | switch (Op.getOpcode()) { |
2952 | case ISD::SIGN_EXTEND_INREG: |
2953 | case HexagonISD::SSAT: |
2954 | case HexagonISD::USAT: |
2955 | if (const auto *N = dyn_cast<const VTSDNode>(Val: A.getNode())) |
2956 | std::tie(args&: Lo, args&: Hi) = SplitVTNode(N); |
2957 | break; |
2958 | } |
2959 | OpsL.push_back(Elt: Lo); |
2960 | OpsH.push_back(Elt: Hi); |
2961 | } |
2962 | |
2963 | MVT ResTy = ty(Op); |
2964 | MVT HalfTy = typeSplit(VecTy: ResTy).first; |
2965 | SDValue L = DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT: HalfTy, Ops: OpsL); |
2966 | SDValue H = DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT: HalfTy, Ops: OpsH); |
2967 | return {L, H}; |
2968 | } |
2969 | |
2970 | SDValue |
2971 | HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const { |
2972 | auto *MemN = cast<MemSDNode>(Val: Op.getNode()); |
2973 | |
2974 | MVT MemTy = MemN->getMemoryVT().getSimpleVT(); |
2975 | if (!isHvxPairTy(Ty: MemTy)) |
2976 | return Op; |
2977 | |
2978 | const SDLoc &dl(Op); |
2979 | unsigned HwLen = Subtarget.getVectorLength(); |
2980 | MVT SingleTy = typeSplit(VecTy: MemTy).first; |
2981 | SDValue Chain = MemN->getChain(); |
2982 | SDValue Base0 = MemN->getBasePtr(); |
2983 | SDValue Base1 = |
2984 | DAG.getMemBasePlusOffset(Base: Base0, Offset: TypeSize::getFixed(ExactSize: HwLen), DL: dl); |
2985 | unsigned MemOpc = MemN->getOpcode(); |
2986 | |
2987 | MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr; |
2988 | if (MachineMemOperand *MMO = MemN->getMemOperand()) { |
2989 | MachineFunction &MF = DAG.getMachineFunction(); |
2990 | uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE) |
2991 | ? (uint64_t)MemoryLocation::UnknownSize |
2992 | : HwLen; |
2993 | MOp0 = MF.getMachineMemOperand(MMO, Offset: 0, Size: MemSize); |
2994 | MOp1 = MF.getMachineMemOperand(MMO, Offset: HwLen, Size: MemSize); |
2995 | } |
2996 | |
2997 | if (MemOpc == ISD::LOAD) { |
2998 | assert(cast<LoadSDNode>(Op)->isUnindexed()); |
2999 | SDValue Load0 = DAG.getLoad(VT: SingleTy, dl, Chain, Ptr: Base0, MMO: MOp0); |
3000 | SDValue Load1 = DAG.getLoad(VT: SingleTy, dl, Chain, Ptr: Base1, MMO: MOp1); |
3001 | return DAG.getMergeValues( |
3002 | Ops: { DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: MemTy, N1: Load0, N2: Load1), |
3003 | DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, |
3004 | N1: Load0.getValue(R: 1), N2: Load1.getValue(R: 1)) }, dl); |
3005 | } |
3006 | if (MemOpc == ISD::STORE) { |
3007 | assert(cast<StoreSDNode>(Op)->isUnindexed()); |
3008 | VectorPair Vals = opSplit(Vec: cast<StoreSDNode>(Val&: Op)->getValue(), dl, DAG); |
3009 | SDValue Store0 = DAG.getStore(Chain, dl, Val: Vals.first, Ptr: Base0, MMO: MOp0); |
3010 | SDValue Store1 = DAG.getStore(Chain, dl, Val: Vals.second, Ptr: Base1, MMO: MOp1); |
3011 | return DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Store0, N2: Store1); |
3012 | } |
3013 | |
3014 | assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE); |
3015 | |
3016 | auto MaskN = cast<MaskedLoadStoreSDNode>(Val&: Op); |
3017 | assert(MaskN->isUnindexed()); |
3018 | VectorPair Masks = opSplit(Vec: MaskN->getMask(), dl, DAG); |
3019 | SDValue Offset = DAG.getUNDEF(VT: MVT::i32); |
3020 | |
3021 | if (MemOpc == ISD::MLOAD) { |
3022 | VectorPair Thru = |
3023 | opSplit(Vec: cast<MaskedLoadSDNode>(Val&: Op)->getPassThru(), dl, DAG); |
3024 | SDValue MLoad0 = |
3025 | DAG.getMaskedLoad(VT: SingleTy, dl, Chain, Base: Base0, Offset, Mask: Masks.first, |
3026 | Src0: Thru.first, MemVT: SingleTy, MMO: MOp0, AM: ISD::UNINDEXED, |
3027 | ISD::NON_EXTLOAD, IsExpanding: false); |
3028 | SDValue MLoad1 = |
3029 | DAG.getMaskedLoad(VT: SingleTy, dl, Chain, Base: Base1, Offset, Mask: Masks.second, |
3030 | Src0: Thru.second, MemVT: SingleTy, MMO: MOp1, AM: ISD::UNINDEXED, |
3031 | ISD::NON_EXTLOAD, IsExpanding: false); |
3032 | return DAG.getMergeValues( |
3033 | Ops: { DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: MemTy, N1: MLoad0, N2: MLoad1), |
3034 | DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, |
3035 | N1: MLoad0.getValue(R: 1), N2: MLoad1.getValue(R: 1)) }, dl); |
3036 | } |
3037 | if (MemOpc == ISD::MSTORE) { |
3038 | VectorPair Vals = opSplit(Vec: cast<MaskedStoreSDNode>(Val&: Op)->getValue(), dl, DAG); |
3039 | SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Val: Vals.first, Base: Base0, Offset, |
3040 | Mask: Masks.first, MemVT: SingleTy, MMO: MOp0, |
3041 | AM: ISD::UNINDEXED, IsTruncating: false, IsCompressing: false); |
3042 | SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Val: Vals.second, Base: Base1, Offset, |
3043 | Mask: Masks.second, MemVT: SingleTy, MMO: MOp1, |
3044 | AM: ISD::UNINDEXED, IsTruncating: false, IsCompressing: false); |
3045 | return DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: MStore0, N2: MStore1); |
3046 | } |
3047 | |
3048 | std::string Name = "Unexpected operation: " + Op->getOperationName(G: &DAG); |
3049 | llvm_unreachable(Name.c_str()); |
3050 | } |
3051 | |
3052 | SDValue |
3053 | HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const { |
3054 | const SDLoc &dl(Op); |
3055 | auto *LoadN = cast<LoadSDNode>(Val: Op.getNode()); |
3056 | assert(LoadN->isUnindexed() && "Not widening indexed loads yet" ); |
3057 | assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 && |
3058 | "Not widening loads of i1 yet" ); |
3059 | |
3060 | SDValue Chain = LoadN->getChain(); |
3061 | SDValue Base = LoadN->getBasePtr(); |
3062 | SDValue Offset = DAG.getUNDEF(VT: MVT::i32); |
3063 | |
3064 | MVT ResTy = ty(Op); |
3065 | unsigned HwLen = Subtarget.getVectorLength(); |
3066 | unsigned ResLen = ResTy.getStoreSize(); |
3067 | assert(ResLen < HwLen && "vsetq(v1) prerequisite" ); |
3068 | |
3069 | MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen); |
3070 | SDValue Mask = getInstr(MachineOpc: Hexagon::V6_pred_scalar2, dl, Ty: BoolTy, |
3071 | Ops: {DAG.getConstant(Val: ResLen, DL: dl, VT: MVT::i32)}, DAG); |
3072 | |
3073 | MVT LoadTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen); |
3074 | MachineFunction &MF = DAG.getMachineFunction(); |
3075 | auto *MemOp = MF.getMachineMemOperand(MMO: LoadN->getMemOperand(), Offset: 0, Size: HwLen); |
3076 | |
3077 | SDValue Load = DAG.getMaskedLoad(VT: LoadTy, dl, Chain, Base, Offset, Mask, |
3078 | Src0: DAG.getUNDEF(VT: LoadTy), MemVT: LoadTy, MMO: MemOp, |
3079 | AM: ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding: false); |
3080 | SDValue Value = opCastElem(Vec: Load, ElemTy: ResTy.getVectorElementType(), DAG); |
3081 | return DAG.getMergeValues(Ops: {Value, Load.getValue(R: 1)}, dl); |
3082 | } |
3083 | |
3084 | SDValue |
3085 | HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const { |
3086 | const SDLoc &dl(Op); |
3087 | auto *StoreN = cast<StoreSDNode>(Val: Op.getNode()); |
3088 | assert(StoreN->isUnindexed() && "Not widening indexed stores yet" ); |
3089 | assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 && |
3090 | "Not widening stores of i1 yet" ); |
3091 | |
3092 | SDValue Chain = StoreN->getChain(); |
3093 | SDValue Base = StoreN->getBasePtr(); |
3094 | SDValue Offset = DAG.getUNDEF(VT: MVT::i32); |
3095 | |
3096 | SDValue Value = opCastElem(Vec: StoreN->getValue(), ElemTy: MVT::i8, DAG); |
3097 | MVT ValueTy = ty(Op: Value); |
3098 | unsigned ValueLen = ValueTy.getVectorNumElements(); |
3099 | unsigned HwLen = Subtarget.getVectorLength(); |
3100 | assert(isPowerOf2_32(ValueLen)); |
3101 | |
3102 | for (unsigned Len = ValueLen; Len < HwLen; ) { |
3103 | Value = opJoin(Ops: {Value, DAG.getUNDEF(VT: ty(Op: Value))}, dl, DAG); |
3104 | Len = ty(Op: Value).getVectorNumElements(); // This is Len *= 2 |
3105 | } |
3106 | assert(ty(Value).getVectorNumElements() == HwLen); // Paranoia |
3107 | |
3108 | assert(ValueLen < HwLen && "vsetq(v1) prerequisite" ); |
3109 | MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen); |
3110 | SDValue Mask = getInstr(MachineOpc: Hexagon::V6_pred_scalar2, dl, Ty: BoolTy, |
3111 | Ops: {DAG.getConstant(Val: ValueLen, DL: dl, VT: MVT::i32)}, DAG); |
3112 | MachineFunction &MF = DAG.getMachineFunction(); |
3113 | auto *MemOp = MF.getMachineMemOperand(MMO: StoreN->getMemOperand(), Offset: 0, Size: HwLen); |
3114 | return DAG.getMaskedStore(Chain, dl, Val: Value, Base, Offset, Mask, MemVT: ty(Op: Value), |
3115 | MMO: MemOp, AM: ISD::UNINDEXED, IsTruncating: false, IsCompressing: false); |
3116 | } |
3117 | |
3118 | SDValue |
3119 | HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const { |
3120 | const SDLoc &dl(Op); |
3121 | SDValue Op0 = Op.getOperand(i: 0), Op1 = Op.getOperand(i: 1); |
3122 | MVT ElemTy = ty(Op: Op0).getVectorElementType(); |
3123 | unsigned HwLen = Subtarget.getVectorLength(); |
3124 | |
3125 | unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits(); |
3126 | assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen); |
3127 | MVT WideOpTy = MVT::getVectorVT(VT: ElemTy, NumElements: WideOpLen); |
3128 | if (!Subtarget.isHVXVectorType(VecTy: WideOpTy, IncludeBool: true)) |
3129 | return SDValue(); |
3130 | |
3131 | SDValue WideOp0 = appendUndef(Val: Op0, ResTy: WideOpTy, DAG); |
3132 | SDValue WideOp1 = appendUndef(Val: Op1, ResTy: WideOpTy, DAG); |
3133 | EVT ResTy = |
3134 | getSetCCResultType(DAG.getDataLayout(), C&: *DAG.getContext(), VT: WideOpTy); |
3135 | SDValue SetCC = DAG.getNode(Opcode: ISD::SETCC, DL: dl, VT: ResTy, |
3136 | Ops: {WideOp0, WideOp1, Op.getOperand(i: 2)}); |
3137 | |
3138 | EVT RetTy = typeLegalize(Ty: ty(Op), DAG); |
3139 | return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL: dl, VT: RetTy, |
3140 | Ops: {SetCC, getZero(dl, Ty: MVT::i32, DAG)}); |
3141 | } |
3142 | |
3143 | SDValue |
3144 | HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const { |
3145 | unsigned Opc = Op.getOpcode(); |
3146 | bool IsPairOp = isHvxPairTy(Ty: ty(Op)) || |
3147 | llvm::any_of(Range: Op.getNode()->ops(), P: [this] (SDValue V) { |
3148 | return isHvxPairTy(Ty: ty(Op: V)); |
3149 | }); |
3150 | |
3151 | if (IsPairOp) { |
3152 | switch (Opc) { |
3153 | default: |
3154 | break; |
3155 | case ISD::LOAD: |
3156 | case ISD::STORE: |
3157 | case ISD::MLOAD: |
3158 | case ISD::MSTORE: |
3159 | return SplitHvxMemOp(Op, DAG); |
3160 | case ISD::SINT_TO_FP: |
3161 | case ISD::UINT_TO_FP: |
3162 | case ISD::FP_TO_SINT: |
3163 | case ISD::FP_TO_UINT: |
3164 | if (ty(Op).getSizeInBits() == ty(Op: Op.getOperand(i: 0)).getSizeInBits()) |
3165 | return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc(Op), DAG); |
3166 | break; |
3167 | case ISD::ABS: |
3168 | case ISD::CTPOP: |
3169 | case ISD::CTLZ: |
3170 | case ISD::CTTZ: |
3171 | case ISD::MUL: |
3172 | case ISD::FADD: |
3173 | case ISD::FSUB: |
3174 | case ISD::FMUL: |
3175 | case ISD::FMINIMUMNUM: |
3176 | case ISD::FMAXIMUMNUM: |
3177 | case ISD::MULHS: |
3178 | case ISD::MULHU: |
3179 | case ISD::AND: |
3180 | case ISD::OR: |
3181 | case ISD::XOR: |
3182 | case ISD::SRA: |
3183 | case ISD::SHL: |
3184 | case ISD::SRL: |
3185 | case ISD::FSHL: |
3186 | case ISD::FSHR: |
3187 | case ISD::SMIN: |
3188 | case ISD::SMAX: |
3189 | case ISD::UMIN: |
3190 | case ISD::UMAX: |
3191 | case ISD::SETCC: |
3192 | case ISD::VSELECT: |
3193 | case ISD::SIGN_EXTEND_INREG: |
3194 | case ISD::SPLAT_VECTOR: |
3195 | return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc(Op), DAG); |
3196 | case ISD::SIGN_EXTEND: |
3197 | case ISD::ZERO_EXTEND: |
3198 | // In general, sign- and zero-extends can't be split and still |
3199 | // be legal. The only exception is extending bool vectors. |
3200 | if (ty(Op: Op.getOperand(i: 0)).getVectorElementType() == MVT::i1) |
3201 | return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc(Op), DAG); |
3202 | break; |
3203 | } |
3204 | } |
3205 | |
3206 | switch (Opc) { |
3207 | default: |
3208 | break; |
3209 | case ISD::BUILD_VECTOR: return LowerHvxBuildVector(Op, DAG); |
3210 | case ISD::SPLAT_VECTOR: return LowerHvxSplatVector(Op, DAG); |
3211 | case ISD::CONCAT_VECTORS: return LowerHvxConcatVectors(Op, DAG); |
3212 | case ISD::INSERT_SUBVECTOR: return LowerHvxInsertSubvector(Op, DAG); |
3213 | case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG); |
3214 | case ISD::EXTRACT_SUBVECTOR: return LowerHvxExtractSubvector(Op, DAG); |
3215 | case ISD::EXTRACT_VECTOR_ELT: return LowerHvxExtractElement(Op, DAG); |
3216 | case ISD::BITCAST: return LowerHvxBitcast(Op, DAG); |
3217 | case ISD::ANY_EXTEND: return LowerHvxAnyExt(Op, DAG); |
3218 | case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG); |
3219 | case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG); |
3220 | case ISD::CTTZ: return LowerHvxCttz(Op, DAG); |
3221 | case ISD::SELECT: return LowerHvxSelect(Op, DAG); |
3222 | case ISD::SRA: |
3223 | case ISD::SHL: |
3224 | case ISD::SRL: return LowerHvxShift(Op, DAG); |
3225 | case ISD::FSHL: |
3226 | case ISD::FSHR: return LowerHvxFunnelShift(Op, DAG); |
3227 | case ISD::MULHS: |
3228 | case ISD::MULHU: return LowerHvxMulh(Op, DAG); |
3229 | case ISD::SMUL_LOHI: |
3230 | case ISD::UMUL_LOHI: return LowerHvxMulLoHi(Op, DAG); |
3231 | case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG); |
3232 | case ISD::SETCC: |
3233 | case ISD::INTRINSIC_VOID: return Op; |
3234 | case ISD::INTRINSIC_WO_CHAIN: return LowerHvxIntrinsic(Op, DAG); |
3235 | case ISD::MLOAD: |
3236 | case ISD::MSTORE: return LowerHvxMaskedOp(Op, DAG); |
3237 | // Unaligned loads will be handled by the default lowering. |
3238 | case ISD::LOAD: return SDValue(); |
3239 | case ISD::FP_EXTEND: return LowerHvxFpExtend(Op, DAG); |
3240 | case ISD::FP_TO_SINT: |
3241 | case ISD::FP_TO_UINT: return LowerHvxFpToInt(Op, DAG); |
3242 | case ISD::SINT_TO_FP: |
3243 | case ISD::UINT_TO_FP: return LowerHvxIntToFp(Op, DAG); |
3244 | |
3245 | // Special nodes: |
3246 | case HexagonISD::SMUL_LOHI: |
3247 | case HexagonISD::UMUL_LOHI: |
3248 | case HexagonISD::USMUL_LOHI: return LowerHvxMulLoHi(Op, DAG); |
3249 | } |
3250 | #ifndef NDEBUG |
3251 | Op.dumpr(&DAG); |
3252 | #endif |
3253 | llvm_unreachable("Unhandled HVX operation" ); |
3254 | } |
3255 | |
3256 | SDValue |
3257 | HexagonTargetLowering::ExpandHvxResizeIntoSteps(SDValue Op, SelectionDAG &DAG) |
3258 | const { |
3259 | // Rewrite the extension/truncation/saturation op into steps where each |
3260 | // step changes the type widths by a factor of 2. |
3261 | // E.g. i8 -> i16 remains unchanged, but i8 -> i32 ==> i8 -> i16 -> i32. |
3262 | // |
3263 | // Some of the vector types in Op may not be legal. |
3264 | |
3265 | unsigned Opc = Op.getOpcode(); |
3266 | switch (Opc) { |
3267 | case HexagonISD::SSAT: |
3268 | case HexagonISD::USAT: |
3269 | case HexagonISD::TL_EXTEND: |
3270 | case HexagonISD::TL_TRUNCATE: |
3271 | break; |
3272 | case ISD::ANY_EXTEND: |
3273 | case ISD::ZERO_EXTEND: |
3274 | case ISD::SIGN_EXTEND: |
3275 | case ISD::TRUNCATE: |
3276 | llvm_unreachable("ISD:: ops will be auto-folded" ); |
    break;
  default:
#ifndef NDEBUG
3279 | Op.dump(&DAG); |
3280 | #endif |
3281 | llvm_unreachable("Unexpected operation" ); |
3282 | } |
3283 | |
3284 | SDValue Inp = Op.getOperand(i: 0); |
3285 | MVT InpTy = ty(Op: Inp); |
3286 | MVT ResTy = ty(Op); |
3287 | |
3288 | unsigned InpWidth = InpTy.getVectorElementType().getSizeInBits(); |
3289 | unsigned ResWidth = ResTy.getVectorElementType().getSizeInBits(); |
3290 | assert(InpWidth != ResWidth); |
3291 | |
3292 | if (InpWidth == 2 * ResWidth || ResWidth == 2 * InpWidth) |
3293 | return Op; |
3294 | |
3295 | const SDLoc &dl(Op); |
3296 | unsigned NumElems = InpTy.getVectorNumElements(); |
3297 | assert(NumElems == ResTy.getVectorNumElements()); |
3298 | |
3299 | auto repeatOp = [&](unsigned NewWidth, SDValue Arg) { |
3300 | MVT Ty = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: NewWidth), NumElements: NumElems); |
3301 | switch (Opc) { |
3302 | case HexagonISD::SSAT: |
3303 | case HexagonISD::USAT: |
3304 | return DAG.getNode(Opcode: Opc, DL: dl, VT: Ty, Ops: {Arg, DAG.getValueType(Ty)}); |
3305 | case HexagonISD::TL_EXTEND: |
3306 | case HexagonISD::TL_TRUNCATE: |
3307 | return DAG.getNode(Opcode: Opc, DL: dl, VT: Ty, Ops: {Arg, Op.getOperand(i: 1), Op.getOperand(i: 2)}); |
3308 | default: |
3309 | llvm_unreachable("Unexpected opcode" ); |
3310 | } |
3311 | }; |
3312 | |
3313 | SDValue S = Inp; |
3314 | if (InpWidth < ResWidth) { |
3315 | assert(ResWidth % InpWidth == 0 && isPowerOf2_32(ResWidth / InpWidth)); |
3316 | while (InpWidth * 2 <= ResWidth) |
3317 | S = repeatOp(InpWidth *= 2, S); |
3318 | } else { |
3319 | // InpWidth > ResWidth |
3320 | assert(InpWidth % ResWidth == 0 && isPowerOf2_32(InpWidth / ResWidth)); |
3321 | while (InpWidth / 2 >= ResWidth) |
3322 | S = repeatOp(InpWidth /= 2, S); |
3323 | } |
3324 | return S; |
3325 | } |
3326 | |
3327 | SDValue |
3328 | HexagonTargetLowering::LegalizeHvxResize(SDValue Op, SelectionDAG &DAG) const { |
3329 | SDValue Inp0 = Op.getOperand(i: 0); |
3330 | MVT InpTy = ty(Op: Inp0); |
3331 | MVT ResTy = ty(Op); |
3332 | unsigned InpWidth = InpTy.getSizeInBits(); |
3333 | unsigned ResWidth = ResTy.getSizeInBits(); |
3334 | unsigned Opc = Op.getOpcode(); |
3335 | |
3336 | if (shouldWidenToHvx(Ty: InpTy, DAG) || shouldWidenToHvx(Ty: ResTy, DAG)) { |
3337 | // First, make sure that the narrower type is widened to HVX. |
3338 | // This may cause the result to be wider than what the legalizer |
3339 | // expects, so insert EXTRACT_SUBVECTOR to bring it back to the |
3340 | // desired type. |
3341 | auto [WInpTy, WResTy] = |
3342 | InpWidth < ResWidth ? typeWidenToWider(Ty0: typeWidenToHvx(Ty: InpTy), Ty1: ResTy) |
3343 | : typeWidenToWider(Ty0: InpTy, Ty1: typeWidenToHvx(Ty: ResTy)); |
3344 | SDValue W = appendUndef(Val: Inp0, ResTy: WInpTy, DAG); |
3345 | SDValue S; |
3346 | if (Opc == HexagonISD::TL_EXTEND || Opc == HexagonISD::TL_TRUNCATE) { |
3347 | S = DAG.getNode(Opcode: Opc, DL: SDLoc(Op), VT: WResTy, N1: W, N2: Op.getOperand(i: 1), |
3348 | N3: Op.getOperand(i: 2)); |
3349 | } else { |
3350 | S = DAG.getNode(Opcode: Opc, DL: SDLoc(Op), VT: WResTy, N1: W, N2: DAG.getValueType(WResTy)); |
3351 | } |
3352 | SDValue T = ExpandHvxResizeIntoSteps(Op: S, DAG); |
3353 | return extractSubvector(Vec: T, SubTy: typeLegalize(Ty: ResTy, DAG), SubIdx: 0, DAG); |
3354 | } else if (shouldSplitToHvx(Ty: InpWidth < ResWidth ? ResTy : InpTy, DAG)) { |
3355 | return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc(Op), DAG); |
3356 | } else { |
3357 | assert(isTypeLegal(InpTy) && isTypeLegal(ResTy)); |
3358 | return RemoveTLWrapper(Op, DAG); |
3359 | } |
3360 | llvm_unreachable("Unexpected situation" ); |
3361 | } |
3362 | |
3363 | void |
3364 | HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N, |
3365 | SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { |
3366 | unsigned Opc = N->getOpcode(); |
3367 | SDValue Op(N, 0); |
3368 | SDValue Inp0; // Optional first argument. |
3369 | if (N->getNumOperands() > 0) |
3370 | Inp0 = Op.getOperand(i: 0); |
3371 | |
3372 | switch (Opc) { |
3373 | case ISD::ANY_EXTEND: |
3374 | case ISD::SIGN_EXTEND: |
3375 | case ISD::ZERO_EXTEND: |
3376 | case ISD::TRUNCATE: |
3377 | if (Subtarget.isHVXElementType(Ty: ty(Op)) && |
3378 | Subtarget.isHVXElementType(Ty: ty(Op: Inp0))) { |
3379 | Results.push_back(Elt: CreateTLWrapper(Op, DAG)); |
3380 | } |
3381 | break; |
3382 | case ISD::SETCC: |
3383 | if (shouldWidenToHvx(Ty: ty(Op: Inp0), DAG)) { |
3384 | if (SDValue T = WidenHvxSetCC(Op, DAG)) |
3385 | Results.push_back(Elt: T); |
3386 | } |
3387 | break; |
3388 | case ISD::STORE: { |
3389 | if (shouldWidenToHvx(Ty: ty(Op: cast<StoreSDNode>(Val: N)->getValue()), DAG)) { |
3390 | SDValue Store = WidenHvxStore(Op, DAG); |
3391 | Results.push_back(Elt: Store); |
3392 | } |
3393 | break; |
3394 | } |
3395 | case ISD::MLOAD: |
3396 | if (isHvxPairTy(Ty: ty(Op))) { |
3397 | SDValue S = SplitHvxMemOp(Op, DAG); |
3398 | assert(S->getOpcode() == ISD::MERGE_VALUES); |
3399 | Results.push_back(Elt: S.getOperand(i: 0)); |
3400 | Results.push_back(Elt: S.getOperand(i: 1)); |
3401 | } |
3402 | break; |
3403 | case ISD::MSTORE: |
3404 | if (isHvxPairTy(Ty: ty(Op: Op->getOperand(Num: 1)))) { // Stored value |
3405 | SDValue S = SplitHvxMemOp(Op, DAG); |
3406 | Results.push_back(Elt: S); |
3407 | } |
3408 | break; |
3409 | case ISD::SINT_TO_FP: |
3410 | case ISD::UINT_TO_FP: |
3411 | case ISD::FP_TO_SINT: |
3412 | case ISD::FP_TO_UINT: |
3413 | if (ty(Op).getSizeInBits() != ty(Op: Inp0).getSizeInBits()) { |
3414 | SDValue T = EqualizeFpIntConversion(Op, DAG); |
3415 | Results.push_back(Elt: T); |
3416 | } |
3417 | break; |
3418 | case HexagonISD::SSAT: |
3419 | case HexagonISD::USAT: |
3420 | case HexagonISD::TL_EXTEND: |
3421 | case HexagonISD::TL_TRUNCATE: |
3422 | Results.push_back(Elt: LegalizeHvxResize(Op, DAG)); |
3423 | break; |
3424 | default: |
3425 | break; |
3426 | } |
3427 | } |
3428 | |
3429 | void |
3430 | HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N, |
3431 | SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { |
3432 | unsigned Opc = N->getOpcode(); |
3433 | SDValue Op(N, 0); |
3434 | SDValue Inp0; // Optional first argument. |
3435 | if (N->getNumOperands() > 0) |
3436 | Inp0 = Op.getOperand(i: 0); |
3437 | |
3438 | switch (Opc) { |
3439 | case ISD::ANY_EXTEND: |
3440 | case ISD::SIGN_EXTEND: |
3441 | case ISD::ZERO_EXTEND: |
3442 | case ISD::TRUNCATE: |
3443 | if (Subtarget.isHVXElementType(Ty: ty(Op)) && |
3444 | Subtarget.isHVXElementType(Ty: ty(Op: Inp0))) { |
3445 | Results.push_back(Elt: CreateTLWrapper(Op, DAG)); |
3446 | } |
3447 | break; |
3448 | case ISD::SETCC: |
3449 | if (shouldWidenToHvx(Ty: ty(Op), DAG)) { |
3450 | if (SDValue T = WidenHvxSetCC(Op, DAG)) |
3451 | Results.push_back(Elt: T); |
3452 | } |
3453 | break; |
3454 | case ISD::LOAD: { |
3455 | if (shouldWidenToHvx(Ty: ty(Op), DAG)) { |
3456 | SDValue Load = WidenHvxLoad(Op, DAG); |
3457 | assert(Load->getOpcode() == ISD::MERGE_VALUES); |
3458 | Results.push_back(Elt: Load.getOperand(i: 0)); |
3459 | Results.push_back(Elt: Load.getOperand(i: 1)); |
3460 | } |
3461 | break; |
3462 | } |
3463 | case ISD::BITCAST: |
3464 | if (isHvxBoolTy(Ty: ty(Op: Inp0))) { |
3465 | SDValue C = LowerHvxBitcast(Op, DAG); |
3466 | Results.push_back(Elt: C); |
3467 | } |
3468 | break; |
3469 | case ISD::FP_TO_SINT: |
3470 | case ISD::FP_TO_UINT: |
3471 | if (ty(Op).getSizeInBits() != ty(Op: Inp0).getSizeInBits()) { |
3472 | SDValue T = EqualizeFpIntConversion(Op, DAG); |
3473 | Results.push_back(Elt: T); |
3474 | } |
3475 | break; |
3476 | case HexagonISD::SSAT: |
3477 | case HexagonISD::USAT: |
3478 | case HexagonISD::TL_EXTEND: |
3479 | case HexagonISD::TL_TRUNCATE: |
3480 | Results.push_back(Elt: LegalizeHvxResize(Op, DAG)); |
3481 | break; |
3482 | default: |
3483 | break; |
3484 | } |
3485 | } |
3486 | |
3487 | SDValue |
3488 | HexagonTargetLowering::combineTruncateBeforeLegal(SDValue Op, |
3489 | DAGCombinerInfo &DCI) const { |
3490 | // Simplify V:v2NiB --(bitcast)--> vNi2B --(truncate)--> vNiB |
3491 | // to extract-subvector (shuffle V, pick even, pick odd) |
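  // The mask built below "deals" the 2N source elements into the even-indexed
  // elements followed by the odd-indexed ones; the first half of that shuffle
  // holds the low halves of the i2B elements (Hexagon is little-endian), which
  // is exactly the truncated vNiB value.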
3492 | |
3493 | assert(Op.getOpcode() == ISD::TRUNCATE); |
3494 | SelectionDAG &DAG = DCI.DAG; |
3495 | const SDLoc &dl(Op); |
3496 | |
  if (Op.getOperand(i: 0).getOpcode() != ISD::BITCAST)
3498 | return SDValue(); |
3499 | SDValue Cast = Op.getOperand(i: 0); |
3500 | SDValue Src = Cast.getOperand(i: 0); |
3501 | |
3502 | EVT TruncTy = Op.getValueType(); |
3503 | EVT CastTy = Cast.getValueType(); |
3504 | EVT SrcTy = Src.getValueType(); |
3505 | if (SrcTy.isSimple()) |
3506 | return SDValue(); |
3507 | if (SrcTy.getVectorElementType() != TruncTy.getVectorElementType()) |
3508 | return SDValue(); |
3509 | unsigned SrcLen = SrcTy.getVectorNumElements(); |
3510 | unsigned CastLen = CastTy.getVectorNumElements(); |
3511 | if (2 * CastLen != SrcLen) |
3512 | return SDValue(); |
3513 | |
3514 | SmallVector<int, 128> Mask(SrcLen); |
3515 | for (int i = 0; i != static_cast<int>(CastLen); ++i) { |
3516 | Mask[i] = 2 * i; |
3517 | Mask[i + CastLen] = 2 * i + 1; |
3518 | } |
3519 | SDValue Deal = |
3520 | DAG.getVectorShuffle(VT: SrcTy, dl, N1: Src, N2: DAG.getUNDEF(VT: SrcTy), Mask); |
3521 | return opSplit(Vec: Deal, dl, DAG).first; |
3522 | } |
3523 | |
3524 | SDValue |
3525 | HexagonTargetLowering::combineConcatVectorsBeforeLegal( |
3526 | SDValue Op, DAGCombinerInfo &DCI) const { |
3527 | // Fold |
3528 | // concat (shuffle x, y, m1), (shuffle x, y, m2) |
3529 | // into |
3530 | // shuffle (concat x, y), undef, m3 |
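  // m3 is m1 followed by m2, with each index rebased: lanes taken from the
  // first distinct input keep their index, lanes taken from the second
  // distinct input are offset by the input vector length (see AppendToMask
  // below).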
3531 | if (Op.getNumOperands() != 2) |
3532 | return SDValue(); |
3533 | |
3534 | SelectionDAG &DAG = DCI.DAG; |
3535 | const SDLoc &dl(Op); |
3536 | SDValue V0 = Op.getOperand(i: 0); |
3537 | SDValue V1 = Op.getOperand(i: 1); |
3538 | |
3539 | if (V0.getOpcode() != ISD::VECTOR_SHUFFLE) |
3540 | return SDValue(); |
3541 | if (V1.getOpcode() != ISD::VECTOR_SHUFFLE) |
3542 | return SDValue(); |
3543 | |
3544 | SetVector<SDValue> Order; |
3545 | Order.insert(X: V0.getOperand(i: 0)); |
3546 | Order.insert(X: V0.getOperand(i: 1)); |
3547 | Order.insert(X: V1.getOperand(i: 0)); |
3548 | Order.insert(X: V1.getOperand(i: 1)); |
3549 | |
3550 | if (Order.size() > 2) |
3551 | return SDValue(); |
3552 | |
3553 | // In ISD::VECTOR_SHUFFLE, the types of each input and the type of the |
3554 | // result must be the same. |
3555 | EVT InpTy = V0.getValueType(); |
3556 | assert(InpTy.isVector()); |
3557 | unsigned InpLen = InpTy.getVectorNumElements(); |
3558 | |
3559 | SmallVector<int, 128> LongMask; |
3560 | auto AppendToMask = [&](SDValue Shuffle) { |
3561 | auto *SV = cast<ShuffleVectorSDNode>(Val: Shuffle.getNode()); |
3562 | ArrayRef<int> Mask = SV->getMask(); |
3563 | SDValue X = Shuffle.getOperand(i: 0); |
3564 | SDValue Y = Shuffle.getOperand(i: 1); |
3565 | for (int M : Mask) { |
3566 | if (M == -1) { |
3567 | LongMask.push_back(Elt: M); |
3568 | continue; |
3569 | } |
3570 | SDValue Src = static_cast<unsigned>(M) < InpLen ? X : Y; |
3571 | if (static_cast<unsigned>(M) >= InpLen) |
3572 | M -= InpLen; |
3573 | |
3574 | int OutOffset = Order[0] == Src ? 0 : InpLen; |
3575 | LongMask.push_back(Elt: M + OutOffset); |
3576 | } |
3577 | }; |
3578 | |
3579 | AppendToMask(V0); |
3580 | AppendToMask(V1); |
3581 | |
3582 | SDValue C0 = Order.front(); |
3583 | SDValue C1 = Order.back(); // Can be same as front |
3584 | EVT LongTy = InpTy.getDoubleNumVectorElementsVT(Context&: *DAG.getContext()); |
3585 | |
3586 | SDValue Cat = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: LongTy, Ops: {C0, C1}); |
3587 | return DAG.getVectorShuffle(VT: LongTy, dl, N1: Cat, N2: DAG.getUNDEF(VT: LongTy), Mask: LongMask); |
3588 | } |
3589 | |
3590 | SDValue |
3591 | HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI) |
3592 | const { |
3593 | const SDLoc &dl(N); |
3594 | SelectionDAG &DAG = DCI.DAG; |
3595 | SDValue Op(N, 0); |
3596 | unsigned Opc = Op.getOpcode(); |
3597 | |
3598 | SmallVector<SDValue, 4> Ops(N->ops()); |
3599 | |
3600 | if (Opc == ISD::TRUNCATE) |
3601 | return combineTruncateBeforeLegal(Op, DCI); |
3602 | if (Opc == ISD::CONCAT_VECTORS) |
3603 | return combineConcatVectorsBeforeLegal(Op, DCI); |
3604 | |
3605 | if (DCI.isBeforeLegalizeOps()) |
3606 | return SDValue(); |
3607 | |
3608 | switch (Opc) { |
3609 | case ISD::VSELECT: { |
3610 | // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0) |
3611 | SDValue Cond = Ops[0]; |
3612 | if (Cond->getOpcode() == ISD::XOR) { |
3613 | SDValue C0 = Cond.getOperand(i: 0), C1 = Cond.getOperand(i: 1); |
3614 | if (C1->getOpcode() == HexagonISD::QTRUE) |
3615 | return DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ty(Op), N1: C0, N2: Ops[2], N3: Ops[1]); |
3616 | } |
3617 | break; |
3618 | } |
3619 | case HexagonISD::V2Q: |
3620 | if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) { |
3621 | if (const auto *C = dyn_cast<ConstantSDNode>(Val: Ops[0].getOperand(i: 0))) |
3622 | return C->isZero() ? DAG.getNode(Opcode: HexagonISD::QFALSE, DL: dl, VT: ty(Op)) |
3623 | : DAG.getNode(Opcode: HexagonISD::QTRUE, DL: dl, VT: ty(Op)); |
3624 | } |
3625 | break; |
3626 | case HexagonISD::Q2V: |
3627 | if (Ops[0].getOpcode() == HexagonISD::QTRUE) |
3628 | return DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ty(Op), |
3629 | Operand: DAG.getAllOnesConstant(DL: dl, VT: MVT::i32)); |
3630 | if (Ops[0].getOpcode() == HexagonISD::QFALSE) |
3631 | return getZero(dl, Ty: ty(Op), DAG); |
3632 | break; |
3633 | case HexagonISD::VINSERTW0: |
3634 | if (isUndef(Op: Ops[1])) |
3635 | return Ops[0]; |
3636 | break; |
3637 | case HexagonISD::VROR: { |
3638 | if (Ops[0].getOpcode() == HexagonISD::VROR) { |
3639 | SDValue Vec = Ops[0].getOperand(i: 0); |
3640 | SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(i: 1); |
3641 | SDValue Rot = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ty(Op: Rot0), Ops: {Rot0, Rot1}); |
3642 | return DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ty(Op), Ops: {Vec, Rot}); |
3643 | } |
3644 | break; |
3645 | } |
3646 | } |
3647 | |
3648 | return SDValue(); |
3649 | } |
3650 | |
3651 | bool |
3652 | HexagonTargetLowering::shouldSplitToHvx(MVT Ty, SelectionDAG &DAG) const { |
3653 | if (Subtarget.isHVXVectorType(VecTy: Ty, IncludeBool: true)) |
3654 | return false; |
3655 | auto Action = getPreferredHvxVectorAction(VecTy: Ty); |
3656 | if (Action == TargetLoweringBase::TypeSplitVector) |
3657 | return Subtarget.isHVXVectorType(VecTy: typeLegalize(Ty, DAG), IncludeBool: true); |
3658 | return false; |
3659 | } |
3660 | |
3661 | bool |
3662 | HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const { |
3663 | if (Subtarget.isHVXVectorType(VecTy: Ty, IncludeBool: true)) |
3664 | return false; |
3665 | auto Action = getPreferredHvxVectorAction(VecTy: Ty); |
3666 | if (Action == TargetLoweringBase::TypeWidenVector) |
3667 | return Subtarget.isHVXVectorType(VecTy: typeLegalize(Ty, DAG), IncludeBool: true); |
3668 | return false; |
3669 | } |
3670 | |
3671 | bool |
3672 | HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const { |
3673 | if (!Subtarget.useHVXOps()) |
3674 | return false; |
3675 | // If the type of any result, or any operand type are HVX vector types, |
3676 | // this is an HVX operation. |
3677 | auto IsHvxTy = [this](EVT Ty) { |
3678 | return Ty.isSimple() && Subtarget.isHVXVectorType(VecTy: Ty.getSimpleVT(), IncludeBool: true); |
3679 | }; |
3680 | auto IsHvxOp = [this](SDValue Op) { |
3681 | return Op.getValueType().isSimple() && |
3682 | Subtarget.isHVXVectorType(VecTy: ty(Op), IncludeBool: true); |
3683 | }; |
3684 | if (llvm::any_of(Range: N->values(), P: IsHvxTy) || llvm::any_of(Range: N->ops(), P: IsHvxOp)) |
3685 | return true; |
3686 | |
3687 | // Check if this could be an HVX operation after type widening. |
3688 | auto IsWidenedToHvx = [this, &DAG](SDValue Op) { |
3689 | if (!Op.getValueType().isSimple()) |
3690 | return false; |
3691 | MVT ValTy = ty(Op); |
3692 | return ValTy.isVector() && shouldWidenToHvx(Ty: ValTy, DAG); |
3693 | }; |
3694 | |
3695 | for (int i = 0, e = N->getNumValues(); i != e; ++i) { |
3696 | if (IsWidenedToHvx(SDValue(N, i))) |
3697 | return true; |
3698 | } |
3699 | return llvm::any_of(Range: N->ops(), P: IsWidenedToHvx); |
3700 | } |
3701 | |