1//===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "HexagonISelLowering.h"
10#include "HexagonRegisterInfo.h"
11#include "HexagonSubtarget.h"
12#include "llvm/ADT/SetVector.h"
13#include "llvm/ADT/SmallVector.h"
14#include "llvm/Analysis/MemoryLocation.h"
15#include "llvm/CodeGen/MachineBasicBlock.h"
16#include "llvm/CodeGen/MachineFunction.h"
17#include "llvm/CodeGen/MachineInstr.h"
18#include "llvm/CodeGen/MachineOperand.h"
19#include "llvm/CodeGen/MachineRegisterInfo.h"
20#include "llvm/CodeGen/TargetInstrInfo.h"
21#include "llvm/IR/IntrinsicsHexagon.h"
22#include "llvm/Support/CommandLine.h"
23
24#include <algorithm>
25#include <string>
26#include <utility>
27
28using namespace llvm;
29
30static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
31 cl::Hidden, cl::init(Val: 16),
32 cl::desc("Lower threshold (in bytes) for widening to HVX vectors"));
33
34static cl::opt<bool>
35 EnableFpFastConvert("hexagon-fp-fast-convert", cl::Hidden, cl::init(Val: false),
36 cl::desc("Enable FP fast conversion routine."));
37
38static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 };
39static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
40static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
41static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };
42
43static const unsigned MaxExpandMLA = 8;
44
45static std::tuple<unsigned, unsigned, unsigned> getIEEEProperties(MVT Ty) {
46 // For a float scalar type, return (exp-bits, exp-bias, fraction-bits)
47 MVT ElemTy = Ty.getScalarType();
48 switch (ElemTy.SimpleTy) {
49 case MVT::f16:
50 return std::make_tuple(args: 5, args: 15, args: 10);
51 case MVT::f32:
52 return std::make_tuple(args: 8, args: 127, args: 23);
53 case MVT::f64:
54 return std::make_tuple(args: 11, args: 1023, args: 52);
55 default:
56 break;
57 }
58 llvm_unreachable(("Unexpected type: " + EVT(ElemTy).getEVTString()).c_str());
59}
60
61void
62HexagonTargetLowering::initializeHVXLowering() {
63 if (Subtarget.useHVX64BOps()) {
64 addRegisterClass(VT: MVT::v64i8, RC: &Hexagon::HvxVRRegClass);
65 addRegisterClass(VT: MVT::v32i16, RC: &Hexagon::HvxVRRegClass);
66 addRegisterClass(VT: MVT::v16i32, RC: &Hexagon::HvxVRRegClass);
67 addRegisterClass(VT: MVT::v128i8, RC: &Hexagon::HvxWRRegClass);
68 addRegisterClass(VT: MVT::v64i16, RC: &Hexagon::HvxWRRegClass);
69 addRegisterClass(VT: MVT::v32i32, RC: &Hexagon::HvxWRRegClass);
70 // These "short" boolean vector types should be legal because
71 // they will appear as results of vector compares. If they were
72 // not legal, type legalization would try to make them legal
73 // and that would require using operations that do not use or
74 // produce such types. That, in turn, would imply using custom
75 // nodes, which would be unoptimizable by the DAG combiner.
76 // The idea is to rely on target-independent operations as much
77 // as possible.
78 addRegisterClass(VT: MVT::v16i1, RC: &Hexagon::HvxQRRegClass);
79 addRegisterClass(VT: MVT::v32i1, RC: &Hexagon::HvxQRRegClass);
80 addRegisterClass(VT: MVT::v64i1, RC: &Hexagon::HvxQRRegClass);
81 } else if (Subtarget.useHVX128BOps()) {
82 addRegisterClass(VT: MVT::v128i8, RC: &Hexagon::HvxVRRegClass);
83 addRegisterClass(VT: MVT::v64i16, RC: &Hexagon::HvxVRRegClass);
84 addRegisterClass(VT: MVT::v32i32, RC: &Hexagon::HvxVRRegClass);
85 addRegisterClass(VT: MVT::v256i8, RC: &Hexagon::HvxWRRegClass);
86 addRegisterClass(VT: MVT::v128i16, RC: &Hexagon::HvxWRRegClass);
87 addRegisterClass(VT: MVT::v64i32, RC: &Hexagon::HvxWRRegClass);
88 addRegisterClass(VT: MVT::v32i1, RC: &Hexagon::HvxQRRegClass);
89 addRegisterClass(VT: MVT::v64i1, RC: &Hexagon::HvxQRRegClass);
90 addRegisterClass(VT: MVT::v128i1, RC: &Hexagon::HvxQRRegClass);
91 if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) {
92 addRegisterClass(VT: MVT::v32f32, RC: &Hexagon::HvxVRRegClass);
93 addRegisterClass(VT: MVT::v64f16, RC: &Hexagon::HvxVRRegClass);
94 addRegisterClass(VT: MVT::v64f32, RC: &Hexagon::HvxWRRegClass);
95 addRegisterClass(VT: MVT::v128f16, RC: &Hexagon::HvxWRRegClass);
96 }
97 if (Subtarget.useHVXV81Ops()) {
98 addRegisterClass(VT: MVT::v64bf16, RC: &Hexagon::HvxVRRegClass);
99 addRegisterClass(VT: MVT::v128bf16, RC: &Hexagon::HvxWRRegClass);
100 }
101 }
102
103 // Set up operation actions.
104
105 bool Use64b = Subtarget.useHVX64BOps();
106 ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
107 ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
108 MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8;
109 MVT WordV = Use64b ? MVT::v16i32 : MVT::v32i32;
110 MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;
111
112 auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
113 setOperationAction(Op: Opc, VT: FromTy, Action: Promote);
114 AddPromotedToType(Opc, OrigVT: FromTy, DestVT: ToTy);
115 };
116
117 // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
118 // Note: v16i1 -> i16 is handled in type legalization instead of op
119 // legalization.
120 setOperationAction(Op: ISD::BITCAST, VT: MVT::i16, Action: Custom);
121 setOperationAction(Op: ISD::BITCAST, VT: MVT::i32, Action: Custom);
122 setOperationAction(Op: ISD::BITCAST, VT: MVT::i64, Action: Custom);
123 setOperationAction(Op: ISD::BITCAST, VT: MVT::v16i1, Action: Custom);
124 setOperationAction(Op: ISD::BITCAST, VT: MVT::v128i1, Action: Custom);
125 setOperationAction(Op: ISD::BITCAST, VT: MVT::i128, Action: Custom);
126 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: ByteV, Action: Legal);
127 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: ByteW, Action: Legal);
128 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom);
129
130 if (Subtarget.useHVX128BOps()) {
131 setOperationAction(Op: ISD::BITCAST, VT: MVT::v32i1, Action: Custom);
132 setOperationAction(Op: ISD::BITCAST, VT: MVT::v64i1, Action: Custom);
133 setOperationAction(Op: ISD::STORE, VT: MVT::v32i1, Action: Custom);
134 setOperationAction(Op: ISD::LOAD, VT: MVT::v32i1, Action: Custom);
135 setOperationAction(Op: ISD::STORE, VT: MVT::v64i1, Action: Custom);
136 setOperationAction(Op: ISD::LOAD, VT: MVT::v64i1, Action: Custom);
137 setOperationAction(Op: ISD::STORE, VT: MVT::v128i1, Action: Custom);
138 setOperationAction(Op: ISD::LOAD, VT: MVT::v128i1, Action: Custom);
139 }
140 if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
141 Subtarget.useHVXFloatingPoint()) {
142
143 static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 };
144 static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 };
145
146 for (MVT T : FloatV) {
147 setOperationAction(Op: ISD::FADD, VT: T, Action: Legal);
148 setOperationAction(Op: ISD::FSUB, VT: T, Action: Legal);
149 setOperationAction(Op: ISD::FMUL, VT: T, Action: Legal);
150 setOperationAction(Op: ISD::FMINIMUMNUM, VT: T, Action: Legal);
151 setOperationAction(Op: ISD::FMAXIMUMNUM, VT: T, Action: Legal);
152
153 setOperationAction(Op: ISD::INSERT_SUBVECTOR, VT: T, Action: Custom);
154 setOperationAction(Op: ISD::EXTRACT_SUBVECTOR, VT: T, Action: Custom);
155
156 setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Legal);
157 setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Legal);
158
159 setOperationAction(Op: ISD::MLOAD, VT: T, Action: Custom);
160 setOperationAction(Op: ISD::MSTORE, VT: T, Action: Custom);
161 // Custom-lower BUILD_VECTOR. The standard (target-independent)
162 // handling of it would convert it to a load, which is not always
163 // the optimal choice.
164 setOperationAction(Op: ISD::BUILD_VECTOR, VT: T, Action: Custom);
165 }
166
167
168 // BUILD_VECTOR with f16 operands cannot be promoted without
169 // promoting the result, so lower the node to vsplat or constant pool
170 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::f16, Action: Custom);
171 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::f16, Action: Custom);
172 setOperationAction(Op: ISD::SPLAT_VECTOR, VT: MVT::f16, Action: Custom);
173
174 // Vector shuffle is always promoted to ByteV and a bitcast to f16 is
175 // generated.
176 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);
177 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
178 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
179 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);
180
181 if (Subtarget.useHVXV81Ops()) {
182 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128bf16, ByteW);
183 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64bf16, ByteV);
184 setPromoteTo(ISD::SETCC, MVT::v64bf16, MVT::v64f32);
185 setPromoteTo(ISD::FADD, MVT::v64bf16, MVT::v64f32);
186 setPromoteTo(ISD::FSUB, MVT::v64bf16, MVT::v64f32);
187 setPromoteTo(ISD::FMUL, MVT::v64bf16, MVT::v64f32);
188 setPromoteTo(ISD::FMINNUM, MVT::v64bf16, MVT::v64f32);
189 setPromoteTo(ISD::FMAXNUM, MVT::v64bf16, MVT::v64f32);
190
191 setOperationAction(Op: ISD::SPLAT_VECTOR, VT: MVT::v64bf16, Action: Legal);
192 setOperationAction(Op: ISD::INSERT_SUBVECTOR, VT: MVT::v64bf16, Action: Custom);
193 setOperationAction(Op: ISD::EXTRACT_SUBVECTOR, VT: MVT::v64bf16, Action: Custom);
194
195 setOperationAction(Op: ISD::LOAD, VT: MVT::v128bf16, Action: Custom);
196 setOperationAction(Op: ISD::STORE, VT: MVT::v128bf16, Action: Custom);
197
198 setOperationAction(Op: ISD::MLOAD, VT: MVT::v64bf16, Action: Custom);
199 setOperationAction(Op: ISD::MSTORE, VT: MVT::v64bf16, Action: Custom);
200 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v64bf16, Action: Custom);
201 setOperationAction(Op: ISD::CONCAT_VECTORS, VT: MVT::v64bf16, Action: Custom);
202
203 setOperationAction(Op: ISD::MLOAD, VT: MVT::v128bf16, Action: Custom);
204 setOperationAction(Op: ISD::MSTORE, VT: MVT::v128bf16, Action: Custom);
205 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v128bf16, Action: Custom);
206 setOperationAction(Op: ISD::CONCAT_VECTORS, VT: MVT::v128bf16, Action: Custom);
207
208 setOperationAction(Op: ISD::SPLAT_VECTOR, VT: MVT::bf16, Action: Custom);
209 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::bf16, Action: Custom);
210 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::bf16, Action: Custom);
211 }
212
213 for (MVT P : FloatW) {
214 setOperationAction(Op: ISD::LOAD, VT: P, Action: Custom);
215 setOperationAction(Op: ISD::STORE, VT: P, Action: Custom);
216 setOperationAction(Op: ISD::FADD, VT: P, Action: Custom);
217 setOperationAction(Op: ISD::FSUB, VT: P, Action: Custom);
218 setOperationAction(Op: ISD::FMUL, VT: P, Action: Custom);
219 setOperationAction(Op: ISD::FMINIMUMNUM, VT: P, Action: Custom);
220 setOperationAction(Op: ISD::FMAXIMUMNUM, VT: P, Action: Custom);
221 setOperationAction(Op: ISD::SETCC, VT: P, Action: Custom);
222 setOperationAction(Op: ISD::VSELECT, VT: P, Action: Custom);
223
224 // Custom-lower BUILD_VECTOR. The standard (target-independent)
225 // handling of it would convert it to a load, which is not always
226 // the optimal choice.
227 setOperationAction(Op: ISD::BUILD_VECTOR, VT: P, Action: Custom);
228 // Make concat-vectors custom to handle concats of more than 2 vectors.
229 setOperationAction(Op: ISD::CONCAT_VECTORS, VT: P, Action: Custom);
230
231 setOperationAction(Op: ISD::MLOAD, VT: P, Action: Custom);
232 setOperationAction(Op: ISD::MSTORE, VT: P, Action: Custom);
233 }
234
235 if (Subtarget.useHVXQFloatOps()) {
236 setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::v64f32, Action: Custom);
237 setOperationAction(Op: ISD::FP_ROUND, VT: MVT::v64f16, Action: Legal);
238 } else if (Subtarget.useHVXIEEEFPOps()) {
239 setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::v64f32, Action: Legal);
240 setOperationAction(Op: ISD::FP_ROUND, VT: MVT::v64f16, Action: Legal);
241 }
242 }
243
244 for (MVT T : LegalV) {
245 setIndexedLoadAction(IdxModes: ISD::POST_INC, VT: T, Action: Legal);
246 setIndexedStoreAction(IdxModes: ISD::POST_INC, VT: T, Action: Legal);
247
248 setOperationAction(Op: ISD::ABS, VT: T, Action: Legal);
249 setOperationAction(Op: ISD::AND, VT: T, Action: Legal);
250 setOperationAction(Op: ISD::OR, VT: T, Action: Legal);
251 setOperationAction(Op: ISD::XOR, VT: T, Action: Legal);
252 setOperationAction(Op: ISD::ADD, VT: T, Action: Legal);
253 setOperationAction(Op: ISD::SUB, VT: T, Action: Legal);
254 setOperationAction(Op: ISD::MUL, VT: T, Action: Legal);
255 setOperationAction(Op: ISD::CTPOP, VT: T, Action: Legal);
256 setOperationAction(Op: ISD::CTLZ, VT: T, Action: Legal);
257 setOperationAction(Op: ISD::SELECT, VT: T, Action: Legal);
258 setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Legal);
259 setOperationAction(Op: ISD::UADDSAT, VT: T, Action: Legal);
260 setOperationAction(Op: ISD::SADDSAT, VT: T, Action: Legal);
261 setOperationAction(Op: ISD::USUBSAT, VT: T, Action: Legal);
262 setOperationAction(Op: ISD::SSUBSAT, VT: T, Action: Legal);
263 if (T != ByteV) {
264 setOperationAction(Op: ISD::SIGN_EXTEND_VECTOR_INREG, VT: T, Action: Legal);
265 setOperationAction(Op: ISD::ZERO_EXTEND_VECTOR_INREG, VT: T, Action: Legal);
266 setOperationAction(Op: ISD::BSWAP, VT: T, Action: Legal);
267 }
268
269 setOperationAction(Op: ISD::SMIN, VT: T, Action: Legal);
270 setOperationAction(Op: ISD::SMAX, VT: T, Action: Legal);
271 if (T.getScalarType() != MVT::i32) {
272 setOperationAction(Op: ISD::UMIN, VT: T, Action: Legal);
273 setOperationAction(Op: ISD::UMAX, VT: T, Action: Legal);
274 }
275
276 setOperationAction(Op: ISD::CTTZ, VT: T, Action: Custom);
277 setOperationAction(Op: ISD::LOAD, VT: T, Action: Custom);
278 setOperationAction(Op: ISD::MLOAD, VT: T, Action: Custom);
279 setOperationAction(Op: ISD::MSTORE, VT: T, Action: Custom);
280 if (T.getScalarType() != MVT::i32) {
281 setOperationAction(Op: ISD::MULHS, VT: T, Action: Legal);
282 setOperationAction(Op: ISD::MULHU, VT: T, Action: Legal);
283 }
284
285 setOperationAction(Op: ISD::BUILD_VECTOR, VT: T, Action: Custom);
286 // Make concat-vectors custom to handle concats of more than 2 vectors.
287 setOperationAction(Op: ISD::CONCAT_VECTORS, VT: T, Action: Custom);
288 setOperationAction(Op: ISD::INSERT_SUBVECTOR, VT: T, Action: Custom);
289 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: T, Action: Custom);
290 setOperationAction(Op: ISD::EXTRACT_SUBVECTOR, VT: T, Action: Custom);
291 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: T, Action: Custom);
292 setOperationAction(Op: ISD::ANY_EXTEND, VT: T, Action: Custom);
293 setOperationAction(Op: ISD::SIGN_EXTEND, VT: T, Action: Custom);
294 setOperationAction(Op: ISD::ZERO_EXTEND, VT: T, Action: Custom);
295 setOperationAction(Op: ISD::FSHL, VT: T, Action: Custom);
296 setOperationAction(Op: ISD::FSHR, VT: T, Action: Custom);
297 if (T != ByteV) {
298 setOperationAction(Op: ISD::ANY_EXTEND_VECTOR_INREG, VT: T, Action: Custom);
299 // HVX only has shifts of words and halfwords.
300 setOperationAction(Op: ISD::SRA, VT: T, Action: Custom);
301 setOperationAction(Op: ISD::SHL, VT: T, Action: Custom);
302 setOperationAction(Op: ISD::SRL, VT: T, Action: Custom);
303
304 // Promote all shuffles to operate on vectors of bytes.
305 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
306 }
307
308 if (Subtarget.useHVXFloatingPoint()) {
309 // Same action for both QFloat and IEEE.
310 setOperationAction(Op: ISD::SINT_TO_FP, VT: T, Action: Custom);
311 setOperationAction(Op: ISD::UINT_TO_FP, VT: T, Action: Custom);
312 setOperationAction(Op: ISD::FP_TO_SINT, VT: T, Action: Custom);
313 setOperationAction(Op: ISD::FP_TO_UINT, VT: T, Action: Custom);
314 }
315
316 setCondCodeAction(CCs: ISD::SETNE, VT: T, Action: Expand);
317 setCondCodeAction(CCs: ISD::SETLE, VT: T, Action: Expand);
318 setCondCodeAction(CCs: ISD::SETGE, VT: T, Action: Expand);
319 setCondCodeAction(CCs: ISD::SETLT, VT: T, Action: Expand);
320 setCondCodeAction(CCs: ISD::SETULE, VT: T, Action: Expand);
321 setCondCodeAction(CCs: ISD::SETUGE, VT: T, Action: Expand);
322 setCondCodeAction(CCs: ISD::SETULT, VT: T, Action: Expand);
323 }
324
325 for (MVT T : LegalW) {
326 // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
327 // independent) handling of it would convert it to a load, which is
328 // not always the optimal choice.
329 setOperationAction(Op: ISD::BUILD_VECTOR, VT: T, Action: Custom);
330 // Make concat-vectors custom to handle concats of more than 2 vectors.
331 setOperationAction(Op: ISD::CONCAT_VECTORS, VT: T, Action: Custom);
332
333 // Custom-lower these operations for pairs. Expand them into a concat
334 // of the corresponding operations on individual vectors.
335 setOperationAction(Op: ISD::ANY_EXTEND, VT: T, Action: Custom);
336 setOperationAction(Op: ISD::SIGN_EXTEND, VT: T, Action: Custom);
337 setOperationAction(Op: ISD::ZERO_EXTEND, VT: T, Action: Custom);
338 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: T, Action: Custom);
339 setOperationAction(Op: ISD::ANY_EXTEND_VECTOR_INREG, VT: T, Action: Custom);
340 setOperationAction(Op: ISD::SIGN_EXTEND_VECTOR_INREG, VT: T, Action: Legal);
341 setOperationAction(Op: ISD::ZERO_EXTEND_VECTOR_INREG, VT: T, Action: Legal);
342 setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Custom);
343
344 setOperationAction(Op: ISD::LOAD, VT: T, Action: Custom);
345 setOperationAction(Op: ISD::STORE, VT: T, Action: Custom);
346 setOperationAction(Op: ISD::MLOAD, VT: T, Action: Custom);
347 setOperationAction(Op: ISD::MSTORE, VT: T, Action: Custom);
348 setOperationAction(Op: ISD::ABS, VT: T, Action: Custom);
349 setOperationAction(Op: ISD::CTLZ, VT: T, Action: Custom);
350 setOperationAction(Op: ISD::CTTZ, VT: T, Action: Custom);
351 setOperationAction(Op: ISD::CTPOP, VT: T, Action: Custom);
352
353 setOperationAction(Op: ISD::ADD, VT: T, Action: Legal);
354 setOperationAction(Op: ISD::UADDSAT, VT: T, Action: Legal);
355 setOperationAction(Op: ISD::SADDSAT, VT: T, Action: Legal);
356 setOperationAction(Op: ISD::SUB, VT: T, Action: Legal);
357 setOperationAction(Op: ISD::USUBSAT, VT: T, Action: Legal);
358 setOperationAction(Op: ISD::SSUBSAT, VT: T, Action: Legal);
359 setOperationAction(Op: ISD::MUL, VT: T, Action: Custom);
360 setOperationAction(Op: ISD::MULHS, VT: T, Action: Custom);
361 setOperationAction(Op: ISD::MULHU, VT: T, Action: Custom);
362 setOperationAction(Op: ISD::AND, VT: T, Action: Custom);
363 setOperationAction(Op: ISD::OR, VT: T, Action: Custom);
364 setOperationAction(Op: ISD::XOR, VT: T, Action: Custom);
365 setOperationAction(Op: ISD::SETCC, VT: T, Action: Custom);
366 setOperationAction(Op: ISD::VSELECT, VT: T, Action: Custom);
367 if (T != ByteW) {
368 setOperationAction(Op: ISD::SRA, VT: T, Action: Custom);
369 setOperationAction(Op: ISD::SHL, VT: T, Action: Custom);
370 setOperationAction(Op: ISD::SRL, VT: T, Action: Custom);
371
372 // Promote all shuffles to operate on vectors of bytes.
373 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
374 }
375 setOperationAction(Op: ISD::FSHL, VT: T, Action: Custom);
376 setOperationAction(Op: ISD::FSHR, VT: T, Action: Custom);
377
378 setOperationAction(Op: ISD::SMIN, VT: T, Action: Custom);
379 setOperationAction(Op: ISD::SMAX, VT: T, Action: Custom);
380 if (T.getScalarType() != MVT::i32) {
381 setOperationAction(Op: ISD::UMIN, VT: T, Action: Custom);
382 setOperationAction(Op: ISD::UMAX, VT: T, Action: Custom);
383 }
384
385 if (Subtarget.useHVXFloatingPoint()) {
386 // Same action for both QFloat and IEEE.
387 setOperationAction(Op: ISD::SINT_TO_FP, VT: T, Action: Custom);
388 setOperationAction(Op: ISD::UINT_TO_FP, VT: T, Action: Custom);
389 setOperationAction(Op: ISD::FP_TO_SINT, VT: T, Action: Custom);
390 setOperationAction(Op: ISD::FP_TO_UINT, VT: T, Action: Custom);
391 }
392 }
393
394 // Legalize all of these to HexagonISD::[SU]MUL_LOHI.
395 setOperationAction(Op: ISD::MULHS, VT: WordV, Action: Custom); // -> _LOHI
396 setOperationAction(Op: ISD::MULHU, VT: WordV, Action: Custom); // -> _LOHI
397 setOperationAction(Op: ISD::SMUL_LOHI, VT: WordV, Action: Custom);
398 setOperationAction(Op: ISD::UMUL_LOHI, VT: WordV, Action: Custom);
399
400 setCondCodeAction(CCs: ISD::SETNE, VT: MVT::v64f16, Action: Expand);
401 setCondCodeAction(CCs: ISD::SETLE, VT: MVT::v64f16, Action: Expand);
402 setCondCodeAction(CCs: ISD::SETGE, VT: MVT::v64f16, Action: Expand);
403 setCondCodeAction(CCs: ISD::SETLT, VT: MVT::v64f16, Action: Expand);
404 setCondCodeAction(CCs: ISD::SETONE, VT: MVT::v64f16, Action: Expand);
405 setCondCodeAction(CCs: ISD::SETOLE, VT: MVT::v64f16, Action: Expand);
406 setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::v64f16, Action: Expand);
407 setCondCodeAction(CCs: ISD::SETOLT, VT: MVT::v64f16, Action: Expand);
408 setCondCodeAction(CCs: ISD::SETUNE, VT: MVT::v64f16, Action: Expand);
409 setCondCodeAction(CCs: ISD::SETULE, VT: MVT::v64f16, Action: Expand);
410 setCondCodeAction(CCs: ISD::SETUGE, VT: MVT::v64f16, Action: Expand);
411 setCondCodeAction(CCs: ISD::SETULT, VT: MVT::v64f16, Action: Expand);
412 setCondCodeAction(CCs: ISD::SETUO, VT: MVT::v64f16, Action: Expand);
413 setCondCodeAction(CCs: ISD::SETO, VT: MVT::v64f16, Action: Expand);
414
415 setCondCodeAction(CCs: ISD::SETNE, VT: MVT::v32f32, Action: Expand);
416 setCondCodeAction(CCs: ISD::SETLE, VT: MVT::v32f32, Action: Expand);
417 setCondCodeAction(CCs: ISD::SETGE, VT: MVT::v32f32, Action: Expand);
418 setCondCodeAction(CCs: ISD::SETLT, VT: MVT::v32f32, Action: Expand);
419 setCondCodeAction(CCs: ISD::SETONE, VT: MVT::v32f32, Action: Expand);
420 setCondCodeAction(CCs: ISD::SETOLE, VT: MVT::v32f32, Action: Expand);
421 setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::v32f32, Action: Expand);
422 setCondCodeAction(CCs: ISD::SETOLT, VT: MVT::v32f32, Action: Expand);
423 setCondCodeAction(CCs: ISD::SETUNE, VT: MVT::v32f32, Action: Expand);
424 setCondCodeAction(CCs: ISD::SETULE, VT: MVT::v32f32, Action: Expand);
425 setCondCodeAction(CCs: ISD::SETUGE, VT: MVT::v32f32, Action: Expand);
426 setCondCodeAction(CCs: ISD::SETULT, VT: MVT::v32f32, Action: Expand);
427 setCondCodeAction(CCs: ISD::SETUO, VT: MVT::v32f32, Action: Expand);
428 setCondCodeAction(CCs: ISD::SETO, VT: MVT::v32f32, Action: Expand);
429
430 // Boolean vectors.
431
432 for (MVT T : LegalW) {
433 // Boolean types for vector pairs will overlap with the boolean
434 // types for single vectors, e.g.
435 // v64i8 -> v64i1 (single)
436 // v64i16 -> v64i1 (pair)
437 // Set these actions first, and allow the single actions to overwrite
438 // any duplicates.
439 MVT BoolW = MVT::getVectorVT(VT: MVT::i1, NumElements: T.getVectorNumElements());
440 setOperationAction(Op: ISD::SETCC, VT: BoolW, Action: Custom);
441 setOperationAction(Op: ISD::AND, VT: BoolW, Action: Custom);
442 setOperationAction(Op: ISD::OR, VT: BoolW, Action: Custom);
443 setOperationAction(Op: ISD::XOR, VT: BoolW, Action: Custom);
444 // Masked load/store takes a mask that may need splitting.
445 setOperationAction(Op: ISD::MLOAD, VT: BoolW, Action: Custom);
446 setOperationAction(Op: ISD::MSTORE, VT: BoolW, Action: Custom);
447 }
448
449 for (MVT T : LegalV) {
450 MVT BoolV = MVT::getVectorVT(VT: MVT::i1, NumElements: T.getVectorNumElements());
451 setOperationAction(Op: ISD::BUILD_VECTOR, VT: BoolV, Action: Custom);
452 setOperationAction(Op: ISD::CONCAT_VECTORS, VT: BoolV, Action: Custom);
453 setOperationAction(Op: ISD::INSERT_SUBVECTOR, VT: BoolV, Action: Custom);
454 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: BoolV, Action: Custom);
455 setOperationAction(Op: ISD::EXTRACT_SUBVECTOR, VT: BoolV, Action: Custom);
456 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: BoolV, Action: Custom);
457 setOperationAction(Op: ISD::SELECT, VT: BoolV, Action: Custom);
458 setOperationAction(Op: ISD::AND, VT: BoolV, Action: Legal);
459 setOperationAction(Op: ISD::OR, VT: BoolV, Action: Legal);
460 setOperationAction(Op: ISD::XOR, VT: BoolV, Action: Legal);
461 }
462
463 if (Use64b) {
464 for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
465 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: T, Action: Legal);
466 } else {
467 for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
468 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: T, Action: Legal);
469 }
470
471 // Handle store widening for short vectors.
472 unsigned HwLen = Subtarget.getVectorLength();
473 for (MVT ElemTy : Subtarget.getHVXElementTypes()) {
474 if (ElemTy == MVT::i1)
475 continue;
476 int ElemWidth = ElemTy.getFixedSizeInBits();
477 int MaxElems = (8*HwLen) / ElemWidth;
478 for (int N = 2; N < MaxElems; N *= 2) {
479 MVT VecTy = MVT::getVectorVT(VT: ElemTy, NumElements: N);
480 auto Action = getPreferredVectorAction(VT: VecTy);
481 if (Action == TargetLoweringBase::TypeWidenVector) {
482 setOperationAction(Op: ISD::LOAD, VT: VecTy, Action: Custom);
483 setOperationAction(Op: ISD::STORE, VT: VecTy, Action: Custom);
484 setOperationAction(Op: ISD::SETCC, VT: VecTy, Action: Custom);
485 setOperationAction(Op: ISD::TRUNCATE, VT: VecTy, Action: Custom);
486 setOperationAction(Op: ISD::ANY_EXTEND, VT: VecTy, Action: Custom);
487 setOperationAction(Op: ISD::SIGN_EXTEND, VT: VecTy, Action: Custom);
488 setOperationAction(Op: ISD::ZERO_EXTEND, VT: VecTy, Action: Custom);
489 if (Subtarget.useHVXFloatingPoint()) {
490 setOperationAction(Op: ISD::FP_TO_SINT, VT: VecTy, Action: Custom);
491 setOperationAction(Op: ISD::FP_TO_UINT, VT: VecTy, Action: Custom);
492 setOperationAction(Op: ISD::SINT_TO_FP, VT: VecTy, Action: Custom);
493 setOperationAction(Op: ISD::UINT_TO_FP, VT: VecTy, Action: Custom);
494 }
495
496 MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: N);
497 if (!isTypeLegal(VT: BoolTy))
498 setOperationAction(Op: ISD::SETCC, VT: BoolTy, Action: Custom);
499 }
500 }
501 }
502
503 // Include cases which are not hander earlier
504 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v32i1, Action: Custom);
505 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v64i1, Action: Custom);
506 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v32i1, Action: Custom);
507
508 setTargetDAGCombine({ISD::CONCAT_VECTORS, ISD::TRUNCATE, ISD::VSELECT});
509
510 setTargetDAGCombine({ISD::PARTIAL_REDUCE_SMLA, ISD::PARTIAL_REDUCE_UMLA,
511 ISD::PARTIAL_REDUCE_SUMLA});
512
513 // Partial MLA reductions.
514 {
515 static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
516 ISD::PARTIAL_REDUCE_UMLA,
517 ISD::PARTIAL_REDUCE_SUMLA};
518
519 auto HvxType = [=](MVT ScalarT, unsigned Factor = 1) {
520 return MVT::getVectorVT(VT: ScalarT, NumElements: Subtarget.getVectorLength() * Factor *
521 8 / ScalarT.getSizeInBits());
522 };
523
524 // Tuple of (Acc element type, input element type, vector pair).
525 // The assumption is both the input and reduction result are of the same
526 // size so the reduction ratio is the same as the ratio of element type
527 // sizes. This may not hold for all available instructions.
528 typedef std::tuple<MVT, MVT, bool> ReductionSignature;
529
530 static const std::vector<ReductionSignature> NativeReductions = {
531 {MVT::i32, MVT::i8, false},
532 };
533
534 for (const auto &R : NativeReductions) {
535
536 MVT AccType = std::get<0>(t: R);
537 MVT InputType = std::get<1>(t: R);
538 unsigned Factor = std::get<2>(t: R) ? 2 : 1;
539
540 // The native size is legal.
541 setPartialReduceMLAAction(Opcodes: MLAOps, AccVT: HvxType(AccType), InputVT: HvxType(InputType),
542 Action: Legal);
543
544 // Allow custom partial MLA reductions on larger vectors than legally
545 // supported. These reduction must be declared as Custom (or Legal)
546 // for foldPartialReduceMLAMulOp() to fold the multiply by one pattern
547 // inserted when the partial reduction intrinsic is converted to
548 // PARTIAL_REDUCE_U/S/SUMLA. Otherwise, the Split action will apply
549 // on the original pattern, including the extensions and multiplies,
550 // which will make it impossible to match.
551 // There are two independent ways to extend the
552 // input size: 1. to concatenate the result - output vector is
553 // proportionally extended, 2) to reduce the result - the output vector
554 // size stays the same. We limit allowed combinations so that the total
555 // number of generated reduction instructions is limited by a constant
556 // number. This limit is arbitrary and can be revised. On one hand, it is
557 // convenient to have more choices; on the other hand, there is a
558 // diminishing benefit of very long sequences, which should probably be
559 // written as loops instead.
560 for (unsigned ConcatFactor = 1; ConcatFactor <= MaxExpandMLA;
561 ConcatFactor <<= 1)
562 for (unsigned ReductionFactor = 1; ReductionFactor <= MaxExpandMLA;
563 ReductionFactor <<= 1)
564 if (ConcatFactor * ReductionFactor != 1 &&
565 ConcatFactor * ReductionFactor <= MaxExpandMLA)
566 setPartialReduceMLAAction(
567 Opcodes: MLAOps, AccVT: HvxType(AccType, Factor * ConcatFactor),
568 InputVT: HvxType(InputType, Factor * ConcatFactor * ReductionFactor),
569 Action: Custom);
570 }
571 }
572}
573
574unsigned
575HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const {
576 // Early exit for invalid input types
577 if (!VecTy.isVector())
578 return ~0u;
579
580 MVT ElemTy = VecTy.getVectorElementType();
581 unsigned VecLen = VecTy.getVectorNumElements();
582 unsigned HwLen = Subtarget.getVectorLength();
583
584 // Split vectors of i1 that exceed byte vector length.
585 if (ElemTy == MVT::i1 && VecLen > HwLen)
586 return TargetLoweringBase::TypeSplitVector;
587
588 ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
589 // For shorter vectors of i1, widen them if any of the corresponding
590 // vectors of integers needs to be widened.
591 if (ElemTy == MVT::i1) {
592 for (MVT T : Tys) {
593 assert(T != MVT::i1);
594 auto A = getPreferredHvxVectorAction(VecTy: MVT::getVectorVT(VT: T, NumElements: VecLen));
595 if (A != ~0u)
596 return A;
597 }
598 return ~0u;
599 }
600
601 // If the size of VecTy is at least half of the vector length,
602 // widen the vector. Note: the threshold was not selected in
603 // any scientific way.
604 if (llvm::is_contained(Range&: Tys, Element: ElemTy)) {
605 unsigned VecWidth = VecTy.getSizeInBits();
606 unsigned HwWidth = 8*HwLen;
607 if (VecWidth > 2*HwWidth)
608 return TargetLoweringBase::TypeSplitVector;
609
610 bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0;
611 if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth)
612 return TargetLoweringBase::TypeWidenVector;
613 if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
614 return TargetLoweringBase::TypeWidenVector;
615 }
616
617 // Defer to default.
618 return ~0u;
619}
620
621unsigned
622HexagonTargetLowering::getCustomHvxOperationAction(SDNode &Op) const {
623 unsigned Opc = Op.getOpcode();
624 switch (Opc) {
625 case HexagonISD::SMUL_LOHI:
626 case HexagonISD::UMUL_LOHI:
627 case HexagonISD::USMUL_LOHI:
628 return TargetLoweringBase::Custom;
629 }
630 return TargetLoweringBase::Legal;
631}
632
633SDValue
634HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
635 const SDLoc &dl, SelectionDAG &DAG) const {
636 SmallVector<SDValue,4> IntOps;
637 IntOps.push_back(Elt: DAG.getConstant(Val: IntId, DL: dl, VT: MVT::i32));
638 append_range(C&: IntOps, R&: Ops);
639 return DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL: dl, VT: ResTy, Ops: IntOps);
640}
641
642MVT
643HexagonTargetLowering::typeJoin(const TypePair &Tys) const {
644 assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType());
645
646 MVT ElemTy = Tys.first.getVectorElementType();
647 return MVT::getVectorVT(VT: ElemTy, NumElements: Tys.first.getVectorNumElements() +
648 Tys.second.getVectorNumElements());
649}
650
651HexagonTargetLowering::TypePair
652HexagonTargetLowering::typeSplit(MVT VecTy) const {
653 assert(VecTy.isVector());
654 unsigned NumElem = VecTy.getVectorNumElements();
655 assert((NumElem % 2) == 0 && "Expecting even-sized vector type");
656 MVT HalfTy = MVT::getVectorVT(VT: VecTy.getVectorElementType(), NumElements: NumElem/2);
657 return { HalfTy, HalfTy };
658}
659
660MVT
661HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const {
662 MVT ElemTy = VecTy.getVectorElementType();
663 MVT NewElemTy = MVT::getIntegerVT(BitWidth: ElemTy.getSizeInBits() * Factor);
664 return MVT::getVectorVT(VT: NewElemTy, NumElements: VecTy.getVectorNumElements());
665}
666
667MVT
668HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const {
669 MVT ElemTy = VecTy.getVectorElementType();
670 MVT NewElemTy = MVT::getIntegerVT(BitWidth: ElemTy.getSizeInBits() / Factor);
671 return MVT::getVectorVT(VT: NewElemTy, NumElements: VecTy.getVectorNumElements());
672}
673
674SDValue
675HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy,
676 SelectionDAG &DAG) const {
677 if (ty(Op: Vec).getVectorElementType() == ElemTy)
678 return Vec;
679 MVT CastTy = tyVector(Ty: Vec.getValueType().getSimpleVT(), ElemTy);
680 return DAG.getBitcast(VT: CastTy, V: Vec);
681}
682
683SDValue
684HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl,
685 SelectionDAG &DAG) const {
686 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: typeJoin(Tys: ty(Ops)),
687 N1: Ops.first, N2: Ops.second);
688}
689
690HexagonTargetLowering::VectorPair
691HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
692 SelectionDAG &DAG) const {
693 TypePair Tys = typeSplit(VecTy: ty(Op: Vec));
694 if (Vec.getOpcode() == HexagonISD::QCAT)
695 return VectorPair(Vec.getOperand(i: 0), Vec.getOperand(i: 1));
696 return DAG.SplitVector(N: Vec, DL: dl, LoVT: Tys.first, HiVT: Tys.second);
697}
698
699bool
700HexagonTargetLowering::isHvxSingleTy(MVT Ty) const {
701 return Subtarget.isHVXVectorType(VecTy: Ty) &&
702 Ty.getSizeInBits() == 8 * Subtarget.getVectorLength();
703}
704
705bool
706HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
707 return Subtarget.isHVXVectorType(VecTy: Ty) &&
708 Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
709}
710
711bool
712HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
713 return Subtarget.isHVXVectorType(VecTy: Ty, IncludeBool: true) &&
714 Ty.getVectorElementType() == MVT::i1;
715}
716
717bool HexagonTargetLowering::allowsHvxMemoryAccess(
718 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
719 // Bool vectors are excluded by default, but make it explicit to
720 // emphasize that bool vectors cannot be loaded or stored.
721 // Also, disallow double vector stores (to prevent unnecessary
722 // store widening in DAG combiner).
723 if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())
724 return false;
725 if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
726 return false;
727 if (Fast)
728 *Fast = 1;
729 return true;
730}
731
732bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
733 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
734 if (!Subtarget.isHVXVectorType(VecTy))
735 return false;
736 // XXX Should this be false? vmemu are a bit slower than vmem.
737 if (Fast)
738 *Fast = 1;
739 return true;
740}
741
742void HexagonTargetLowering::AdjustHvxInstrPostInstrSelection(
743 MachineInstr &MI, SDNode *Node) const {
744 unsigned Opc = MI.getOpcode();
745 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
746 MachineBasicBlock &MB = *MI.getParent();
747 MachineFunction &MF = *MB.getParent();
748 MachineRegisterInfo &MRI = MF.getRegInfo();
749 DebugLoc DL = MI.getDebugLoc();
750 auto At = MI.getIterator();
751
752 switch (Opc) {
753 case Hexagon::PS_vsplatib:
754 if (Subtarget.useHVXV62Ops()) {
755 // SplatV = A2_tfrsi #imm
756 // OutV = V6_lvsplatb SplatV
757 Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
758 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV)
759 .add(MO: MI.getOperand(i: 1));
760 Register OutV = MI.getOperand(i: 0).getReg();
761 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatb), DestReg: OutV)
762 .addReg(RegNo: SplatV);
763 } else {
764 // SplatV = A2_tfrsi #imm:#imm:#imm:#imm
765 // OutV = V6_lvsplatw SplatV
766 Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
767 const MachineOperand &InpOp = MI.getOperand(i: 1);
768 assert(InpOp.isImm());
769 uint32_t V = InpOp.getImm() & 0xFF;
770 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV)
771 .addImm(Val: V << 24 | V << 16 | V << 8 | V);
772 Register OutV = MI.getOperand(i: 0).getReg();
773 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatw), DestReg: OutV).addReg(RegNo: SplatV);
774 }
775 MB.erase(I: At);
776 break;
777 case Hexagon::PS_vsplatrb:
778 if (Subtarget.useHVXV62Ops()) {
779 // OutV = V6_lvsplatb Inp
780 Register OutV = MI.getOperand(i: 0).getReg();
781 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatb), DestReg: OutV)
782 .add(MO: MI.getOperand(i: 1));
783 } else {
784 Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
785 const MachineOperand &InpOp = MI.getOperand(i: 1);
786 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::S2_vsplatrb), DestReg: SplatV)
787 .addReg(RegNo: InpOp.getReg(), Flags: {}, SubReg: InpOp.getSubReg());
788 Register OutV = MI.getOperand(i: 0).getReg();
789 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatw), DestReg: OutV)
790 .addReg(RegNo: SplatV);
791 }
792 MB.erase(I: At);
793 break;
794 case Hexagon::PS_vsplatih:
795 if (Subtarget.useHVXV62Ops()) {
796 // SplatV = A2_tfrsi #imm
797 // OutV = V6_lvsplath SplatV
798 Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
799 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV)
800 .add(MO: MI.getOperand(i: 1));
801 Register OutV = MI.getOperand(i: 0).getReg();
802 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplath), DestReg: OutV)
803 .addReg(RegNo: SplatV);
804 } else {
805 // SplatV = A2_tfrsi #imm:#imm
806 // OutV = V6_lvsplatw SplatV
807 Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
808 const MachineOperand &InpOp = MI.getOperand(i: 1);
809 assert(InpOp.isImm());
810 uint32_t V = InpOp.getImm() & 0xFFFF;
811 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV)
812 .addImm(Val: V << 16 | V);
813 Register OutV = MI.getOperand(i: 0).getReg();
814 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatw), DestReg: OutV).addReg(RegNo: SplatV);
815 }
816 MB.erase(I: At);
817 break;
818 case Hexagon::PS_vsplatrh:
819 if (Subtarget.useHVXV62Ops()) {
820 // OutV = V6_lvsplath Inp
821 Register OutV = MI.getOperand(i: 0).getReg();
822 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplath), DestReg: OutV)
823 .add(MO: MI.getOperand(i: 1));
824 } else {
825 // SplatV = A2_combine_ll Inp, Inp
826 // OutV = V6_lvsplatw SplatV
827 Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
828 const MachineOperand &InpOp = MI.getOperand(i: 1);
829 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_combine_ll), DestReg: SplatV)
830 .addReg(RegNo: InpOp.getReg(), Flags: {}, SubReg: InpOp.getSubReg())
831 .addReg(RegNo: InpOp.getReg(), Flags: {}, SubReg: InpOp.getSubReg());
832 Register OutV = MI.getOperand(i: 0).getReg();
833 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatw), DestReg: OutV).addReg(RegNo: SplatV);
834 }
835 MB.erase(I: At);
836 break;
837 case Hexagon::PS_vsplatiw:
838 case Hexagon::PS_vsplatrw:
839 if (Opc == Hexagon::PS_vsplatiw) {
840 // SplatV = A2_tfrsi #imm
841 Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
842 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV)
843 .add(MO: MI.getOperand(i: 1));
844 MI.getOperand(i: 1).ChangeToRegister(Reg: SplatV, isDef: false);
845 }
846 // OutV = V6_lvsplatw SplatV/Inp
847 MI.setDesc(TII.get(Opcode: Hexagon::V6_lvsplatw));
848 break;
849 }
850}
851
852SDValue
853HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
854 SelectionDAG &DAG) const {
855 if (ElemIdx.getValueType().getSimpleVT() != MVT::i32)
856 ElemIdx = DAG.getBitcast(VT: MVT::i32, V: ElemIdx);
857
858 unsigned ElemWidth = ElemTy.getSizeInBits();
859 if (ElemWidth == 8)
860 return ElemIdx;
861
862 unsigned L = Log2_32(Value: ElemWidth/8);
863 const SDLoc &dl(ElemIdx);
864 return DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: MVT::i32,
865 Ops: {ElemIdx, DAG.getConstant(Val: L, DL: dl, VT: MVT::i32)});
866}
867
868SDValue
869HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy,
870 SelectionDAG &DAG) const {
871 unsigned ElemWidth = ElemTy.getSizeInBits();
872 assert(ElemWidth >= 8 && ElemWidth <= 32);
873 if (ElemWidth == 32)
874 return Idx;
875
876 if (ty(Op: Idx) != MVT::i32)
877 Idx = DAG.getBitcast(VT: MVT::i32, V: Idx);
878 const SDLoc &dl(Idx);
879 SDValue Mask = DAG.getConstant(Val: 32/ElemWidth - 1, DL: dl, VT: MVT::i32);
880 SDValue SubIdx = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, Ops: {Idx, Mask});
881 return SubIdx;
882}
883
884SDValue
885HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0,
886 SDValue Op1, ArrayRef<int> Mask,
887 SelectionDAG &DAG) const {
888 MVT OpTy = ty(Op: Op0);
889 assert(OpTy == ty(Op1));
890
891 MVT ElemTy = OpTy.getVectorElementType();
892 if (ElemTy == MVT::i8)
893 return DAG.getVectorShuffle(VT: OpTy, dl, N1: Op0, N2: Op1, Mask);
894 assert(ElemTy.getSizeInBits() >= 8);
895
896 MVT ResTy = tyVector(Ty: OpTy, ElemTy: MVT::i8);
897 unsigned ElemSize = ElemTy.getSizeInBits() / 8;
898
899 SmallVector<int,128> ByteMask;
900 for (int M : Mask) {
901 if (M < 0) {
902 for (unsigned I = 0; I != ElemSize; ++I)
903 ByteMask.push_back(Elt: -1);
904 } else {
905 int NewM = M*ElemSize;
906 for (unsigned I = 0; I != ElemSize; ++I)
907 ByteMask.push_back(Elt: NewM+I);
908 }
909 }
910 assert(ResTy.getVectorNumElements() == ByteMask.size());
911 return DAG.getVectorShuffle(VT: ResTy, dl, N1: opCastElem(Vec: Op0, ElemTy: MVT::i8, DAG),
912 N2: opCastElem(Vec: Op1, ElemTy: MVT::i8, DAG), Mask: ByteMask);
913}
914
915SDValue
916HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
917 const SDLoc &dl, MVT VecTy,
918 SelectionDAG &DAG) const {
919 unsigned VecLen = Values.size();
920 MachineFunction &MF = DAG.getMachineFunction();
921 MVT ElemTy = VecTy.getVectorElementType();
922 unsigned ElemWidth = ElemTy.getSizeInBits();
923 unsigned HwLen = Subtarget.getVectorLength();
924
925 unsigned ElemSize = ElemWidth / 8;
926 assert(ElemSize*VecLen == HwLen);
927 SmallVector<SDValue,32> Words;
928
929 if (VecTy.getVectorElementType() != MVT::i32 &&
930 !(Subtarget.useHVXFloatingPoint() &&
931 VecTy.getVectorElementType() == MVT::f32)) {
932 assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size");
933 unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2;
934 MVT PartVT = MVT::getVectorVT(VT: VecTy.getVectorElementType(), NumElements: OpsPerWord);
935 for (unsigned i = 0; i != VecLen; i += OpsPerWord) {
936 SDValue W = buildVector32(Elem: Values.slice(N: i, M: OpsPerWord), dl, VecTy: PartVT, DAG);
937 Words.push_back(Elt: DAG.getBitcast(VT: MVT::i32, V: W));
938 }
939 } else {
940 for (SDValue V : Values)
941 Words.push_back(Elt: DAG.getBitcast(VT: MVT::i32, V));
942 }
943 auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) {
944 unsigned NumValues = Values.size();
945 assert(NumValues > 0);
946 bool IsUndef = true;
947 for (unsigned i = 0; i != NumValues; ++i) {
948 if (Values[i].isUndef())
949 continue;
950 IsUndef = false;
951 if (!SplatV.getNode())
952 SplatV = Values[i];
953 else if (SplatV != Values[i])
954 return false;
955 }
956 if (IsUndef)
957 SplatV = Values[0];
958 return true;
959 };
960
961 unsigned NumWords = Words.size();
962 SDValue SplatV;
963 bool IsSplat = isSplat(Words, SplatV);
964 if (IsSplat && isUndef(Op: SplatV))
965 return DAG.getUNDEF(VT: VecTy);
966 if (IsSplat) {
967 assert(SplatV.getNode());
968 if (isNullConstant(V: SplatV))
969 return getZero(dl, Ty: VecTy, DAG);
970 MVT WordTy = MVT::getVectorVT(VT: MVT::i32, NumElements: HwLen/4);
971 SDValue S = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: WordTy, Operand: SplatV);
972 return DAG.getBitcast(VT: VecTy, V: S);
973 }
974
975 // Delay recognizing constant vectors until here, so that we can generate
976 // a vsplat.
977 SmallVector<ConstantInt*, 128> Consts(VecLen);
978 bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
979 if (AllConst) {
980 ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
981 (Constant**)Consts.end());
982 Constant *CV = ConstantVector::get(V: Tmp);
983 Align Alignment(HwLen);
984 SDValue CP = LowerConstantPool(
985 Op: DAG.getConstantPool(C: CV, VT: getPointerTy(DL: DAG.getDataLayout()), Align: Alignment),
986 DAG);
987 return DAG.getLoad(VT: VecTy, dl, Chain: DAG.getEntryNode(), Ptr: CP,
988 PtrInfo: MachinePointerInfo::getConstantPool(MF), Alignment);
989 }
990
991 // A special case is a situation where the vector is built entirely from
992 // elements extracted from another vector. This could be done via a shuffle
993 // more efficiently, but typically, the size of the source vector will not
994 // match the size of the vector being built (which precludes the use of a
995 // shuffle directly).
996 // This only handles a single source vector, and the vector being built
997 // should be of a sub-vector type of the source vector type.
998 auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec,
999 SmallVectorImpl<int> &SrcIdx) {
1000 SDValue Vec;
1001 for (SDValue V : Values) {
1002 if (isUndef(Op: V)) {
1003 SrcIdx.push_back(Elt: -1);
1004 continue;
1005 }
1006 if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
1007 return false;
1008 // All extracts should come from the same vector.
1009 SDValue T = V.getOperand(i: 0);
1010 if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode())
1011 return false;
1012 Vec = T;
1013 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: V.getOperand(i: 1));
1014 if (C == nullptr)
1015 return false;
1016 int I = C->getSExtValue();
1017 assert(I >= 0 && "Negative element index");
1018 SrcIdx.push_back(Elt: I);
1019 }
1020 SrcVec = Vec;
1021 return true;
1022 };
1023
1024 SmallVector<int,128> ExtIdx;
1025 SDValue ExtVec;
1026 if (IsBuildFromExtracts(ExtVec, ExtIdx)) {
1027 MVT ExtTy = ty(Op: ExtVec);
1028 unsigned ExtLen = ExtTy.getVectorNumElements();
1029 if (ExtLen == VecLen || ExtLen == 2*VecLen) {
1030 // Construct a new shuffle mask that will produce a vector with the same
1031 // number of elements as the input vector, and such that the vector we
1032 // want will be the initial subvector of it.
1033 SmallVector<int,128> Mask;
1034 BitVector Used(ExtLen);
1035
1036 for (int M : ExtIdx) {
1037 Mask.push_back(Elt: M);
1038 if (M >= 0)
1039 Used.set(M);
1040 }
1041 // Fill the rest of the mask with the unused elements of ExtVec in hopes
1042 // that it will result in a permutation of ExtVec's elements. It's still
1043 // fine if it doesn't (e.g. if undefs are present, or elements are
1044 // repeated), but permutations can always be done efficiently via vdelta
1045 // and vrdelta.
1046 for (unsigned I = 0; I != ExtLen; ++I) {
1047 if (Mask.size() == ExtLen)
1048 break;
1049 if (!Used.test(Idx: I))
1050 Mask.push_back(Elt: I);
1051 }
1052
1053 SDValue S = DAG.getVectorShuffle(VT: ExtTy, dl, N1: ExtVec,
1054 N2: DAG.getUNDEF(VT: ExtTy), Mask);
1055 return ExtLen == VecLen ? S : LoHalf(V: S, DAG);
1056 }
1057 }
1058
1059 // Find most common element to initialize vector with. This is to avoid
1060 // unnecessary vinsert/valign for cases where the same value is present
1061 // many times. Creates a histogram of the vector's elements to find the
1062 // most common element n.
1063 assert(4*Words.size() == Subtarget.getVectorLength());
1064 int VecHist[32];
1065 int n = 0;
1066 for (unsigned i = 0; i != NumWords; ++i) {
1067 VecHist[i] = 0;
1068 if (Words[i].isUndef())
1069 continue;
1070 for (unsigned j = i; j != NumWords; ++j)
1071 if (Words[i] == Words[j])
1072 VecHist[i]++;
1073
1074 if (VecHist[i] > VecHist[n])
1075 n = i;
1076 }
1077
1078 SDValue HalfV = getZero(dl, Ty: VecTy, DAG);
1079 if (VecHist[n] > 1) {
1080 SDValue SplatV = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: VecTy, Operand: Words[n]);
1081 HalfV = DAG.getNode(Opcode: HexagonISD::VALIGN, DL: dl, VT: VecTy,
1082 Ops: {HalfV, SplatV, DAG.getConstant(Val: HwLen/2, DL: dl, VT: MVT::i32)});
1083 }
1084 SDValue HalfV0 = HalfV;
1085 SDValue HalfV1 = HalfV;
1086
1087 // Construct two halves in parallel, then or them together. Rn and Rm count
1088 // number of rotations needed before the next element. One last rotation is
1089 // performed post-loop to position the last element.
1090 int Rn = 0, Rm = 0;
1091 SDValue Sn, Sm;
1092 SDValue N = HalfV0;
1093 SDValue M = HalfV1;
1094 for (unsigned i = 0; i != NumWords/2; ++i) {
1095 // Rotate by element count since last insertion.
1096 if (Words[i] != Words[n] || VecHist[n] <= 1) {
1097 Sn = DAG.getConstant(Val: Rn, DL: dl, VT: MVT::i32);
1098 HalfV0 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {N, Sn});
1099 N = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: VecTy,
1100 Ops: {HalfV0, Words[i]});
1101 Rn = 0;
1102 }
1103 if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) {
1104 Sm = DAG.getConstant(Val: Rm, DL: dl, VT: MVT::i32);
1105 HalfV1 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {M, Sm});
1106 M = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: VecTy,
1107 Ops: {HalfV1, Words[i+NumWords/2]});
1108 Rm = 0;
1109 }
1110 Rn += 4;
1111 Rm += 4;
1112 }
1113 // Perform last rotation.
1114 Sn = DAG.getConstant(Val: Rn+HwLen/2, DL: dl, VT: MVT::i32);
1115 Sm = DAG.getConstant(Val: Rm, DL: dl, VT: MVT::i32);
1116 HalfV0 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {N, Sn});
1117 HalfV1 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {M, Sm});
1118
1119 SDValue T0 = DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: MVT::i32), V: HalfV0);
1120 SDValue T1 = DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: MVT::i32), V: HalfV1);
1121
1122 SDValue DstV = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ty(Op: T0), Ops: {T0, T1});
1123
1124 SDValue OutV =
1125 DAG.getBitcast(VT: tyVector(Ty: ty(Op: DstV), ElemTy: VecTy.getVectorElementType()), V: DstV);
1126 return OutV;
1127}
1128
1129SDValue
1130HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
1131 unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const {
1132 MVT PredTy = ty(Op: PredV);
1133 unsigned HwLen = Subtarget.getVectorLength();
1134 MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
1135
1136 if (Subtarget.isHVXVectorType(VecTy: PredTy, IncludeBool: true)) {
1137 // Move the vector predicate SubV to a vector register, and scale it
1138 // down to match the representation (bytes per type element) that VecV
1139 // uses. The scaling down will pick every 2nd or 4th (every Scale-th
1140 // in general) element and put them at the front of the resulting
1141 // vector. This subvector will then be inserted into the Q2V of VecV.
1142 // To avoid having an operation that generates an illegal type (short
1143 // vector), generate a full size vector.
1144 //
1145 SDValue T = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: PredV);
1146 SmallVector<int,128> Mask(HwLen);
1147 // Scale = BitBytes(PredV) / Given BitBytes.
1148 unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes);
1149 unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes;
1150
1151 for (unsigned i = 0; i != HwLen; ++i) {
1152 unsigned Num = i % Scale;
1153 unsigned Off = i / Scale;
1154 Mask[BlockLen*Num + Off] = i;
1155 }
1156 SDValue S = DAG.getVectorShuffle(VT: ByteTy, dl, N1: T, N2: DAG.getUNDEF(VT: ByteTy), Mask);
1157 if (!ZeroFill)
1158 return S;
1159 // Fill the bytes beyond BlockLen with 0s.
1160 // V6_pred_scalar2 cannot fill the entire predicate, so it only works
1161 // when BlockLen < HwLen.
1162 assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
1163 MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen);
1164 SDValue Q = getInstr(MachineOpc: Hexagon::V6_pred_scalar2, dl, Ty: BoolTy,
1165 Ops: {DAG.getConstant(Val: BlockLen, DL: dl, VT: MVT::i32)}, DAG);
1166 SDValue M = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: Q);
1167 return DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ByteTy, N1: S, N2: M);
1168 }
1169
1170 // Make sure that this is a valid scalar predicate.
1171 assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1);
1172
1173 unsigned Bytes = 8 / PredTy.getVectorNumElements();
1174 SmallVector<SDValue,4> Words[2];
1175 unsigned IdxW = 0;
1176
1177 SDValue W0 = isUndef(Op: PredV)
1178 ? DAG.getUNDEF(VT: MVT::i64)
1179 : DAG.getNode(Opcode: HexagonISD::P2D, DL: dl, VT: MVT::i64, Operand: PredV);
1180 Words[IdxW].push_back(Elt: HiHalf(V: W0, DAG));
1181 Words[IdxW].push_back(Elt: LoHalf(V: W0, DAG));
1182
1183 while (Bytes < BitBytes) {
1184 IdxW ^= 1;
1185 Words[IdxW].clear();
1186
1187 if (Bytes < 4) {
1188 for (const SDValue &W : Words[IdxW ^ 1]) {
1189 SDValue T = expandPredicate(Vec32: W, dl, DAG);
1190 Words[IdxW].push_back(Elt: HiHalf(V: T, DAG));
1191 Words[IdxW].push_back(Elt: LoHalf(V: T, DAG));
1192 }
1193 } else {
1194 for (const SDValue &W : Words[IdxW ^ 1]) {
1195 Words[IdxW].push_back(Elt: W);
1196 Words[IdxW].push_back(Elt: W);
1197 }
1198 }
1199 Bytes *= 2;
1200 }
1201
1202 assert(Bytes == BitBytes);
1203 SDValue Vec = ZeroFill ? getZero(dl, Ty: ByteTy, DAG) : DAG.getUNDEF(VT: ByteTy);
1204 SDValue S4 = DAG.getConstant(Val: HwLen-4, DL: dl, VT: MVT::i32);
1205 for (const SDValue &W : Words[IdxW]) {
1206 Vec = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: Vec, N2: S4);
1207 Vec = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: ByteTy, N1: Vec, N2: W);
1208 }
1209
1210 return Vec;
1211}
1212
1213SDValue
1214HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
1215 const SDLoc &dl, MVT VecTy,
1216 SelectionDAG &DAG) const {
1217 // Construct a vector V of bytes, such that a comparison V >u 0 would
1218 // produce the required vector predicate.
1219 unsigned VecLen = Values.size();
1220 unsigned HwLen = Subtarget.getVectorLength();
1221 assert(VecLen <= HwLen || VecLen == 8*HwLen);
1222 SmallVector<SDValue,128> Bytes;
1223 bool AllT = true, AllF = true;
1224
1225 auto IsTrue = [] (SDValue V) {
1226 if (const auto *N = dyn_cast<ConstantSDNode>(Val: V.getNode()))
1227 return !N->isZero();
1228 return false;
1229 };
1230 auto IsFalse = [] (SDValue V) {
1231 if (const auto *N = dyn_cast<ConstantSDNode>(Val: V.getNode()))
1232 return N->isZero();
1233 return false;
1234 };
1235
1236 if (VecLen <= HwLen) {
1237 // In the hardware, each bit of a vector predicate corresponds to a byte
1238 // of a vector register. Calculate how many bytes does a bit of VecTy
1239 // correspond to.
1240 assert(HwLen % VecLen == 0);
1241 unsigned BitBytes = HwLen / VecLen;
1242 for (SDValue V : Values) {
1243 AllT &= IsTrue(V);
1244 AllF &= IsFalse(V);
1245
1246 SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(Op: V, DL: dl, VT: MVT::i8)
1247 : DAG.getUNDEF(VT: MVT::i8);
1248 for (unsigned B = 0; B != BitBytes; ++B)
1249 Bytes.push_back(Elt: Ext);
1250 }
1251 } else {
1252 // There are as many i1 values, as there are bits in a vector register.
1253 // Divide the values into groups of 8 and check that each group consists
1254 // of the same value (ignoring undefs).
1255 for (unsigned I = 0; I != VecLen; I += 8) {
1256 unsigned B = 0;
1257 // Find the first non-undef value in this group.
1258 for (; B != 8; ++B) {
1259 if (!Values[I+B].isUndef())
1260 break;
1261 }
1262 SDValue F = Values[I+B];
1263 AllT &= IsTrue(F);
1264 AllF &= IsFalse(F);
1265
1266 SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(Op: F, DL: dl, VT: MVT::i8)
1267 : DAG.getUNDEF(VT: MVT::i8);
1268 Bytes.push_back(Elt: Ext);
1269 // Verify that the rest of values in the group are the same as the
1270 // first.
1271 for (; B != 8; ++B)
1272 assert(Values[I+B].isUndef() || Values[I+B] == F);
1273 }
1274 }
1275
1276 if (AllT)
1277 return DAG.getNode(Opcode: HexagonISD::QTRUE, DL: dl, VT: VecTy);
1278 if (AllF)
1279 return DAG.getNode(Opcode: HexagonISD::QFALSE, DL: dl, VT: VecTy);
1280
1281 MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
1282 SDValue ByteVec = buildHvxVectorReg(Values: Bytes, dl, VecTy: ByteTy, DAG);
1283 return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: VecTy, Operand: ByteVec);
1284}
1285
1286SDValue
1287HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV,
1288 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1289 MVT ElemTy = ty(Op: VecV).getVectorElementType();
1290
1291 unsigned ElemWidth = ElemTy.getSizeInBits();
1292 assert(ElemWidth >= 8 && ElemWidth <= 32);
1293 (void)ElemWidth;
1294
1295 SDValue ByteIdx = convertToByteIndex(ElemIdx: IdxV, ElemTy, DAG);
1296 SDValue ExWord = DAG.getNode(Opcode: HexagonISD::VEXTRACTW, DL: dl, VT: MVT::i32,
1297 Ops: {VecV, ByteIdx});
1298 if (ElemTy == MVT::i32)
1299 return ExWord;
1300
1301 // Have an extracted word, need to extract the smaller element out of it.
1302 // 1. Extract the bits of (the original) IdxV that correspond to the index
1303 // of the desired element in the 32-bit word.
1304 SDValue SubIdx = getIndexInWord32(Idx: IdxV, ElemTy, DAG);
1305 // 2. Extract the element from the word.
1306 SDValue ExVec = DAG.getBitcast(VT: tyVector(Ty: ty(Op: ExWord), ElemTy), V: ExWord);
1307 return extractVector(VecV: ExVec, IdxV: SubIdx, dl, ValTy: ElemTy, ResTy: MVT::i32, DAG);
1308}
1309
1310SDValue
1311HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV,
1312 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1313 // Implement other return types if necessary.
1314 assert(ResTy == MVT::i1);
1315
1316 unsigned HwLen = Subtarget.getVectorLength();
1317 MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
1318 SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV);
1319
1320 unsigned Scale = HwLen / ty(Op: VecV).getVectorNumElements();
1321 SDValue ScV = DAG.getConstant(Val: Scale, DL: dl, VT: MVT::i32);
1322 IdxV = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV, N2: ScV);
1323
1324 SDValue ExtB = extractHvxElementReg(VecV: ByteVec, IdxV, dl, ResTy: MVT::i32, DAG);
1325 SDValue Zero = DAG.getTargetConstant(Val: 0, DL: dl, VT: MVT::i32);
1326 return getInstr(MachineOpc: Hexagon::C2_cmpgtui, dl, Ty: MVT::i1, Ops: {ExtB, Zero}, DAG);
1327}
1328
1329SDValue
1330HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV,
1331 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1332 MVT ElemTy = ty(Op: VecV).getVectorElementType();
1333
1334 unsigned ElemWidth = ElemTy.getSizeInBits();
1335 assert(ElemWidth >= 8 && ElemWidth <= 32);
1336 (void)ElemWidth;
1337
1338 auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV,
1339 SDValue ByteIdxV) {
1340 MVT VecTy = ty(Op: VecV);
1341 unsigned HwLen = Subtarget.getVectorLength();
1342 SDValue MaskV =
1343 DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32,
1344 Ops: {ByteIdxV, DAG.getSignedConstant(Val: -4, DL: dl, VT: MVT::i32)});
1345 SDValue RotV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {VecV, MaskV});
1346 SDValue InsV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: VecTy, Ops: {RotV, ValV});
1347 SDValue SubV = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32,
1348 Ops: {DAG.getConstant(Val: HwLen, DL: dl, VT: MVT::i32), MaskV});
1349 SDValue TorV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {InsV, SubV});
1350 return TorV;
1351 };
1352
1353 SDValue ByteIdx = convertToByteIndex(ElemIdx: IdxV, ElemTy, DAG);
1354 if (ElemTy == MVT::i32)
1355 return InsertWord(VecV, ValV, ByteIdx);
1356
1357 // If this is not inserting a 32-bit word, convert it into such a thing.
1358 // 1. Extract the existing word from the target vector.
1359 SDValue WordIdx = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i32,
1360 Ops: {ByteIdx, DAG.getConstant(Val: 2, DL: dl, VT: MVT::i32)});
1361 SDValue Ext = extractHvxElementReg(VecV: opCastElem(Vec: VecV, ElemTy: MVT::i32, DAG), IdxV: WordIdx,
1362 dl, ResTy: MVT::i32, DAG);
1363
1364 // 2. Treating the extracted word as a 32-bit vector, insert the given
1365 // value into it.
1366 SDValue SubIdx = getIndexInWord32(Idx: IdxV, ElemTy, DAG);
1367 MVT SubVecTy = tyVector(Ty: ty(Op: Ext), ElemTy);
1368 SDValue Ins = insertVector(VecV: DAG.getBitcast(VT: SubVecTy, V: Ext),
1369 ValV, IdxV: SubIdx, dl, ValTy: ElemTy, DAG);
1370
1371 // 3. Insert the 32-bit word back into the original vector.
1372 return InsertWord(VecV, Ins, ByteIdx);
1373}
1374
1375SDValue
1376HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV,
1377 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1378 unsigned HwLen = Subtarget.getVectorLength();
1379 MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
1380 SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV);
1381
1382 unsigned Scale = HwLen / ty(Op: VecV).getVectorNumElements();
1383 SDValue ScV = DAG.getConstant(Val: Scale, DL: dl, VT: MVT::i32);
1384 IdxV = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV, N2: ScV);
1385 ValV = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MVT::i32, Operand: ValV);
1386
1387 SDValue InsV = insertHvxElementReg(VecV: ByteVec, IdxV, ValV, dl, DAG);
1388 return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ty(Op: VecV), Operand: InsV);
1389}
1390
1391SDValue
1392HexagonTargetLowering::extractHvxSubvectorReg(SDValue OrigOp, SDValue VecV,
1393 SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1394 MVT VecTy = ty(Op: VecV);
1395 unsigned HwLen = Subtarget.getVectorLength();
1396 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1397 MVT ElemTy = VecTy.getVectorElementType();
1398 unsigned ElemWidth = ElemTy.getSizeInBits();
1399
1400 // If the source vector is a vector pair, get the single vector containing
1401 // the subvector of interest. The subvector will never overlap two single
1402 // vectors.
1403 if (isHvxPairTy(Ty: VecTy)) {
1404 unsigned SubIdx = Hexagon::vsub_lo;
1405 if (Idx * ElemWidth >= 8 * HwLen) {
1406 SubIdx = Hexagon::vsub_hi;
1407 Idx -= VecTy.getVectorNumElements() / 2;
1408 }
1409
1410 VecTy = typeSplit(VecTy).first;
1411 VecV = DAG.getTargetExtractSubreg(SRIdx: SubIdx, DL: dl, VT: VecTy, Operand: VecV);
1412 if (VecTy == ResTy)
1413 return VecV;
1414 }
1415
1416 // The only meaningful subvectors of a single HVX vector are those that
1417 // fit in a scalar register.
1418 assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64);
1419
1420 MVT WordTy = tyVector(Ty: VecTy, ElemTy: MVT::i32);
1421 SDValue WordVec = DAG.getBitcast(VT: WordTy, V: VecV);
1422 unsigned WordIdx = (Idx*ElemWidth) / 32;
1423
1424 SDValue W0Idx = DAG.getConstant(Val: WordIdx, DL: dl, VT: MVT::i32);
1425 SDValue W0 = extractHvxElementReg(VecV: WordVec, IdxV: W0Idx, dl, ResTy: MVT::i32, DAG);
1426 if (ResTy.getSizeInBits() == 32)
1427 return DAG.getBitcast(VT: ResTy, V: W0);
1428
1429 SDValue W1Idx = DAG.getConstant(Val: WordIdx+1, DL: dl, VT: MVT::i32);
1430 SDValue W1 = extractHvxElementReg(VecV: WordVec, IdxV: W1Idx, dl, ResTy: MVT::i32, DAG);
1431 SDValue WW = getCombine(Hi: W1, Lo: W0, dl, ResTy: MVT::i64, DAG);
1432 return DAG.getBitcast(VT: ResTy, V: WW);
1433}
1434
1435SDValue
1436HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV,
1437 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1438 MVT VecTy = ty(Op: VecV);
1439 unsigned HwLen = Subtarget.getVectorLength();
1440 MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
1441 SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV);
1442 // IdxV is required to be a constant.
1443 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1444
1445 unsigned ResLen = ResTy.getVectorNumElements();
1446 unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
1447 unsigned Offset = Idx * BitBytes;
1448 SDValue Undef = DAG.getUNDEF(VT: ByteTy);
1449 SmallVector<int,128> Mask;
1450
1451 if (Subtarget.isHVXVectorType(VecTy: ResTy, IncludeBool: true)) {
1452 // Converting between two vector predicates. Since the result is shorter
1453 // than the source, it will correspond to a vector predicate with the
1454 // relevant bits replicated. The replication count is the ratio of the
1455 // source and target vector lengths.
1456 unsigned Rep = VecTy.getVectorNumElements() / ResLen;
1457 assert(isPowerOf2_32(Rep) && HwLen % Rep == 0);
1458 for (unsigned i = 0; i != HwLen/Rep; ++i) {
1459 for (unsigned j = 0; j != Rep; ++j)
1460 Mask.push_back(Elt: i + Offset);
1461 }
1462 SDValue ShuffV = DAG.getVectorShuffle(VT: ByteTy, dl, N1: ByteVec, N2: Undef, Mask);
1463 return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ResTy, Operand: ShuffV);
1464 }
1465
1466 // Converting between a vector predicate and a scalar predicate. In the
1467 // vector predicate, a group of BitBytes bits will correspond to a single
1468 // i1 element of the source vector type. Those bits will all have the same
1469 // value. The same will be true for ByteVec, where each byte corresponds
1470 // to a bit in the vector predicate.
1471 // The algorithm is to traverse the ByteVec, going over the i1 values from
1472 // the source vector, and generate the corresponding representation in an
1473 // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
1474 // elements so that the interesting 8 bytes will be in the low end of the
1475 // vector.
1476 unsigned Rep = 8 / ResLen;
1477 // Make sure the output fill the entire vector register, so repeat the
1478 // 8-byte groups as many times as necessary.
1479 for (unsigned r = 0; r != HwLen / 8; ++r) {
1480 // This will generate the indexes of the 8 interesting bytes.
1481 for (unsigned i = 0; i != ResLen; ++i) {
1482 for (unsigned j = 0; j != Rep; ++j)
1483 Mask.push_back(Elt: Offset + i*BitBytes);
1484 }
1485 }
1486
1487 SDValue Zero = getZero(dl, Ty: MVT::i32, DAG);
1488 SDValue ShuffV = DAG.getVectorShuffle(VT: ByteTy, dl, N1: ByteVec, N2: Undef, Mask);
1489 // Combine the two low words from ShuffV into a v8i8, and byte-compare
1490 // them against 0.
1491 SDValue W0 = DAG.getNode(Opcode: HexagonISD::VEXTRACTW, DL: dl, VT: MVT::i32, Ops: {ShuffV, Zero});
1492 SDValue W1 = DAG.getNode(Opcode: HexagonISD::VEXTRACTW, DL: dl, VT: MVT::i32,
1493 Ops: {ShuffV, DAG.getConstant(Val: 4, DL: dl, VT: MVT::i32)});
1494 SDValue Vec64 = getCombine(Hi: W1, Lo: W0, dl, ResTy: MVT::v8i8, DAG);
1495 return getInstr(MachineOpc: Hexagon::A4_vcmpbgtui, dl, Ty: ResTy,
1496 Ops: {Vec64, DAG.getTargetConstant(Val: 0, DL: dl, VT: MVT::i32)}, DAG);
1497}
1498
1499SDValue
1500HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
1501 SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
1502 MVT VecTy = ty(Op: VecV);
1503 MVT SubTy = ty(Op: SubV);
1504 unsigned HwLen = Subtarget.getVectorLength();
1505 MVT ElemTy = VecTy.getVectorElementType();
1506 unsigned ElemWidth = ElemTy.getSizeInBits();
1507
1508 bool IsPair = isHvxPairTy(Ty: VecTy);
1509 MVT SingleTy = MVT::getVectorVT(VT: ElemTy, NumElements: (8*HwLen)/ElemWidth);
1510 // The two single vectors that VecV consists of, if it's a pair.
1511 SDValue V0, V1;
1512 SDValue SingleV = VecV;
1513 SDValue PickHi;
1514
1515 if (IsPair) {
1516 V0 = LoHalf(V: VecV, DAG);
1517 V1 = HiHalf(V: VecV, DAG);
1518
1519 SDValue HalfV = DAG.getConstant(Val: SingleTy.getVectorNumElements(),
1520 DL: dl, VT: MVT::i32);
1521 PickHi = DAG.getSetCC(DL: dl, VT: MVT::i1, LHS: IdxV, RHS: HalfV, Cond: ISD::SETUGT);
1522 if (isHvxSingleTy(Ty: SubTy)) {
1523 if (const auto *CN = dyn_cast<const ConstantSDNode>(Val: IdxV.getNode())) {
1524 unsigned Idx = CN->getZExtValue();
1525 assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2);
1526 unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi;
1527 return DAG.getTargetInsertSubreg(SRIdx: SubIdx, DL: dl, VT: VecTy, Operand: VecV, Subreg: SubV);
1528 }
1529 // If IdxV is not a constant, generate the two variants: with the
1530 // SubV as the high and as the low subregister, and select the right
1531 // pair based on the IdxV.
1532 SDValue InLo = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {SubV, V1});
1533 SDValue InHi = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {V0, SubV});
1534 return DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: VecTy, N1: PickHi, N2: InHi, N3: InLo);
1535 }
1536 // The subvector being inserted must be entirely contained in one of
1537 // the vectors V0 or V1. Set SingleV to the correct one, and update
1538 // IdxV to be the index relative to the beginning of that vector.
1539 SDValue S = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32, N1: IdxV, N2: HalfV);
1540 IdxV = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: MVT::i32, N1: PickHi, N2: S, N3: IdxV);
1541 SingleV = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: SingleTy, N1: PickHi, N2: V1, N3: V0);
1542 }
1543
1544 // The only meaningful subvectors of a single HVX vector are those that
1545 // fit in a scalar register.
1546 assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64);
1547 // Convert IdxV to be index in bytes.
1548 auto *IdxN = dyn_cast<ConstantSDNode>(Val: IdxV.getNode());
1549 if (!IdxN || !IdxN->isZero()) {
1550 IdxV = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV,
1551 N2: DAG.getConstant(Val: ElemWidth/8, DL: dl, VT: MVT::i32));
1552 SingleV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: SingleTy, N1: SingleV, N2: IdxV);
1553 }
1554 // When inserting a single word, the rotation back to the original position
1555 // would be by HwLen-Idx, but if two words are inserted, it will need to be
1556 // by (HwLen-4)-Idx.
1557 unsigned RolBase = HwLen;
1558 if (SubTy.getSizeInBits() == 32) {
1559 SDValue V = DAG.getBitcast(VT: MVT::i32, V: SubV);
1560 SingleV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: SingleTy, N1: SingleV, N2: V);
1561 } else {
1562 SDValue V = DAG.getBitcast(VT: MVT::i64, V: SubV);
1563 SDValue R0 = LoHalf(V, DAG);
1564 SDValue R1 = HiHalf(V, DAG);
1565 SingleV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: SingleTy, N1: SingleV, N2: R0);
1566 SingleV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: SingleTy, N1: SingleV,
1567 N2: DAG.getConstant(Val: 4, DL: dl, VT: MVT::i32));
1568 SingleV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: SingleTy, N1: SingleV, N2: R1);
1569 RolBase = HwLen-4;
1570 }
1571 // If the vector wasn't ror'ed, don't ror it back.
1572 if (RolBase != 4 || !IdxN || !IdxN->isZero()) {
1573 SDValue RolV = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32,
1574 N1: DAG.getConstant(Val: RolBase, DL: dl, VT: MVT::i32), N2: IdxV);
1575 SingleV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: SingleTy, N1: SingleV, N2: RolV);
1576 }
1577
1578 if (IsPair) {
1579 SDValue InLo = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {SingleV, V1});
1580 SDValue InHi = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {V0, SingleV});
1581 return DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: VecTy, N1: PickHi, N2: InHi, N3: InLo);
1582 }
1583 return SingleV;
1584}
1585
1586SDValue
1587HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
1588 SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
1589 MVT VecTy = ty(Op: VecV);
1590 MVT SubTy = ty(Op: SubV);
1591 assert(Subtarget.isHVXVectorType(VecTy, true));
1592 // VecV is an HVX vector predicate. SubV may be either an HVX vector
1593 // predicate as well, or it can be a scalar predicate.
1594
1595 unsigned VecLen = VecTy.getVectorNumElements();
1596 unsigned HwLen = Subtarget.getVectorLength();
1597 assert(HwLen % VecLen == 0 && "Unexpected vector type");
1598
1599 unsigned Scale = VecLen / SubTy.getVectorNumElements();
1600 unsigned BitBytes = HwLen / VecLen;
1601 unsigned BlockLen = HwLen / Scale;
1602
1603 MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
1604 SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV);
1605 SDValue ByteSub = createHvxPrefixPred(PredV: SubV, dl, BitBytes, ZeroFill: false, DAG);
1606 SDValue ByteIdx;
1607
1608 auto *IdxN = dyn_cast<ConstantSDNode>(Val: IdxV.getNode());
1609 if (!IdxN || !IdxN->isZero()) {
1610 ByteIdx = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV,
1611 N2: DAG.getConstant(Val: BitBytes, DL: dl, VT: MVT::i32));
1612 ByteVec = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: ByteVec, N2: ByteIdx);
1613 }
1614
1615 // ByteVec is the target vector VecV rotated in such a way that the
1616 // subvector should be inserted at index 0. Generate a predicate mask
1617 // and use vmux to do the insertion.
1618 assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
1619 MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen);
1620 SDValue Q = getInstr(MachineOpc: Hexagon::V6_pred_scalar2, dl, Ty: BoolTy,
1621 Ops: {DAG.getConstant(Val: BlockLen, DL: dl, VT: MVT::i32)}, DAG);
1622 ByteVec = getInstr(MachineOpc: Hexagon::V6_vmux, dl, Ty: ByteTy, Ops: {Q, ByteSub, ByteVec}, DAG);
1623 // Rotate ByteVec back, and convert to a vector predicate.
1624 if (!IdxN || !IdxN->isZero()) {
1625 SDValue HwLenV = DAG.getConstant(Val: HwLen, DL: dl, VT: MVT::i32);
1626 SDValue ByteXdi = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32, N1: HwLenV, N2: ByteIdx);
1627 ByteVec = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: ByteVec, N2: ByteXdi);
1628 }
1629 return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: VecTy, Operand: ByteVec);
1630}
1631
1632SDValue
1633HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
1634 MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const {
1635 // Sign- and any-extending of a vector predicate to a vector register is
1636 // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
1637 // a vector of 1s (where the 1s are of type matching the vector type).
1638 assert(Subtarget.isHVXVectorType(ResTy));
1639 if (!ZeroExt)
1640 return DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ResTy, Operand: VecV);
1641
1642 assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements());
1643 SDValue True = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ResTy,
1644 Operand: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
1645 SDValue False = getZero(dl, Ty: ResTy, DAG);
1646 return DAG.getSelect(DL: dl, VT: ResTy, Cond: VecV, LHS: True, RHS: False);
1647}
1648
1649SDValue
1650HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
1651 MVT ResTy, SelectionDAG &DAG) const {
1652 // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
1653 // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
1654 // vector register. The remaining bits of the vector register are
1655 // unspecified.
1656
1657 MachineFunction &MF = DAG.getMachineFunction();
1658 unsigned HwLen = Subtarget.getVectorLength();
1659 MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
1660 MVT PredTy = ty(Op: VecQ);
1661 unsigned PredLen = PredTy.getVectorNumElements();
1662 assert(HwLen % PredLen == 0);
1663 MVT VecTy = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: 8*HwLen/PredLen), NumElements: PredLen);
1664
1665 Type *Int8Ty = Type::getInt8Ty(C&: *DAG.getContext());
1666 SmallVector<Constant*, 128> Tmp;
1667 // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
1668 // These are bytes with the LSB rotated left with respect to their index.
1669 for (unsigned i = 0; i != HwLen/8; ++i) {
1670 for (unsigned j = 0; j != 8; ++j)
1671 Tmp.push_back(Elt: ConstantInt::get(Ty: Int8Ty, V: 1ull << j));
1672 }
1673 Constant *CV = ConstantVector::get(V: Tmp);
1674 Align Alignment(HwLen);
1675 SDValue CP = LowerConstantPool(
1676 Op: DAG.getConstantPool(C: CV, VT: getPointerTy(DL: DAG.getDataLayout()), Align: Alignment),
1677 DAG);
1678 SDValue Bytes =
1679 DAG.getLoad(VT: ByteTy, dl, Chain: DAG.getEntryNode(), Ptr: CP,
1680 PtrInfo: MachinePointerInfo::getConstantPool(MF), Alignment);
1681
1682 // Select the bytes that correspond to true bits in the vector predicate.
1683 SDValue Sel = DAG.getSelect(DL: dl, VT: VecTy, Cond: VecQ, LHS: DAG.getBitcast(VT: VecTy, V: Bytes),
1684 RHS: getZero(dl, Ty: VecTy, DAG));
1685 // Calculate the OR of all bytes in each group of 8. That will compress
1686 // all the individual bits into a single byte.
1687 // First, OR groups of 4, via vrmpy with 0x01010101.
1688 SDValue All1 =
1689 DAG.getSplatBuildVector(VT: MVT::v4i8, DL: dl, Op: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
1690 SDValue Vrmpy = getInstr(MachineOpc: Hexagon::V6_vrmpyub, dl, Ty: ByteTy, Ops: {Sel, All1}, DAG);
1691 // Then rotate the accumulated vector by 4 bytes, and do the final OR.
1692 SDValue Rot = getInstr(MachineOpc: Hexagon::V6_valignbi, dl, Ty: ByteTy,
1693 Ops: {Vrmpy, Vrmpy, DAG.getTargetConstant(Val: 4, DL: dl, VT: MVT::i32)}, DAG);
1694 SDValue Vor = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ByteTy, Ops: {Vrmpy, Rot});
1695
1696 // Pick every 8th byte and coalesce them at the beginning of the output.
1697 // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
1698 // byte and so on.
1699 SmallVector<int,128> Mask;
1700 for (unsigned i = 0; i != HwLen; ++i)
1701 Mask.push_back(Elt: (8*i) % HwLen + i/(HwLen/8));
1702 SDValue Collect =
1703 DAG.getVectorShuffle(VT: ByteTy, dl, N1: Vor, N2: DAG.getUNDEF(VT: ByteTy), Mask);
1704 return DAG.getBitcast(VT: ResTy, V: Collect);
1705}
1706
1707SDValue
1708HexagonTargetLowering::resizeToWidth(SDValue VecV, MVT ResTy, bool Signed,
1709 const SDLoc &dl, SelectionDAG &DAG) const {
1710 // Take a vector and resize the element type to match the given type.
1711 MVT InpTy = ty(Op: VecV);
1712 if (InpTy == ResTy)
1713 return VecV;
1714
1715 unsigned InpWidth = InpTy.getSizeInBits();
1716 unsigned ResWidth = ResTy.getSizeInBits();
1717
1718 if (InpTy.isFloatingPoint()) {
1719 return InpWidth < ResWidth
1720 ? DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: ResTy, Operand: VecV)
1721 : DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: ResTy, N1: VecV,
1722 N2: DAG.getTargetConstant(Val: 0, DL: dl, VT: MVT::i32));
1723 }
1724
1725 assert(InpTy.isInteger());
1726
1727 if (InpWidth < ResWidth) {
1728 unsigned ExtOpc = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1729 return DAG.getNode(Opcode: ExtOpc, DL: dl, VT: ResTy, Operand: VecV);
1730 } else {
1731 unsigned NarOpc = Signed ? HexagonISD::SSAT : HexagonISD::USAT;
1732 return DAG.getNode(Opcode: NarOpc, DL: dl, VT: ResTy, N1: VecV, N2: DAG.getValueType(ResTy));
1733 }
1734}
1735
1736SDValue
1737HexagonTargetLowering::extractSubvector(SDValue Vec, MVT SubTy, unsigned SubIdx,
1738 SelectionDAG &DAG) const {
1739 assert(ty(Vec).getSizeInBits() % SubTy.getSizeInBits() == 0);
1740
1741 const SDLoc &dl(Vec);
1742 unsigned ElemIdx = SubIdx * SubTy.getVectorNumElements();
1743 return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL: dl, VT: SubTy,
1744 Ops: {Vec, DAG.getConstant(Val: ElemIdx, DL: dl, VT: MVT::i32)});
1745}
1746
1747SDValue
1748HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
1749 const {
1750 const SDLoc &dl(Op);
1751 MVT VecTy = ty(Op);
1752
1753 unsigned Size = Op.getNumOperands();
1754 SmallVector<SDValue,128> Ops;
1755 for (unsigned i = 0; i != Size; ++i)
1756 Ops.push_back(Elt: Op.getOperand(i));
1757
1758 if (VecTy.getVectorElementType() == MVT::i1)
1759 return buildHvxVectorPred(Values: Ops, dl, VecTy, DAG);
1760
1761 // In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is
1762 // not a legal type, just bitcast the node to use i16
1763 // types and bitcast the result back to f16
1764 if (VecTy.getVectorElementType() == MVT::f16 ||
1765 VecTy.getVectorElementType() == MVT::bf16) {
1766 SmallVector<SDValue, 64> NewOps;
1767 for (unsigned i = 0; i != Size; i++)
1768 NewOps.push_back(Elt: DAG.getBitcast(VT: MVT::i16, V: Ops[i]));
1769
1770 SDValue T0 =
1771 DAG.getNode(Opcode: ISD::BUILD_VECTOR, DL: dl, VT: tyVector(Ty: VecTy, ElemTy: MVT::i16), Ops: NewOps);
1772 return DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: VecTy.getVectorElementType()), V: T0);
1773 }
1774
1775 // First, split the BUILD_VECTOR for vector pairs. We could generate
1776 // some pairs directly (via splat), but splats should be generated
1777 // by the combiner prior to getting here.
1778 if (VecTy.getSizeInBits() == 16 * Subtarget.getVectorLength()) {
1779 ArrayRef<SDValue> A(Ops);
1780 MVT SingleTy = typeSplit(VecTy).first;
1781 SDValue V0 = buildHvxVectorReg(Values: A.take_front(N: Size / 2), dl, VecTy: SingleTy, DAG);
1782 SDValue V1 = buildHvxVectorReg(Values: A.drop_front(N: Size / 2), dl, VecTy: SingleTy, DAG);
1783 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, N1: V0, N2: V1);
1784 }
1785
1786 return buildHvxVectorReg(Values: Ops, dl, VecTy, DAG);
1787}
1788
1789SDValue
1790HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG)
1791 const {
1792 const SDLoc &dl(Op);
1793 MVT VecTy = ty(Op);
1794 MVT ArgTy = ty(Op: Op.getOperand(i: 0));
1795
1796 if (ArgTy == MVT::f16 || ArgTy == MVT::bf16) {
1797 MVT SplatTy = MVT::getVectorVT(VT: MVT::i16, NumElements: VecTy.getVectorNumElements());
1798 SDValue ToInt16 = DAG.getBitcast(VT: MVT::i16, V: Op.getOperand(i: 0));
1799 SDValue ToInt32 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: MVT::i32, Operand: ToInt16);
1800 SDValue Splat = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: SplatTy, Operand: ToInt32);
1801 return DAG.getBitcast(VT: VecTy, V: Splat);
1802 }
1803
1804 return SDValue();
1805}
1806
1807SDValue
1808HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
1809 const {
1810 // Vector concatenation of two integer (non-bool) vectors does not need
1811 // special lowering. Custom-lower concats of bool vectors and expand
1812 // concats of more than 2 vectors.
1813 MVT VecTy = ty(Op);
1814 const SDLoc &dl(Op);
1815 unsigned NumOp = Op.getNumOperands();
1816 if (VecTy.getVectorElementType() != MVT::i1) {
1817 if (NumOp == 2)
1818 return Op;
1819 // Expand the other cases into a build-vector.
1820 SmallVector<SDValue,8> Elems;
1821 for (SDValue V : Op.getNode()->ops())
1822 DAG.ExtractVectorElements(Op: V, Args&: Elems);
1823 // A vector of i16 will be broken up into a build_vector of i16's.
1824 // This is a problem, since at the time of operation legalization,
1825 // all operations are expected to be type-legalized, and i16 is not
1826 // a legal type. If any of the extracted elements is not of a valid
1827 // type, sign-extend it to a valid one.
1828 for (SDValue &V : Elems) {
1829 MVT Ty = ty(Op: V);
1830 if (!isTypeLegal(VT: Ty)) {
1831 MVT NTy = typeLegalize(Ty, DAG);
1832 if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1833 V = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT: NTy,
1834 N1: DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: NTy,
1835 N1: V.getOperand(i: 0), N2: V.getOperand(i: 1)),
1836 N2: DAG.getValueType(Ty));
1837 continue;
1838 }
1839 // A few less complicated cases.
1840 switch (V.getOpcode()) {
1841 case ISD::Constant:
1842 V = DAG.getSExtOrTrunc(Op: V, DL: dl, VT: NTy);
1843 break;
1844 case ISD::UNDEF:
1845 V = DAG.getUNDEF(VT: NTy);
1846 break;
1847 case ISD::TRUNCATE:
1848 V = V.getOperand(i: 0);
1849 break;
1850 default:
1851 llvm_unreachable("Unexpected vector element");
1852 }
1853 }
1854 }
1855 return DAG.getBuildVector(VT: VecTy, DL: dl, Ops: Elems);
1856 }
1857
1858 assert(VecTy.getVectorElementType() == MVT::i1);
1859 unsigned HwLen = Subtarget.getVectorLength();
1860 assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0);
1861
1862 SDValue Op0 = Op.getOperand(i: 0);
1863
1864 // If the operands are HVX types (i.e. not scalar predicates), then
1865 // defer the concatenation, and create QCAT instead.
1866 if (Subtarget.isHVXVectorType(VecTy: ty(Op: Op0), IncludeBool: true)) {
1867 if (NumOp == 2)
1868 return DAG.getNode(Opcode: HexagonISD::QCAT, DL: dl, VT: VecTy, N1: Op0, N2: Op.getOperand(i: 1));
1869
1870 ArrayRef<SDUse> U(Op.getNode()->ops());
1871 SmallVector<SDValue, 4> SV(U);
1872 ArrayRef<SDValue> Ops(SV);
1873
1874 MVT HalfTy = typeSplit(VecTy).first;
1875 SDValue V0 = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: HalfTy,
1876 Ops: Ops.take_front(N: NumOp/2));
1877 SDValue V1 = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: HalfTy,
1878 Ops: Ops.take_back(N: NumOp/2));
1879 return DAG.getNode(Opcode: HexagonISD::QCAT, DL: dl, VT: VecTy, N1: V0, N2: V1);
1880 }
1881
1882 // Count how many bytes (in a vector register) each bit in VecTy
1883 // corresponds to.
1884 unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
1885
1886 // Make sure that createHvxPrefixPred will only ever need to expand
1887 // the predicate, i.e. bytes-per-bit in the input is not greater than
1888 // the target bytes-per-bit in the result.
1889 SDValue Combined = combineConcatOfScalarPreds(Op, BitBytes, DAG);
1890 SmallVector<SDValue,8> Prefixes;
1891 for (SDValue V : Combined.getNode()->op_values()) {
1892 SDValue P = createHvxPrefixPred(PredV: V, dl, BitBytes, ZeroFill: true, DAG);
1893 Prefixes.push_back(Elt: P);
1894 }
1895
1896 unsigned InpLen = ty(Op: Combined.getOperand(i: 0)).getVectorNumElements();
1897 MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
1898 SDValue S = DAG.getConstant(Val: HwLen - InpLen*BitBytes, DL: dl, VT: MVT::i32);
1899 SDValue Res = getZero(dl, Ty: ByteTy, DAG);
1900 for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) {
1901 Res = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: Res, N2: S);
1902 Res = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ByteTy, N1: Res, N2: Prefixes[e-i-1]);
1903 }
1904 return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: VecTy, Operand: Res);
1905}
1906
1907SDValue
1908HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
1909 const {
1910 // Change the type of the extracted element to i32.
1911 SDValue VecV = Op.getOperand(i: 0);
1912 MVT ElemTy = ty(Op: VecV).getVectorElementType();
1913 const SDLoc &dl(Op);
1914 SDValue IdxV = Op.getOperand(i: 1);
1915 if (ElemTy == MVT::i1)
1916 return extractHvxElementPred(VecV, IdxV, dl, ResTy: ty(Op), DAG);
1917
1918 return extractHvxElementReg(VecV, IdxV, dl, ResTy: ty(Op), DAG);
1919}
1920
1921SDValue
1922HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
1923 const {
1924 const SDLoc &dl(Op);
1925 MVT VecTy = ty(Op);
1926 SDValue VecV = Op.getOperand(i: 0);
1927 SDValue ValV = Op.getOperand(i: 1);
1928 SDValue IdxV = Op.getOperand(i: 2);
1929 MVT ElemTy = ty(Op: VecV).getVectorElementType();
1930 if (ElemTy == MVT::i1)
1931 return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);
1932
1933 if (ElemTy == MVT::f16 || ElemTy == MVT::bf16) {
1934 SDValue T0 = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: dl,
1935 VT: tyVector(Ty: VecTy, ElemTy: MVT::i16),
1936 N1: DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: MVT::i16), V: VecV),
1937 N2: DAG.getBitcast(VT: MVT::i16, V: ValV), N3: IdxV);
1938 return DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy), V: T0);
1939 }
1940
1941 return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
1942}
1943
1944SDValue
1945HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
1946 const {
1947 SDValue SrcV = Op.getOperand(i: 0);
1948 MVT SrcTy = ty(Op: SrcV);
1949 MVT DstTy = ty(Op);
1950 SDValue IdxV = Op.getOperand(i: 1);
1951 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1952 assert(Idx % DstTy.getVectorNumElements() == 0);
1953 (void)Idx;
1954 const SDLoc &dl(Op);
1955
1956 MVT ElemTy = SrcTy.getVectorElementType();
1957 if (ElemTy == MVT::i1)
1958 return extractHvxSubvectorPred(VecV: SrcV, IdxV, dl, ResTy: DstTy, DAG);
1959
1960 return extractHvxSubvectorReg(OrigOp: Op, VecV: SrcV, IdxV, dl, ResTy: DstTy, DAG);
1961}
1962
1963SDValue
1964HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG)
1965 const {
1966 // Idx does not need to be a constant.
1967 SDValue VecV = Op.getOperand(i: 0);
1968 SDValue ValV = Op.getOperand(i: 1);
1969 SDValue IdxV = Op.getOperand(i: 2);
1970
1971 const SDLoc &dl(Op);
1972 MVT VecTy = ty(Op: VecV);
1973 MVT ElemTy = VecTy.getVectorElementType();
1974 if (ElemTy == MVT::i1)
1975 return insertHvxSubvectorPred(VecV, SubV: ValV, IdxV, dl, DAG);
1976
1977 return insertHvxSubvectorReg(VecV, SubV: ValV, IdxV, dl, DAG);
1978}
1979
1980SDValue
1981HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const {
1982 // Lower any-extends of boolean vectors to sign-extends, since they
1983 // translate directly to Q2V. Zero-extending could also be done equally
1984 // fast, but Q2V is used/recognized in more places.
1985 // For all other vectors, use zero-extend.
1986 MVT ResTy = ty(Op);
1987 SDValue InpV = Op.getOperand(i: 0);
1988 MVT ElemTy = ty(Op: InpV).getVectorElementType();
1989 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(VecTy: ResTy))
1990 return LowerHvxSignExt(Op, DAG);
1991 return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SDLoc(Op), VT: ResTy, Operand: InpV);
1992}
1993
1994SDValue
1995HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const {
1996 MVT ResTy = ty(Op);
1997 SDValue InpV = Op.getOperand(i: 0);
1998 MVT ElemTy = ty(Op: InpV).getVectorElementType();
1999 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(VecTy: ResTy))
2000 return extendHvxVectorPred(VecV: InpV, dl: SDLoc(Op), ResTy: ty(Op), ZeroExt: false, DAG);
2001 return Op;
2002}
2003
2004SDValue
2005HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const {
2006 MVT ResTy = ty(Op);
2007 SDValue InpV = Op.getOperand(i: 0);
2008 MVT ElemTy = ty(Op: InpV).getVectorElementType();
2009 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(VecTy: ResTy))
2010 return extendHvxVectorPred(VecV: InpV, dl: SDLoc(Op), ResTy: ty(Op), ZeroExt: true, DAG);
2011 return Op;
2012}
2013
2014SDValue
2015HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
2016 // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
2017 // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
2018 const SDLoc &dl(Op);
2019 MVT ResTy = ty(Op);
2020 SDValue InpV = Op.getOperand(i: 0);
2021 assert(ResTy == ty(InpV));
2022
2023 // Calculate the vectors of 1 and bitwidth(x).
2024 MVT ElemTy = ty(Op: InpV).getVectorElementType();
2025 unsigned ElemWidth = ElemTy.getSizeInBits();
2026
2027 SDValue Vec1 = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ResTy,
2028 Operand: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
2029 SDValue VecW = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ResTy,
2030 Operand: DAG.getConstant(Val: ElemWidth, DL: dl, VT: MVT::i32));
2031 SDValue VecN1 = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ResTy,
2032 Operand: DAG.getAllOnesConstant(DL: dl, VT: MVT::i32));
2033
2034 // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
2035 // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
2036 // it separately in custom combine or selection).
2037 SDValue A = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ResTy,
2038 Ops: {DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {InpV, VecN1}),
2039 DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {InpV, Vec1})});
2040 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy,
2041 Ops: {VecW, DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT: ResTy, Operand: A)});
2042}
2043
2044SDValue
2045HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
2046 const SDLoc &dl(Op);
2047 MVT ResTy = ty(Op);
2048 assert(ResTy.getVectorElementType() == MVT::i32);
2049
2050 SDValue Vs = Op.getOperand(i: 0);
2051 SDValue Vt = Op.getOperand(i: 1);
2052
2053 SDVTList ResTys = DAG.getVTList(VT1: ResTy, VT2: ResTy);
2054 unsigned Opc = Op.getOpcode();
2055
2056 // On HVX v62+ producing the full product is cheap, so legalize MULH to LOHI.
2057 if (Opc == ISD::MULHU)
2058 return DAG.getNode(Opcode: HexagonISD::UMUL_LOHI, DL: dl, VTList: ResTys, Ops: {Vs, Vt}).getValue(R: 1);
2059 if (Opc == ISD::MULHS)
2060 return DAG.getNode(Opcode: HexagonISD::SMUL_LOHI, DL: dl, VTList: ResTys, Ops: {Vs, Vt}).getValue(R: 1);
2061
2062#ifndef NDEBUG
2063 Op.dump(&DAG);
2064#endif
2065 llvm_unreachable("Unexpected mulh operation");
2066}
2067
2068SDValue
2069HexagonTargetLowering::LowerHvxMulLoHi(SDValue Op, SelectionDAG &DAG) const {
2070 const SDLoc &dl(Op);
2071 unsigned Opc = Op.getOpcode();
2072 SDValue Vu = Op.getOperand(i: 0);
2073 SDValue Vv = Op.getOperand(i: 1);
2074
2075 // If the HI part is not used, convert it to a regular MUL.
2076 if (auto HiVal = Op.getValue(R: 1); HiVal.use_empty()) {
2077 // Need to preserve the types and the number of values.
2078 SDValue Hi = DAG.getUNDEF(VT: ty(Op: HiVal));
2079 SDValue Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: ty(Op), Ops: {Vu, Vv});
2080 return DAG.getMergeValues(Ops: {Lo, Hi}, dl);
2081 }
2082
2083 bool SignedVu = Opc == HexagonISD::SMUL_LOHI;
2084 bool SignedVv = Opc == HexagonISD::SMUL_LOHI || Opc == HexagonISD::USMUL_LOHI;
2085
2086 // Legal on HVX v62+, but lower it here because patterns can't handle multi-
2087 // valued nodes.
2088 if (Subtarget.useHVXV62Ops())
2089 return emitHvxMulLoHiV62(A: Vu, SignedA: SignedVu, B: Vv, SignedB: SignedVv, dl, DAG);
2090
2091 if (Opc == HexagonISD::SMUL_LOHI) {
2092 // Direct MULHS expansion is cheaper than doing the whole SMUL_LOHI,
2093 // for other signedness LOHI is cheaper.
2094 if (auto LoVal = Op.getValue(R: 0); LoVal.use_empty()) {
2095 SDValue Hi = emitHvxMulHsV60(A: Vu, B: Vv, dl, DAG);
2096 SDValue Lo = DAG.getUNDEF(VT: ty(Op: LoVal));
2097 return DAG.getMergeValues(Ops: {Lo, Hi}, dl);
2098 }
2099 }
2100
2101 return emitHvxMulLoHiV60(A: Vu, SignedA: SignedVu, B: Vv, SignedB: SignedVv, dl, DAG);
2102}
2103
2104SDValue
2105HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
2106 SDValue Val = Op.getOperand(i: 0);
2107 MVT ResTy = ty(Op);
2108 MVT ValTy = ty(Op: Val);
2109 const SDLoc &dl(Op);
2110
2111 if (isHvxBoolTy(Ty: ValTy) && ResTy.isScalarInteger()) {
2112 unsigned HwLen = Subtarget.getVectorLength();
2113 MVT WordTy = MVT::getVectorVT(VT: MVT::i32, NumElements: HwLen/4);
2114
2115 // When the predicate is shorter than the predicate register, each boolean
2116 // is represented by multiple consecutive bits in the input register.
2117 // Condense the bits so each boolean is represented by one bit. This only
2118 // handles 2x and 4x compaction ratios.
2119 unsigned PredLen = ValTy.getVectorNumElements();
2120 if (PredLen < HwLen) {
2121 MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
2122 Val = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: Val);
2123 if (HwLen > PredLen * 2) {
2124 assert(HwLen == PredLen * 4);
2125 PredLen *= 2;
2126 Val = getInstr(MachineOpc: Hexagon::V6_vdealh, dl, Ty: ByteTy, Ops: Val, DAG);
2127 }
2128 if (HwLen > PredLen) {
2129 assert(HwLen == PredLen * 2);
2130 Val = getInstr(MachineOpc: Hexagon::V6_vdealb, dl, Ty: ByteTy, Ops: Val, DAG);
2131 }
2132 Val = DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ValTy, Operand: Val);
2133 }
2134
2135 SDValue VQ = compressHvxPred(VecQ: Val, dl, ResTy: WordTy, DAG);
2136 unsigned BitWidth = ResTy.getSizeInBits();
2137
2138 if (BitWidth < 64) {
2139 SDValue W0 = extractHvxElementReg(VecV: VQ, IdxV: DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32),
2140 dl, ResTy: MVT::i32, DAG);
2141 if (BitWidth == 32)
2142 return W0;
2143 assert(BitWidth < 32u);
2144 return DAG.getZExtOrTrunc(Op: W0, DL: dl, VT: ResTy);
2145 }
2146
2147 // The result is >= 64 bits. The only options are 64 or 128.
2148 assert(BitWidth == 64 || BitWidth == 128);
2149 SmallVector<SDValue,4> Words;
2150 for (unsigned i = 0; i != BitWidth/32; ++i) {
2151 SDValue W = extractHvxElementReg(
2152 VecV: VQ, IdxV: DAG.getConstant(Val: i, DL: dl, VT: MVT::i32), dl, ResTy: MVT::i32, DAG);
2153 Words.push_back(Elt: W);
2154 }
2155 SmallVector<SDValue,2> Combines;
2156 assert(Words.size() % 2 == 0);
2157 for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
2158 SDValue C = getCombine(Hi: Words[i+1], Lo: Words[i], dl, ResTy: MVT::i64, DAG);
2159 Combines.push_back(Elt: C);
2160 }
2161
2162 if (BitWidth == 64)
2163 return Combines[0];
2164
2165 return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: ResTy, Ops: Combines);
2166 }
2167
2168 // Handle bitcast from i32, v2i16, and v4i8 to v32i1.
2169 // Splat the input into a 32-element i32 vector, then AND each element
2170 // with a unique bitmask to isolate individual bits.
2171 auto bitcastI32ToV32I1 = [&](SDValue Val32) {
2172 assert(Val32.getValueType().getSizeInBits() == 32 &&
2173 "Input must be 32 bits");
2174 MVT VecTy = MVT::getVectorVT(VT: MVT::i32, NumElements: 32);
2175 SDValue Splat = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: VecTy, Operand: Val32);
2176 SmallVector<SDValue, 32> Mask;
2177 for (unsigned i = 0; i < 32; ++i)
2178 Mask.push_back(Elt: DAG.getConstant(Val: 1ull << i, DL: dl, VT: MVT::i32));
2179
2180 SDValue MaskVec = DAG.getBuildVector(VT: VecTy, DL: dl, Ops: Mask);
2181 SDValue Anded = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: VecTy, N1: Splat, N2: MaskVec);
2182 return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: MVT::v32i1, Operand: Anded);
2183 };
2184 // === Case: v32i1 ===
2185 if (ResTy == MVT::v32i1 &&
2186 (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) &&
2187 Subtarget.useHVX128BOps()) {
2188 SDValue Val32 = Val;
2189 if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8)
2190 Val32 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: Val);
2191 return bitcastI32ToV32I1(Val32);
2192 }
2193 // === Case: v64i1 ===
2194 if (ResTy == MVT::v64i1 && ValTy == MVT::i64 && Subtarget.useHVX128BOps()) {
2195 // Split i64 into lo/hi 32-bit halves.
2196 SDValue Lo = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32, Operand: Val);
2197 SDValue HiShifted = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i64, N1: Val,
2198 N2: DAG.getConstant(Val: 32, DL: dl, VT: MVT::i64));
2199 SDValue Hi = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32, Operand: HiShifted);
2200
2201 // Reuse the same 32-bit logic twice.
2202 SDValue LoRes = bitcastI32ToV32I1(Lo);
2203 SDValue HiRes = bitcastI32ToV32I1(Hi);
2204
2205 // Concatenate into a v64i1 predicate.
2206 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: MVT::v64i1, N1: LoRes, N2: HiRes);
2207 }
2208
2209 if (isHvxBoolTy(Ty: ResTy) && ValTy.isScalarInteger()) {
2210 // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
2211 unsigned BitWidth = ValTy.getSizeInBits();
2212 unsigned HwLen = Subtarget.getVectorLength();
2213 assert(BitWidth == HwLen);
2214
2215 MVT ValAsVecTy = MVT::getVectorVT(VT: MVT::i8, NumElements: BitWidth / 8);
2216 SDValue ValAsVec = DAG.getBitcast(VT: ValAsVecTy, V: Val);
2217 // Splat each byte of Val 8 times.
2218 // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8]
2219 // where b0, b1,..., b15 are least to most significant bytes of I.
2220 SmallVector<SDValue, 128> Bytes;
2221 // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,...
2222 // These are bytes with the LSB rotated left with respect to their index.
2223 SmallVector<SDValue, 128> Tmp;
2224 for (unsigned I = 0; I != HwLen / 8; ++I) {
2225 SDValue Idx = DAG.getConstant(Val: I, DL: dl, VT: MVT::i32);
2226 SDValue Byte =
2227 DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: MVT::i8, N1: ValAsVec, N2: Idx);
2228 for (unsigned J = 0; J != 8; ++J) {
2229 Bytes.push_back(Elt: Byte);
2230 Tmp.push_back(Elt: DAG.getConstant(Val: 1ull << J, DL: dl, VT: MVT::i8));
2231 }
2232 }
2233
2234 MVT ConstantVecTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
2235 SDValue ConstantVec = DAG.getBuildVector(VT: ConstantVecTy, DL: dl, Ops: Tmp);
2236 SDValue I2V = buildHvxVectorReg(Values: Bytes, dl, VecTy: ConstantVecTy, DAG);
2237
2238 // Each Byte in the I2V will be set iff corresponding bit is set in Val.
2239 I2V = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ConstantVecTy, Ops: {I2V, ConstantVec});
2240 return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ResTy, Operand: I2V);
2241 }
2242
2243 return Op;
2244}
2245
2246SDValue HexagonTargetLowering::LowerHvxStore(SDValue Op,
2247 SelectionDAG &DAG) const {
2248 const SDLoc &dl(Op);
2249 StoreSDNode *SN = cast<StoreSDNode>(Val: Op.getNode());
2250 SDValue Val = SN->getValue();
2251 MVT ValTy = ty(Op: Val);
2252
2253 // Check if this is a store of an HVX bool vector (predicate)
2254 if (!isHvxBoolTy(Ty: ValTy))
2255 return SDValue();
2256
2257 unsigned NumBits = ValTy.getVectorNumElements();
2258 MachineMemOperand *MMO = SN->getMemOperand();
2259
2260 // Check alignment requirements based on predicate size
2261 unsigned RequiredAlign = (NumBits == 32) ? 4 : 8;
2262 if (MMO->getBaseAlign().value() % RequiredAlign != 0)
2263 return SDValue();
2264
2265 unsigned HwLen = Subtarget.getVectorLength();
2266 MVT WordTy = MVT::getVectorVT(VT: MVT::i32, NumElements: HwLen / 4);
2267
2268 // Compress the predicate into a vector register
2269 SDValue VQ = compressHvxPred(VecQ: Val, dl, ResTy: WordTy, DAG);
2270
2271 // Extract words from the compressed vector
2272 SmallVector<SDValue, 4> Words;
2273 for (unsigned i = 0; i != NumBits / 32; ++i) {
2274 SDValue W = extractHvxElementReg(VecV: VQ, IdxV: DAG.getConstant(Val: i, DL: dl, VT: MVT::i32), dl,
2275 ResTy: MVT::i32, DAG);
2276 Words.push_back(Elt: W);
2277 }
2278
2279 SDValue Chain = SN->getChain();
2280 SDValue BasePtr = SN->getBasePtr();
2281 MachinePointerInfo PtrInfo = MMO->getPointerInfo();
2282
2283 if (NumBits == 32)
2284 return DAG.getStore(Chain, dl, Val: Words[0], Ptr: BasePtr, PtrInfo,
2285 Alignment: MMO->getBaseAlign());
2286
2287 if (NumBits == 64) {
2288 SDValue W64 = getCombine(Hi: Words[1], Lo: Words[0], dl, ResTy: MVT::i64, DAG);
2289 return DAG.getStore(Chain, dl, Val: W64, Ptr: BasePtr, PtrInfo, Alignment: MMO->getBaseAlign());
2290 }
2291
2292 if (NumBits == 128) {
2293 SDValue Lo64 = getCombine(Hi: Words[1], Lo: Words[0], dl, ResTy: MVT::i64, DAG);
2294 SDValue Hi64 = getCombine(Hi: Words[3], Lo: Words[2], dl, ResTy: MVT::i64, DAG);
2295
2296 Chain =
2297 DAG.getStore(Chain, dl, Val: Lo64, Ptr: BasePtr, PtrInfo, Alignment: MMO->getBaseAlign());
2298
2299 SDValue Offset8 = DAG.getConstant(Val: 8, DL: dl, VT: MVT::i32);
2300 SDValue Ptr8 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i32, N1: BasePtr, N2: Offset8);
2301 return DAG.getStore(Chain, dl, Val: Hi64, Ptr: Ptr8, PtrInfo: PtrInfo.getWithOffset(O: 8),
2302 Alignment: Align(8));
2303 }
2304
2305 return SDValue();
2306}
2307
2308SDValue HexagonTargetLowering::LowerHvxLoad(SDValue Op,
2309 SelectionDAG &DAG) const {
2310 const SDLoc &dl(Op);
2311 LoadSDNode *LN = cast<LoadSDNode>(Val: Op.getNode());
2312 MVT ResTy = ty(Op);
2313
2314 // Check if this is a load of an HVX bool vector (predicate)
2315 if (!isHvxBoolTy(Ty: ResTy))
2316 return SDValue();
2317
2318 unsigned NumBits = ResTy.getVectorNumElements();
2319 MachineMemOperand *MMO = LN->getMemOperand();
2320
2321 unsigned RequiredAlign = (NumBits == 32) ? 4 : 8;
2322 if (MMO->getBaseAlign().value() % RequiredAlign != 0)
2323 return SDValue();
2324
2325 SDValue Chain = LN->getChain();
2326 SDValue BasePtr = LN->getBasePtr();
2327 MachinePointerInfo PtrInfo = MMO->getPointerInfo();
2328
2329 if (NumBits == 32) {
2330 SDValue W32 =
2331 DAG.getLoad(VT: MVT::i32, dl, Chain, Ptr: BasePtr, PtrInfo, Alignment: MMO->getBaseAlign());
2332 SDValue Pred = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v32i1, Operand: W32);
2333 SDValue Ops[] = {Pred, W32.getValue(R: 1)};
2334 return DAG.getMergeValues(Ops, dl);
2335 }
2336
2337 if (NumBits == 64) {
2338 SDValue W64 =
2339 DAG.getLoad(VT: MVT::i64, dl, Chain, Ptr: BasePtr, PtrInfo, Alignment: MMO->getBaseAlign());
2340 SDValue Pred = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v64i1, Operand: W64);
2341 SDValue Ops[] = {Pred, W64.getValue(R: 1)};
2342 return DAG.getMergeValues(Ops, dl);
2343 }
2344
2345 if (NumBits == 128) {
2346 SDValue Lo64 =
2347 DAG.getLoad(VT: MVT::i64, dl, Chain, Ptr: BasePtr, PtrInfo, Alignment: MMO->getBaseAlign());
2348 Chain = Lo64.getValue(R: 1);
2349
2350 SDValue Offset8 = DAG.getConstant(Val: 8, DL: dl, VT: MVT::i32);
2351 SDValue Ptr8 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i32, N1: BasePtr, N2: Offset8);
2352 SDValue Hi64 = DAG.getLoad(VT: MVT::i64, dl, Chain, Ptr: Ptr8,
2353 PtrInfo: PtrInfo.getWithOffset(O: 8), Alignment: Align(8));
2354
2355 SDValue LoPred = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v64i1, Operand: Lo64);
2356 SDValue HiPred = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v64i1, Operand: Hi64);
2357 SDValue Pred =
2358 DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: MVT::v128i1, N1: LoPred, N2: HiPred);
2359
2360 SDValue Ops[] = {Pred, Hi64.getValue(R: 1)};
2361 return DAG.getMergeValues(Ops, dl);
2362 }
2363
2364 return SDValue();
2365}
2366
2367SDValue
2368HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
2369 // Sign- and zero-extends are legal.
2370 assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
2371 return DAG.getNode(Opcode: ISD::ZERO_EXTEND_VECTOR_INREG, DL: SDLoc(Op), VT: ty(Op),
2372 Operand: Op.getOperand(i: 0));
2373}
2374
2375SDValue
2376HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const {
2377 MVT ResTy = ty(Op);
2378 if (ResTy.getVectorElementType() != MVT::i1)
2379 return Op;
2380
2381 const SDLoc &dl(Op);
2382 unsigned HwLen = Subtarget.getVectorLength();
2383 unsigned VecLen = ResTy.getVectorNumElements();
2384 assert(HwLen % VecLen == 0);
2385 unsigned ElemSize = HwLen / VecLen;
2386
2387 MVT VecTy = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: ElemSize * 8), NumElements: VecLen);
2388 SDValue S =
2389 DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: VecTy, N1: Op.getOperand(i: 0),
2390 N2: DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: VecTy, Operand: Op.getOperand(i: 1)),
2391 N3: DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: VecTy, Operand: Op.getOperand(i: 2)));
2392 return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ResTy, Operand: S);
2393}
2394
2395SDValue
2396HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
2397 if (SDValue S = getVectorShiftByInt(Op, DAG))
2398 return S;
2399 return Op;
2400}
2401
2402SDValue
2403HexagonTargetLowering::LowerHvxFunnelShift(SDValue Op,
2404 SelectionDAG &DAG) const {
2405 unsigned Opc = Op.getOpcode();
2406 assert(Opc == ISD::FSHL || Opc == ISD::FSHR);
2407
2408 // Make sure the shift amount is within the range of the bitwidth
2409 // of the element type.
2410 SDValue A = Op.getOperand(i: 0);
2411 SDValue B = Op.getOperand(i: 1);
2412 SDValue S = Op.getOperand(i: 2);
2413
2414 MVT InpTy = ty(Op: A);
2415 MVT ElemTy = InpTy.getVectorElementType();
2416
2417 const SDLoc &dl(Op);
2418 unsigned ElemWidth = ElemTy.getSizeInBits();
2419 bool IsLeft = Opc == ISD::FSHL;
2420
2421 // The expansion into regular shifts produces worse code for i8 and for
2422 // right shift of i32 on v65+.
2423 bool UseShifts = ElemTy != MVT::i8;
2424 if (Subtarget.useHVXV65Ops() && ElemTy == MVT::i32)
2425 UseShifts = false;
2426
2427 if (SDValue SplatV = getSplatValue(Op: S, DAG); SplatV && UseShifts) {
2428 // If this is a funnel shift by a scalar, lower it into regular shifts.
2429 SDValue Mask = DAG.getConstant(Val: ElemWidth - 1, DL: dl, VT: MVT::i32);
2430 SDValue ModS =
2431 DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32,
2432 Ops: {DAG.getZExtOrTrunc(Op: SplatV, DL: dl, VT: MVT::i32), Mask});
2433 SDValue NegS =
2434 DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32,
2435 Ops: {DAG.getConstant(Val: ElemWidth, DL: dl, VT: MVT::i32), ModS});
2436 SDValue IsZero =
2437 DAG.getSetCC(DL: dl, VT: MVT::i1, LHS: ModS, RHS: getZero(dl, Ty: MVT::i32, DAG), Cond: ISD::SETEQ);
2438 // FSHL A, B => A << | B >>n
2439 // FSHR A, B => A <<n | B >>
2440 SDValue Part1 =
2441 DAG.getNode(Opcode: HexagonISD::VASL, DL: dl, VT: InpTy, Ops: {A, IsLeft ? ModS : NegS});
2442 SDValue Part2 =
2443 DAG.getNode(Opcode: HexagonISD::VLSR, DL: dl, VT: InpTy, Ops: {B, IsLeft ? NegS : ModS});
2444 SDValue Or = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: InpTy, Ops: {Part1, Part2});
2445 // If the shift amount was 0, pick A or B, depending on the direction.
2446 // The opposite shift will also be by 0, so the "Or" will be incorrect.
2447 return DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: InpTy, Ops: {IsZero, (IsLeft ? A : B), Or});
2448 }
2449
2450 SDValue Mask = DAG.getSplatBuildVector(
2451 VT: InpTy, DL: dl, Op: DAG.getConstant(Val: ElemWidth - 1, DL: dl, VT: ElemTy));
2452
2453 unsigned MOpc = Opc == ISD::FSHL ? HexagonISD::MFSHL : HexagonISD::MFSHR;
2454 return DAG.getNode(Opcode: MOpc, DL: dl, VT: ty(Op),
2455 Ops: {A, B, DAG.getNode(Opcode: ISD::AND, DL: dl, VT: InpTy, Ops: {S, Mask})});
2456}
2457
2458SDValue
2459HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
2460 const SDLoc &dl(Op);
2461 unsigned IntNo = Op.getConstantOperandVal(i: 0);
2462 SmallVector<SDValue> Ops(Op->ops());
2463
2464 auto Swap = [&](SDValue P) {
2465 return DAG.getMergeValues(Ops: {P.getValue(R: 1), P.getValue(R: 0)}, dl);
2466 };
2467
2468 switch (IntNo) {
2469 case Intrinsic::hexagon_V6_pred_typecast:
2470 case Intrinsic::hexagon_V6_pred_typecast_128B: {
2471 MVT ResTy = ty(Op), InpTy = ty(Op: Ops[1]);
2472 if (isHvxBoolTy(Ty: ResTy) && isHvxBoolTy(Ty: InpTy)) {
2473 if (ResTy == InpTy)
2474 return Ops[1];
2475 return DAG.getNode(Opcode: HexagonISD::TYPECAST, DL: dl, VT: ResTy, Operand: Ops[1]);
2476 }
2477 break;
2478 }
2479 case Intrinsic::hexagon_V6_vmpyss_parts:
2480 case Intrinsic::hexagon_V6_vmpyss_parts_128B:
2481 return Swap(DAG.getNode(Opcode: HexagonISD::SMUL_LOHI, DL: dl, VTList: Op->getVTList(),
2482 Ops: {Ops[1], Ops[2]}));
2483 case Intrinsic::hexagon_V6_vmpyuu_parts:
2484 case Intrinsic::hexagon_V6_vmpyuu_parts_128B:
2485 return Swap(DAG.getNode(Opcode: HexagonISD::UMUL_LOHI, DL: dl, VTList: Op->getVTList(),
2486 Ops: {Ops[1], Ops[2]}));
2487 case Intrinsic::hexagon_V6_vmpyus_parts:
2488 case Intrinsic::hexagon_V6_vmpyus_parts_128B: {
2489 return Swap(DAG.getNode(Opcode: HexagonISD::USMUL_LOHI, DL: dl, VTList: Op->getVTList(),
2490 Ops: {Ops[1], Ops[2]}));
2491 }
2492 } // switch
2493
2494 return Op;
2495}
2496
2497SDValue
2498HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const {
2499 const SDLoc &dl(Op);
2500 unsigned HwLen = Subtarget.getVectorLength();
2501 MachineFunction &MF = DAG.getMachineFunction();
2502 auto *MaskN = cast<MaskedLoadStoreSDNode>(Val: Op.getNode());
2503 SDValue Mask = MaskN->getMask();
2504 SDValue Chain = MaskN->getChain();
2505 SDValue Base = MaskN->getBasePtr();
2506 auto *MemOp = MF.getMachineMemOperand(MMO: MaskN->getMemOperand(), Offset: 0, Size: HwLen);
2507
2508 unsigned Opc = Op->getOpcode();
2509 assert(Opc == ISD::MLOAD || Opc == ISD::MSTORE);
2510
2511 if (Opc == ISD::MLOAD) {
2512 MVT ValTy = ty(Op);
2513 SDValue Load = DAG.getLoad(VT: ValTy, dl, Chain, Ptr: Base, MMO: MemOp);
2514 SDValue Thru = cast<MaskedLoadSDNode>(Val: MaskN)->getPassThru();
2515 if (isUndef(Op: Thru))
2516 return Load;
2517 SDValue VSel = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ValTy, N1: Mask, N2: Load, N3: Thru);
2518 return DAG.getMergeValues(Ops: {VSel, Load.getValue(R: 1)}, dl);
2519 }
2520
2521 // MSTORE
2522 // HVX only has aligned masked stores.
2523
2524 // TODO: Fold negations of the mask into the store.
2525 unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai;
2526 SDValue Value = cast<MaskedStoreSDNode>(Val: MaskN)->getValue();
2527 SDValue Offset0 = DAG.getTargetConstant(Val: 0, DL: dl, VT: ty(Op: Base));
2528
2529 if (MaskN->getAlign().value() % HwLen == 0) {
2530 SDValue Store = getInstr(MachineOpc: StoreOpc, dl, Ty: MVT::Other,
2531 Ops: {Mask, Base, Offset0, Value, Chain}, DAG);
2532 DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Store.getNode()), NewMemRefs: {MemOp});
2533 return Store;
2534 }
2535
2536 // Unaligned case.
2537 auto StoreAlign = [&](SDValue V, SDValue A) {
2538 SDValue Z = getZero(dl, Ty: ty(Op: V), DAG);
2539 // TODO: use funnel shifts?
2540 // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the
2541 // upper half.
2542 SDValue LoV = getInstr(MachineOpc: Hexagon::V6_vlalignb, dl, Ty: ty(Op: V), Ops: {V, Z, A}, DAG);
2543 SDValue HiV = getInstr(MachineOpc: Hexagon::V6_vlalignb, dl, Ty: ty(Op: V), Ops: {Z, V, A}, DAG);
2544 return std::make_pair(x&: LoV, y&: HiV);
2545 };
2546
2547 MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
2548 MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen);
2549 SDValue MaskV = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: Mask);
2550 VectorPair Tmp = StoreAlign(MaskV, Base);
2551 VectorPair MaskU = {DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: BoolTy, Operand: Tmp.first),
2552 DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: BoolTy, Operand: Tmp.second)};
2553 VectorPair ValueU = StoreAlign(Value, Base);
2554
2555 SDValue Offset1 = DAG.getTargetConstant(Val: HwLen, DL: dl, VT: MVT::i32);
2556 SDValue StoreLo =
2557 getInstr(MachineOpc: StoreOpc, dl, Ty: MVT::Other,
2558 Ops: {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG);
2559 SDValue StoreHi =
2560 getInstr(MachineOpc: StoreOpc, dl, Ty: MVT::Other,
2561 Ops: {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG);
2562 DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: StoreLo.getNode()), NewMemRefs: {MemOp});
2563 DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: StoreHi.getNode()), NewMemRefs: {MemOp});
2564 return DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: {StoreLo, StoreHi});
2565}
2566
2567SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op,
2568 SelectionDAG &DAG) const {
2569 // This conversion only applies to QFloat. IEEE extension from f16 to f32
2570 // is legal (done via a pattern).
2571 assert(Subtarget.useHVXQFloatOps());
2572
2573 assert(Op->getOpcode() == ISD::FP_EXTEND);
2574
2575 MVT VecTy = ty(Op);
2576 MVT ArgTy = ty(Op: Op.getOperand(i: 0));
2577 const SDLoc &dl(Op);
2578
2579 if (ArgTy == MVT::v64bf16) {
2580 MVT HalfTy = typeSplit(VecTy).first;
2581 SDValue BF16Vec = Op.getOperand(i: 0);
2582 SDValue Zeroes =
2583 getInstr(MachineOpc: Hexagon::V6_vxor, dl, Ty: HalfTy, Ops: {BF16Vec, BF16Vec}, DAG);
2584 // Interleave zero vector with the bf16 vector, with zeroes in the lower
2585 // half of each 32 bit lane, effectively extending the bf16 values to fp32
2586 // values.
2587 SDValue ShuffVec =
2588 getInstr(MachineOpc: Hexagon::V6_vshufoeh, dl, Ty: VecTy, Ops: {BF16Vec, Zeroes}, DAG);
2589 VectorPair VecPair = opSplit(Vec: ShuffVec, dl, DAG);
2590 SDValue Result = getInstr(MachineOpc: Hexagon::V6_vshuffvdd, dl, Ty: VecTy,
2591 Ops: {VecPair.second, VecPair.first,
2592 DAG.getSignedConstant(Val: -4, DL: dl, VT: MVT::i32)},
2593 DAG);
2594 return Result;
2595 }
2596
2597 assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16);
2598
2599 SDValue F16Vec = Op.getOperand(i: 0);
2600
2601 APFloat FloatVal = APFloat(1.0f);
2602 bool Ignored;
2603 FloatVal.convert(ToSemantics: APFloat::IEEEhalf(), RM: APFloat::rmNearestTiesToEven, losesInfo: &Ignored);
2604 SDValue Fp16Ones = DAG.getConstantFP(Val: FloatVal, DL: dl, VT: ArgTy);
2605 SDValue VmpyVec =
2606 getInstr(MachineOpc: Hexagon::V6_vmpy_qf32_hf, dl, Ty: VecTy, Ops: {F16Vec, Fp16Ones}, DAG);
2607
2608 MVT HalfTy = typeSplit(VecTy).first;
2609 VectorPair Pair = opSplit(Vec: VmpyVec, dl, DAG);
2610 SDValue LoVec =
2611 getInstr(MachineOpc: Hexagon::V6_vconv_sf_qf32, dl, Ty: HalfTy, Ops: {Pair.first}, DAG);
2612 SDValue HiVec =
2613 getInstr(MachineOpc: Hexagon::V6_vconv_sf_qf32, dl, Ty: HalfTy, Ops: {Pair.second}, DAG);
2614
2615 SDValue ShuffVec =
2616 getInstr(MachineOpc: Hexagon::V6_vshuffvdd, dl, Ty: VecTy,
2617 Ops: {HiVec, LoVec, DAG.getSignedConstant(Val: -4, DL: dl, VT: MVT::i32)}, DAG);
2618
2619 return ShuffVec;
2620}
2621
2622SDValue
2623HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2624 // Catch invalid conversion ops (just in case).
2625 assert(Op.getOpcode() == ISD::FP_TO_SINT ||
2626 Op.getOpcode() == ISD::FP_TO_UINT);
2627
2628 MVT ResTy = ty(Op);
2629 MVT FpTy = ty(Op: Op.getOperand(i: 0)).getVectorElementType();
2630 MVT IntTy = ResTy.getVectorElementType();
2631
2632 if (Subtarget.useHVXIEEEFPOps()) {
2633 // There are only conversions from f16.
2634 if (FpTy == MVT::f16) {
2635 // Other int types aren't legal in HVX, so we shouldn't see them here.
2636 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2637 // Conversions to i8 and i16 are legal.
2638 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2639 return Op;
2640 }
2641 }
2642
2643 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2644 return EqualizeFpIntConversion(Op, DAG);
2645
2646 return ExpandHvxFpToInt(Op, DAG);
2647}
2648
2649// For vector type v32i1 uint_to_fp/sint_to_fp to v32f32:
2650// R1 = #1, R2 holds the v32i1 param
2651// V1 = vsplat(R1)
2652// V2 = vsplat(R2)
2653// Q0 = vand(V1,R1)
2654// V0.w=prefixsum(Q0)
2655// V0.w=vsub(V0.w,V1.w)
2656// V2.w = vlsr(V2.w,V0.w)
2657// V2 = vand(V2,V1)
2658// V2.sf = V2.w
2659SDValue HexagonTargetLowering::LowerHvxPred32ToFp(SDValue PredOp,
2660 SelectionDAG &DAG) const {
2661
2662 MVT ResTy = ty(Op: PredOp);
2663 const SDLoc &dl(PredOp);
2664
2665 SDValue Const = DAG.getTargetConstant(Val: 0x1, DL: dl, VT: MVT::i32);
2666 SDNode *RegConst = DAG.getMachineNode(Opcode: Hexagon::A2_tfrsi, dl, VT: MVT::i32, Op1: Const);
2667 SDNode *SplatConst = DAG.getMachineNode(Opcode: Hexagon::V6_lvsplatw, dl, VT: MVT::v32i32,
2668 Op1: SDValue(RegConst, 0));
2669 SDNode *PredTransfer =
2670 DAG.getMachineNode(Opcode: Hexagon::V6_vandvrt, dl, VT: MVT::v32i1,
2671 Op1: SDValue(SplatConst, 0), Op2: SDValue(RegConst, 0));
2672 SDNode *PrefixSum = DAG.getMachineNode(Opcode: Hexagon::V6_vprefixqw, dl, VT: MVT::v32i32,
2673 Op1: SDValue(PredTransfer, 0));
2674 SDNode *SplatParam = DAG.getMachineNode(
2675 Opcode: Hexagon::V6_lvsplatw, dl, VT: MVT::v32i32,
2676 Op1: DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: PredOp.getOperand(i: 0)));
2677 SDNode *Vsub =
2678 DAG.getMachineNode(Opcode: Hexagon::V6_vsubw, dl, VT: MVT::v32i32,
2679 Op1: SDValue(PrefixSum, 0), Op2: SDValue(SplatConst, 0));
2680 SDNode *IndexShift =
2681 DAG.getMachineNode(Opcode: Hexagon::V6_vlsrwv, dl, VT: MVT::v32i32,
2682 Op1: SDValue(SplatParam, 0), Op2: SDValue(Vsub, 0));
2683 SDNode *MaskOff =
2684 DAG.getMachineNode(Opcode: Hexagon::V6_vand, dl, VT: MVT::v32i32,
2685 Op1: SDValue(IndexShift, 0), Op2: SDValue(SplatConst, 0));
2686 SDNode *Convert = DAG.getMachineNode(Opcode: Hexagon::V6_vconv_sf_w, dl, VT: ResTy,
2687 Op1: SDValue(MaskOff, 0));
2688 return SDValue(Convert, 0);
2689}
2690
2691// For vector type v64i1 uint_to_fo to v64f16:
2692// i64 R32 = bitcast v64i1 R3:2 (R3:2 holds v64i1)
2693// R3 = subreg_high (R32)
2694// R2 = subreg_low (R32)
2695// R1 = #1
2696// V1 = vsplat(R1)
2697// V2 = vsplat(R2)
2698// V3 = vsplat(R3)
2699// Q0 = vand(V1,R1)
2700// V0.w=prefixsum(Q0)
2701// V0.w=vsub(V0.w,V1.w)
2702// V2.w = vlsr(V2.w,V0.w)
2703// V3.w = vlsr(V3.w,V0.w)
2704// V2 = vand(V2,V1)
2705// V3 = vand(V3,V1)
2706// V2.h = vpacke(V3.w,V2.w)
2707// V2.hf = V2.h
2708SDValue HexagonTargetLowering::LowerHvxPred64ToFp(SDValue PredOp,
2709 SelectionDAG &DAG) const {
2710
2711 MVT ResTy = ty(Op: PredOp);
2712 const SDLoc &dl(PredOp);
2713
2714 SDValue Inp = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i64, Operand: PredOp.getOperand(i: 0));
2715 // Get the hi and lo regs
2716 SDValue HiReg =
2717 DAG.getTargetExtractSubreg(SRIdx: Hexagon::isub_hi, DL: dl, VT: MVT::i32, Operand: Inp);
2718 SDValue LoReg =
2719 DAG.getTargetExtractSubreg(SRIdx: Hexagon::isub_lo, DL: dl, VT: MVT::i32, Operand: Inp);
2720 // Get constant #1 and splat into vector V1
2721 SDValue Const = DAG.getTargetConstant(Val: 0x1, DL: dl, VT: MVT::i32);
2722 SDNode *RegConst = DAG.getMachineNode(Opcode: Hexagon::A2_tfrsi, dl, VT: MVT::i32, Op1: Const);
2723 SDNode *SplatConst = DAG.getMachineNode(Opcode: Hexagon::V6_lvsplatw, dl, VT: MVT::v32i32,
2724 Op1: SDValue(RegConst, 0));
2725 // Splat the hi and lo args
2726 SDNode *SplatHi =
2727 DAG.getMachineNode(Opcode: Hexagon::V6_lvsplatw, dl, VT: MVT::v32i32,
2728 Op1: DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: HiReg));
2729 SDNode *SplatLo =
2730 DAG.getMachineNode(Opcode: Hexagon::V6_lvsplatw, dl, VT: MVT::v32i32,
2731 Op1: DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: LoReg));
2732 // vand between splatted const and const
2733 SDNode *PredTransfer =
2734 DAG.getMachineNode(Opcode: Hexagon::V6_vandvrt, dl, VT: MVT::v32i1,
2735 Op1: SDValue(SplatConst, 0), Op2: SDValue(RegConst, 0));
2736 // Get the prefixsum
2737 SDNode *PrefixSum = DAG.getMachineNode(Opcode: Hexagon::V6_vprefixqw, dl, VT: MVT::v32i32,
2738 Op1: SDValue(PredTransfer, 0));
2739 // Get the vsub
2740 SDNode *Vsub =
2741 DAG.getMachineNode(Opcode: Hexagon::V6_vsubw, dl, VT: MVT::v32i32,
2742 Op1: SDValue(PrefixSum, 0), Op2: SDValue(SplatConst, 0));
2743 // Get vlsr for hi and lo
2744 SDNode *IndexShift_hi =
2745 DAG.getMachineNode(Opcode: Hexagon::V6_vlsrwv, dl, VT: MVT::v32i32,
2746 Op1: SDValue(SplatHi, 0), Op2: SDValue(Vsub, 0));
2747 SDNode *IndexShift_lo =
2748 DAG.getMachineNode(Opcode: Hexagon::V6_vlsrwv, dl, VT: MVT::v32i32,
2749 Op1: SDValue(SplatLo, 0), Op2: SDValue(Vsub, 0));
2750 // Get vand of hi and lo
2751 SDNode *MaskOff_hi =
2752 DAG.getMachineNode(Opcode: Hexagon::V6_vand, dl, VT: MVT::v32i32,
2753 Op1: SDValue(IndexShift_hi, 0), Op2: SDValue(SplatConst, 0));
2754 SDNode *MaskOff_lo =
2755 DAG.getMachineNode(Opcode: Hexagon::V6_vand, dl, VT: MVT::v32i32,
2756 Op1: SDValue(IndexShift_lo, 0), Op2: SDValue(SplatConst, 0));
2757 // Pack them
2758 SDNode *Pack =
2759 DAG.getMachineNode(Opcode: Hexagon::V6_vpackeh, dl, VT: MVT::v64i16,
2760 Op1: SDValue(MaskOff_hi, 0), Op2: SDValue(MaskOff_lo, 0));
2761 SDNode *Convert =
2762 DAG.getMachineNode(Opcode: Hexagon::V6_vconv_hf_h, dl, VT: ResTy, Op1: SDValue(Pack, 0));
2763 return SDValue(Convert, 0);
2764}
2765
2766SDValue
2767HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
2768 // Catch invalid conversion ops (just in case).
2769 assert(Op.getOpcode() == ISD::SINT_TO_FP ||
2770 Op.getOpcode() == ISD::UINT_TO_FP);
2771
2772 MVT ResTy = ty(Op);
2773 MVT IntTy = ty(Op: Op.getOperand(i: 0)).getVectorElementType();
2774 MVT FpTy = ResTy.getVectorElementType();
2775
2776 if (Op.getOpcode() == ISD::UINT_TO_FP || Op.getOpcode() == ISD::SINT_TO_FP) {
2777 if (ResTy == MVT::v32f32 && ty(Op: Op.getOperand(i: 0)) == MVT::v32i1)
2778 return LowerHvxPred32ToFp(PredOp: Op, DAG);
2779 if (ResTy == MVT::v64f16 && ty(Op: Op.getOperand(i: 0)) == MVT::v64i1)
2780 return LowerHvxPred64ToFp(PredOp: Op, DAG);
2781 }
2782
2783 if (Subtarget.useHVXIEEEFPOps()) {
2784 // There are only conversions to f16.
2785 if (FpTy == MVT::f16) {
2786 // Other int types aren't legal in HVX, so we shouldn't see them here.
2787 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2788 // i8, i16 -> f16 is legal.
2789 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2790 return Op;
2791 }
2792 }
2793
2794 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2795 return EqualizeFpIntConversion(Op, DAG);
2796
2797 return ExpandHvxIntToFp(Op, DAG);
2798}
2799
2800HexagonTargetLowering::TypePair
2801HexagonTargetLowering::typeExtendToWider(MVT Ty0, MVT Ty1) const {
2802 // Compare the widths of elements of the two types, and extend the narrower
2803 // type to match the with of the wider type. For vector types, apply this
2804 // to the element type.
2805 assert(Ty0.isVector() == Ty1.isVector());
2806
2807 MVT ElemTy0 = Ty0.getScalarType();
2808 MVT ElemTy1 = Ty1.getScalarType();
2809
2810 unsigned Width0 = ElemTy0.getSizeInBits();
2811 unsigned Width1 = ElemTy1.getSizeInBits();
2812 unsigned MaxWidth = std::max(a: Width0, b: Width1);
2813
2814 auto getScalarWithWidth = [](MVT ScalarTy, unsigned Width) {
2815 if (ScalarTy.isInteger())
2816 return MVT::getIntegerVT(BitWidth: Width);
2817 assert(ScalarTy.isFloatingPoint());
2818 return MVT::getFloatingPointVT(BitWidth: Width);
2819 };
2820
2821 MVT WideETy0 = getScalarWithWidth(ElemTy0, MaxWidth);
2822 MVT WideETy1 = getScalarWithWidth(ElemTy1, MaxWidth);
2823
2824 if (!Ty0.isVector()) {
2825 // Both types are scalars.
2826 return {WideETy0, WideETy1};
2827 }
2828
2829 // Vector types.
2830 unsigned NumElem = Ty0.getVectorNumElements();
2831 assert(NumElem == Ty1.getVectorNumElements());
2832
2833 return {MVT::getVectorVT(VT: WideETy0, NumElements: NumElem),
2834 MVT::getVectorVT(VT: WideETy1, NumElements: NumElem)};
2835}
2836
2837HexagonTargetLowering::TypePair
2838HexagonTargetLowering::typeWidenToWider(MVT Ty0, MVT Ty1) const {
2839 // Compare the numbers of elements of two vector types, and widen the
2840 // narrower one to match the number of elements in the wider one.
2841 assert(Ty0.isVector() && Ty1.isVector());
2842
2843 unsigned Len0 = Ty0.getVectorNumElements();
2844 unsigned Len1 = Ty1.getVectorNumElements();
2845 if (Len0 == Len1)
2846 return {Ty0, Ty1};
2847
2848 unsigned MaxLen = std::max(a: Len0, b: Len1);
2849 return {MVT::getVectorVT(VT: Ty0.getVectorElementType(), NumElements: MaxLen),
2850 MVT::getVectorVT(VT: Ty1.getVectorElementType(), NumElements: MaxLen)};
2851}
2852
2853MVT
2854HexagonTargetLowering::typeLegalize(MVT Ty, SelectionDAG &DAG) const {
2855 EVT LegalTy = getTypeToTransformTo(Context&: *DAG.getContext(), VT: Ty);
2856 assert(LegalTy.isSimple());
2857 return LegalTy.getSimpleVT();
2858}
2859
2860MVT
2861HexagonTargetLowering::typeWidenToHvx(MVT Ty) const {
2862 unsigned HwWidth = 8 * Subtarget.getVectorLength();
2863 assert(Ty.getSizeInBits() <= HwWidth);
2864 if (Ty.getSizeInBits() == HwWidth)
2865 return Ty;
2866
2867 MVT ElemTy = Ty.getScalarType();
2868 return MVT::getVectorVT(VT: ElemTy, NumElements: HwWidth / ElemTy.getSizeInBits());
2869}
2870
2871HexagonTargetLowering::VectorPair
2872HexagonTargetLowering::emitHvxAddWithOverflow(SDValue A, SDValue B,
2873 const SDLoc &dl, bool Signed, SelectionDAG &DAG) const {
2874 // Compute A+B, return {A+B, O}, where O = vector predicate indicating
2875 // whether an overflow has occurred.
2876 MVT ResTy = ty(Op: A);
2877 assert(ResTy == ty(B));
2878 MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: ResTy.getVectorNumElements());
2879
2880 if (!Signed) {
2881 // V62+ has V6_vaddcarry, but it requires input predicate, so it doesn't
2882 // save any instructions.
2883 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ResTy, Ops: {A, B});
2884 SDValue Ovf = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Add, RHS: A, Cond: ISD::SETULT);
2885 return {Add, Ovf};
2886 }
2887
2888 // Signed overflow has happened, if:
2889 // (A, B have the same sign) and (A+B has a different sign from either)
2890 // i.e. (~A xor B) & ((A+B) xor B), then check the sign bit
2891 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ResTy, Ops: {A, B});
2892 SDValue NotA =
2893 DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {A, DAG.getAllOnesConstant(DL: dl, VT: ResTy)});
2894 SDValue Xor0 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {NotA, B});
2895 SDValue Xor1 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {Add, B});
2896 SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ResTy, Ops: {Xor0, Xor1});
2897 SDValue MSB =
2898 DAG.getSetCC(DL: dl, VT: PredTy, LHS: And, RHS: getZero(dl, Ty: ResTy, DAG), Cond: ISD::SETLT);
2899 return {Add, MSB};
2900}
2901
2902HexagonTargetLowering::VectorPair
2903HexagonTargetLowering::emitHvxShiftRightRnd(SDValue Val, unsigned Amt,
2904 bool Signed, SelectionDAG &DAG) const {
2905 // Shift Val right by Amt bits, round the result to the nearest integer,
2906 // tie-break by rounding halves to even integer.
2907
2908 const SDLoc &dl(Val);
2909 MVT ValTy = ty(Op: Val);
2910
2911 // This should also work for signed integers.
2912 //
2913 // uint tmp0 = inp + ((1 << (Amt-1)) - 1);
2914 // bool ovf = (inp > tmp0);
2915 // uint rup = inp & (1 << (Amt+1));
2916 //
2917 // uint tmp1 = inp >> (Amt-1); // tmp1 == tmp2 iff
2918 // uint tmp2 = tmp0 >> (Amt-1); // the Amt-1 lower bits were all 0
2919 // uint tmp3 = tmp2 + rup;
2920 // uint frac = (tmp1 != tmp2) ? tmp2 >> 1 : tmp3 >> 1;
2921 unsigned ElemWidth = ValTy.getVectorElementType().getSizeInBits();
2922 MVT ElemTy = MVT::getIntegerVT(BitWidth: ElemWidth);
2923 MVT IntTy = tyVector(Ty: ValTy, ElemTy);
2924 MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: IntTy.getVectorNumElements());
2925 unsigned ShRight = Signed ? ISD::SRA : ISD::SRL;
2926
2927 SDValue Inp = DAG.getBitcast(VT: IntTy, V: Val);
2928 SDValue LowBits = DAG.getConstant(Val: (1ull << (Amt - 1)) - 1, DL: dl, VT: IntTy);
2929
2930 SDValue AmtP1 = DAG.getConstant(Val: 1ull << Amt, DL: dl, VT: IntTy);
2931 SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntTy, Ops: {Inp, AmtP1});
2932 SDValue Zero = getZero(dl, Ty: IntTy, DAG);
2933 SDValue Bit = DAG.getSetCC(DL: dl, VT: PredTy, LHS: And, RHS: Zero, Cond: ISD::SETNE);
2934 SDValue Rup = DAG.getZExtOrTrunc(Op: Bit, DL: dl, VT: IntTy);
2935 auto [Tmp0, Ovf] = emitHvxAddWithOverflow(A: Inp, B: LowBits, dl, Signed, DAG);
2936
2937 SDValue AmtM1 = DAG.getConstant(Val: Amt - 1, DL: dl, VT: IntTy);
2938 SDValue Tmp1 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, N1: Inp, N2: AmtM1);
2939 SDValue Tmp2 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, N1: Tmp0, N2: AmtM1);
2940 SDValue Tmp3 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: IntTy, N1: Tmp2, N2: Rup);
2941
2942 SDValue Eq = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Tmp1, RHS: Tmp2, Cond: ISD::SETEQ);
2943 SDValue One = DAG.getConstant(Val: 1, DL: dl, VT: IntTy);
2944 SDValue Tmp4 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, Ops: {Tmp2, One});
2945 SDValue Tmp5 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, Ops: {Tmp3, One});
2946 SDValue Mux = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: IntTy, Ops: {Eq, Tmp5, Tmp4});
2947 return {Mux, Ovf};
2948}
2949
2950SDValue
2951HexagonTargetLowering::emitHvxMulHsV60(SDValue A, SDValue B, const SDLoc &dl,
2952 SelectionDAG &DAG) const {
2953 MVT VecTy = ty(Op: A);
2954 MVT PairTy = typeJoin(Tys: {VecTy, VecTy});
2955 assert(VecTy.getVectorElementType() == MVT::i32);
2956
2957 SDValue S16 = DAG.getConstant(Val: 16, DL: dl, VT: MVT::i32);
2958
2959 // mulhs(A,B) =
2960 // = [(Hi(A)*2^16 + Lo(A)) *s (Hi(B)*2^16 + Lo(B))] >> 32
2961 // = [Hi(A)*2^16 *s Hi(B)*2^16 + Hi(A) *su Lo(B)*2^16
2962 // + Lo(A) *us (Hi(B)*2^16 + Lo(B))] >> 32
2963 // = [Hi(A) *s Hi(B)*2^32 + Hi(A) *su Lo(B)*2^16 + Lo(A) *us B] >> 32
2964 // The low half of Lo(A)*Lo(B) will be discarded (it's not added to
2965 // anything, so it cannot produce any carry over to higher bits),
2966 // so everything in [] can be shifted by 16 without loss of precision.
2967 // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + Lo(A)*B >> 16] >> 16
2968 // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + V6_vmpyewuh(A,B)] >> 16
2969 // The final additions need to make sure to properly maintain any carry-
2970 // out bits.
2971 //
2972 // Hi(B) Lo(B)
2973 // Hi(A) Lo(A)
2974 // --------------
2975 // Lo(B)*Lo(A) | T0 = V6_vmpyewuh(B,A) does this,
2976 // Hi(B)*Lo(A) | + dropping the low 16 bits
2977 // Hi(A)*Lo(B) | T2
2978 // Hi(B)*Hi(A)
2979
2980 SDValue T0 = getInstr(MachineOpc: Hexagon::V6_vmpyewuh, dl, Ty: VecTy, Ops: {B, A}, DAG);
2981 // T1 = get Hi(A) into low halves.
2982 SDValue T1 = getInstr(MachineOpc: Hexagon::V6_vasrw, dl, Ty: VecTy, Ops: {A, S16}, DAG);
2983 // P0 = interleaved T1.h*B.uh (full precision product)
2984 SDValue P0 = getInstr(MachineOpc: Hexagon::V6_vmpyhus, dl, Ty: PairTy, Ops: {T1, B}, DAG);
2985 // T2 = T1.even(h) * B.even(uh), i.e. Hi(A)*Lo(B)
2986 SDValue T2 = LoHalf(V: P0, DAG);
2987 // We need to add T0+T2, recording the carry-out, which will be 1<<16
2988 // added to the final sum.
2989 // P1 = interleaved even/odd 32-bit (unsigned) sums of 16-bit halves
2990 SDValue P1 = getInstr(MachineOpc: Hexagon::V6_vadduhw, dl, Ty: PairTy, Ops: {T0, T2}, DAG);
2991 // P2 = interleaved even/odd 32-bit (signed) sums of 16-bit halves
2992 SDValue P2 = getInstr(MachineOpc: Hexagon::V6_vaddhw, dl, Ty: PairTy, Ops: {T0, T2}, DAG);
2993 // T3 = full-precision(T0+T2) >> 16
2994 // The low halves are added-unsigned, the high ones are added-signed.
2995 SDValue T3 = getInstr(MachineOpc: Hexagon::V6_vasrw_acc, dl, Ty: VecTy,
2996 Ops: {HiHalf(V: P2, DAG), LoHalf(V: P1, DAG), S16}, DAG);
2997 SDValue T4 = getInstr(MachineOpc: Hexagon::V6_vasrw, dl, Ty: VecTy, Ops: {B, S16}, DAG);
2998 // P3 = interleaved Hi(B)*Hi(A) (full precision),
2999 // which is now Lo(T1)*Lo(T4), so we want to keep the even product.
3000 SDValue P3 = getInstr(MachineOpc: Hexagon::V6_vmpyhv, dl, Ty: PairTy, Ops: {T1, T4}, DAG);
3001 SDValue T5 = LoHalf(V: P3, DAG);
3002 // Add:
3003 SDValue T6 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: VecTy, Ops: {T3, T5});
3004 return T6;
3005}
3006
3007SDValue
3008HexagonTargetLowering::emitHvxMulLoHiV60(SDValue A, bool SignedA, SDValue B,
3009 bool SignedB, const SDLoc &dl,
3010 SelectionDAG &DAG) const {
3011 MVT VecTy = ty(Op: A);
3012 MVT PairTy = typeJoin(Tys: {VecTy, VecTy});
3013 assert(VecTy.getVectorElementType() == MVT::i32);
3014
3015 SDValue S16 = DAG.getConstant(Val: 16, DL: dl, VT: MVT::i32);
3016
3017 if (SignedA && !SignedB) {
3018 // Make A:unsigned, B:signed.
3019 std::swap(a&: A, b&: B);
3020 std::swap(a&: SignedA, b&: SignedB);
3021 }
3022
3023 // Do halfword-wise multiplications for unsigned*unsigned product, then
3024 // add corrections for signed and unsigned*signed.
3025
3026 SDValue Lo, Hi;
3027
3028 // P0:lo = (uu) products of low halves of A and B,
3029 // P0:hi = (uu) products of high halves.
3030 SDValue P0 = getInstr(MachineOpc: Hexagon::V6_vmpyuhv, dl, Ty: PairTy, Ops: {A, B}, DAG);
3031
3032 // Swap low/high halves in B
3033 SDValue T0 = getInstr(MachineOpc: Hexagon::V6_lvsplatw, dl, Ty: VecTy,
3034 Ops: {DAG.getConstant(Val: 0x02020202, DL: dl, VT: MVT::i32)}, DAG);
3035 SDValue T1 = getInstr(MachineOpc: Hexagon::V6_vdelta, dl, Ty: VecTy, Ops: {B, T0}, DAG);
3036 // P1 = products of even/odd halfwords.
3037 // P1:lo = (uu) products of even(A.uh) * odd(B.uh)
3038 // P1:hi = (uu) products of odd(A.uh) * even(B.uh)
3039 SDValue P1 = getInstr(MachineOpc: Hexagon::V6_vmpyuhv, dl, Ty: PairTy, Ops: {A, T1}, DAG);
3040
3041 // P2:lo = low halves of P1:lo + P1:hi,
3042 // P2:hi = high halves of P1:lo + P1:hi.
3043 SDValue P2 = getInstr(MachineOpc: Hexagon::V6_vadduhw, dl, Ty: PairTy,
3044 Ops: {HiHalf(V: P1, DAG), LoHalf(V: P1, DAG)}, DAG);
3045 // Still need to add the high halves of P0:lo to P2:lo
3046 SDValue T2 =
3047 getInstr(MachineOpc: Hexagon::V6_vlsrw, dl, Ty: VecTy, Ops: {LoHalf(V: P0, DAG), S16}, DAG);
3048 SDValue T3 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: VecTy, Ops: {LoHalf(V: P2, DAG), T2});
3049
3050 // The high halves of T3 will contribute to the HI part of LOHI.
3051 SDValue T4 = getInstr(MachineOpc: Hexagon::V6_vasrw_acc, dl, Ty: VecTy,
3052 Ops: {HiHalf(V: P2, DAG), T3, S16}, DAG);
3053
3054 // The low halves of P2 need to be added to high halves of the LO part.
3055 Lo = getInstr(MachineOpc: Hexagon::V6_vaslw_acc, dl, Ty: VecTy,
3056 Ops: {LoHalf(V: P0, DAG), LoHalf(V: P2, DAG), S16}, DAG);
3057 Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: VecTy, Ops: {HiHalf(V: P0, DAG), T4});
3058
3059 if (SignedA) {
3060 assert(SignedB && "Signed A and unsigned B should have been inverted");
3061
3062 MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: VecTy.getVectorNumElements());
3063 SDValue Zero = getZero(dl, Ty: VecTy, DAG);
3064 SDValue Q0 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: A, RHS: Zero, Cond: ISD::SETLT);
3065 SDValue Q1 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: B, RHS: Zero, Cond: ISD::SETLT);
3066 SDValue X0 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: VecTy, Ops: {Q0, B, Zero});
3067 SDValue X1 = getInstr(MachineOpc: Hexagon::V6_vaddwq, dl, Ty: VecTy, Ops: {Q1, X0, A}, DAG);
3068 Hi = getInstr(MachineOpc: Hexagon::V6_vsubw, dl, Ty: VecTy, Ops: {Hi, X1}, DAG);
3069 } else if (SignedB) {
3070 // Same correction as for mulhus:
3071 // mulhus(A.uw,B.w) = mulhu(A.uw,B.uw) - (A.w if B < 0)
3072 MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: VecTy.getVectorNumElements());
3073 SDValue Zero = getZero(dl, Ty: VecTy, DAG);
3074 SDValue Q1 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: B, RHS: Zero, Cond: ISD::SETLT);
3075 Hi = getInstr(MachineOpc: Hexagon::V6_vsubwq, dl, Ty: VecTy, Ops: {Q1, Hi, A}, DAG);
3076 } else {
3077 assert(!SignedA && !SignedB);
3078 }
3079
3080 return DAG.getMergeValues(Ops: {Lo, Hi}, dl);
3081}
3082
3083SDValue
3084HexagonTargetLowering::emitHvxMulLoHiV62(SDValue A, bool SignedA,
3085 SDValue B, bool SignedB,
3086 const SDLoc &dl,
3087 SelectionDAG &DAG) const {
3088 MVT VecTy = ty(Op: A);
3089 MVT PairTy = typeJoin(Tys: {VecTy, VecTy});
3090 assert(VecTy.getVectorElementType() == MVT::i32);
3091
3092 if (SignedA && !SignedB) {
3093 // Make A:unsigned, B:signed.
3094 std::swap(a&: A, b&: B);
3095 std::swap(a&: SignedA, b&: SignedB);
3096 }
3097
3098 // Do S*S first, then make corrections for U*S or U*U if needed.
3099 SDValue P0 = getInstr(MachineOpc: Hexagon::V6_vmpyewuh_64, dl, Ty: PairTy, Ops: {A, B}, DAG);
3100 SDValue P1 =
3101 getInstr(MachineOpc: Hexagon::V6_vmpyowh_64_acc, dl, Ty: PairTy, Ops: {P0, A, B}, DAG);
3102 SDValue Lo = LoHalf(V: P1, DAG);
3103 SDValue Hi = HiHalf(V: P1, DAG);
3104
3105 if (!SignedB) {
3106 assert(!SignedA && "Signed A and unsigned B should have been inverted");
3107 SDValue Zero = getZero(dl, Ty: VecTy, DAG);
3108 MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: VecTy.getVectorNumElements());
3109
3110 // Mulhu(X, Y) = Mulhs(X, Y) + (X, if Y < 0) + (Y, if X < 0).
3111 // def: Pat<(VecI32 (mulhu HVI32:$A, HVI32:$B)),
3112 // (V6_vaddw (HiHalf (Muls64O $A, $B)),
3113 // (V6_vaddwq (V6_vgtw (V6_vd0), $B),
3114 // (V6_vandvqv (V6_vgtw (V6_vd0), $A), $B),
3115 // $A))>;
3116 SDValue Q0 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: A, RHS: Zero, Cond: ISD::SETLT);
3117 SDValue Q1 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: B, RHS: Zero, Cond: ISD::SETLT);
3118 SDValue T0 = getInstr(MachineOpc: Hexagon::V6_vandvqv, dl, Ty: VecTy, Ops: {Q0, B}, DAG);
3119 SDValue T1 = getInstr(MachineOpc: Hexagon::V6_vaddwq, dl, Ty: VecTy, Ops: {Q1, T0, A}, DAG);
3120 Hi = getInstr(MachineOpc: Hexagon::V6_vaddw, dl, Ty: VecTy, Ops: {Hi, T1}, DAG);
3121 } else if (!SignedA) {
3122 SDValue Zero = getZero(dl, Ty: VecTy, DAG);
3123 MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: VecTy.getVectorNumElements());
3124
3125 // Mulhus(unsigned X, signed Y) = Mulhs(X, Y) + (Y, if X < 0).
3126 // def: Pat<(VecI32 (HexagonMULHUS HVI32:$A, HVI32:$B)),
3127 // (V6_vaddwq (V6_vgtw (V6_vd0), $A),
3128 // (HiHalf (Muls64O $A, $B)),
3129 // $B)>;
3130 SDValue Q0 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: A, RHS: Zero, Cond: ISD::SETLT);
3131 Hi = getInstr(MachineOpc: Hexagon::V6_vaddwq, dl, Ty: VecTy, Ops: {Q0, Hi, B}, DAG);
3132 }
3133
3134 return DAG.getMergeValues(Ops: {Lo, Hi}, dl);
3135}
3136
3137SDValue
3138HexagonTargetLowering::EqualizeFpIntConversion(SDValue Op, SelectionDAG &DAG)
3139 const {
3140 // Rewrite conversion between integer and floating-point in such a way that
3141 // the integer type is extended/narrowed to match the bitwidth of the
3142 // floating-point type, combined with additional integer-integer extensions
3143 // or narrowings to match the original input/result types.
3144 // E.g. f32 -> i8 ==> f32 -> i32 -> i8
3145 //
3146 // The input/result types are not required to be legal, but if they are
3147 // legal, this function should not introduce illegal types.
3148
3149 unsigned Opc = Op.getOpcode();
3150 assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT ||
3151 Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP);
3152
3153 SDValue Inp = Op.getOperand(i: 0);
3154 MVT InpTy = ty(Op: Inp);
3155 MVT ResTy = ty(Op);
3156
3157 if (InpTy == ResTy)
3158 return Op;
3159
3160 const SDLoc &dl(Op);
3161 bool Signed = Opc == ISD::FP_TO_SINT || Opc == ISD::SINT_TO_FP;
3162
3163 auto [WInpTy, WResTy] = typeExtendToWider(Ty0: InpTy, Ty1: ResTy);
3164 SDValue WInp = resizeToWidth(VecV: Inp, ResTy: WInpTy, Signed, dl, DAG);
3165 SDValue Conv = DAG.getNode(Opcode: Opc, DL: dl, VT: WResTy, Operand: WInp);
3166 SDValue Res = resizeToWidth(VecV: Conv, ResTy, Signed, dl, DAG);
3167 return Res;
3168}
3169
3170SDValue
3171HexagonTargetLowering::ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
3172 unsigned Opc = Op.getOpcode();
3173 assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT);
3174
3175 const SDLoc &dl(Op);
3176 SDValue Op0 = Op.getOperand(i: 0);
3177 MVT InpTy = ty(Op: Op0);
3178 MVT ResTy = ty(Op);
3179 assert(InpTy.changeTypeToInteger() == ResTy);
3180
3181 // At this point this is an experiment under a flag.
3182 // In arch before V81 the rounding mode is towards nearest value.
3183 // The C/C++ standard requires rounding towards zero:
3184 // C (C99 and later): ISO/IEC 9899:2018 (C18), section 6.3.1.4 — "When a
3185 // finite value of real floating type is converted to an integer type, the
3186 // fractional part is discarded (i.e., the value is truncated toward zero)."
3187 // C++: ISO/IEC 14882:2020 (C++20), section 7.3.7 — "A prvalue of a
3188 // floating-point type can be converted to a prvalue of an integer type. The
3189 // conversion truncates; that is, the fractional part is discarded."
3190 if (InpTy == MVT::v64f16) {
3191 if (Subtarget.useHVXV81Ops()) {
3192 // This is c/c++ compliant
3193 SDValue ConvVec =
3194 getInstr(MachineOpc: Hexagon::V6_vconv_h_hf_rnd, dl, Ty: ResTy, Ops: {Op0}, DAG);
3195 return ConvVec;
3196 } else if (EnableFpFastConvert) {
3197 // Vd32.h=Vu32.hf same as Q6_Vh_equals_Vhf
3198 SDValue ConvVec = getInstr(MachineOpc: Hexagon::V6_vconv_h_hf, dl, Ty: ResTy, Ops: {Op0}, DAG);
3199 return ConvVec;
3200 }
3201 } else if (EnableFpFastConvert && InpTy == MVT::v32f32) {
3202 // Vd32.w=Vu32.sf same as Q6_Vw_equals_Vsf
3203 SDValue ConvVec = getInstr(MachineOpc: Hexagon::V6_vconv_w_sf, dl, Ty: ResTy, Ops: {Op0}, DAG);
3204 return ConvVec;
3205 }
3206
3207 // int32_t conv_f32_to_i32(uint32_t inp) {
3208 // // s | exp8 | frac23
3209 //
3210 // int neg = (int32_t)inp < 0;
3211 //
3212 // // "expm1" is the actual exponent minus 1: instead of "bias", subtract
3213 // // "bias+1". When the encoded exp is "all-1" (i.e. inf/nan), this will
3214 // // produce a large positive "expm1", which will result in max u/int.
3215 // // In all IEEE formats, bias is the largest positive number that can be
3216 // // represented in bias-width bits (i.e. 011..1).
3217 // int32_t expm1 = (inp << 1) - 0x80000000;
3218 // expm1 >>= 24;
3219 //
3220 // // Always insert the "implicit 1". Subnormal numbers will become 0
3221 // // regardless.
3222 // uint32_t frac = (inp << 8) | 0x80000000;
3223 //
3224 // // "frac" is the fraction part represented as Q1.31. If it was
3225 // // interpreted as uint32_t, it would be the fraction part multiplied
3226 // // by 2^31.
3227 //
3228 // // Calculate the amount of right shift, since shifting further to the
3229 // // left would lose significant bits. Limit it to 32, because we want
3230 // // shifts by 32+ to produce 0, whereas V6_vlsrwv treats the shift
3231 // // amount as a 6-bit signed value (so 33 is same as -31, i.e. shift
3232 // // left by 31). "rsh" can be negative.
3233 // int32_t rsh = min(31 - (expm1 + 1), 32);
3234 //
3235 // frac >>= rsh; // rsh == 32 will produce 0
3236 //
3237 // // Everything up to this point is the same for conversion to signed
3238 // // unsigned integer.
3239 //
3240 // if (neg) // Only for signed int
3241 // frac = -frac; //
3242 // if (rsh <= 0 && neg) // bound = neg ? 0x80000000 : 0x7fffffff
3243 // frac = 0x80000000; // frac = rsh <= 0 ? bound : frac
3244 // if (rsh <= 0 && !neg) //
3245 // frac = 0x7fffffff; //
3246 //
3247 // if (neg) // Only for unsigned int
3248 // frac = 0; //
3249 // if (rsh < 0 && !neg) // frac = rsh < 0 ? 0x7fffffff : frac;
3250 // frac = 0x7fffffff; // frac = neg ? 0 : frac;
3251 //
3252 // return frac;
3253 // }
3254
3255 MVT PredTy = MVT::getVectorVT(VT: MVT::i1, EC: ResTy.getVectorElementCount());
3256
3257 // Zero = V6_vd0();
3258 // Neg = V6_vgtw(Zero, Inp);
3259 // One = V6_lvsplatw(1);
3260 // M80 = V6_lvsplatw(0x80000000);
3261 // Exp00 = V6_vaslwv(Inp, One);
3262 // Exp01 = V6_vsubw(Exp00, M80);
3263 // ExpM1 = V6_vasrw(Exp01, 24);
3264 // Frc00 = V6_vaslw(Inp, 8);
3265 // Frc01 = V6_vor(Frc00, M80);
3266 // Rsh00 = V6_vsubw(V6_lvsplatw(30), ExpM1);
3267 // Rsh01 = V6_vminw(Rsh00, V6_lvsplatw(32));
3268 // Frc02 = V6_vlsrwv(Frc01, Rsh01);
3269
3270 // if signed int:
3271 // Bnd = V6_vmux(Neg, M80, V6_lvsplatw(0x7fffffff))
3272 // Pos = V6_vgtw(Rsh01, Zero);
3273 // Frc13 = V6_vsubw(Zero, Frc02);
3274 // Frc14 = V6_vmux(Neg, Frc13, Frc02);
3275 // Int = V6_vmux(Pos, Frc14, Bnd);
3276 //
3277 // if unsigned int:
3278 // Rsn = V6_vgtw(Zero, Rsh01)
3279 // Frc23 = V6_vmux(Rsn, V6_lvsplatw(0x7fffffff), Frc02)
3280 // Int = V6_vmux(Neg, Zero, Frc23)
3281
3282 auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(Ty: InpTy);
3283 unsigned ElemWidth = 1 + ExpWidth + FracWidth;
3284 assert((1ull << (ExpWidth - 1)) == (1 + ExpBias));
3285
3286 SDValue Inp = DAG.getBitcast(VT: ResTy, V: Op0);
3287 SDValue Zero = getZero(dl, Ty: ResTy, DAG);
3288 SDValue Neg = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Inp, RHS: Zero, Cond: ISD::SETLT);
3289 SDValue M80 = DAG.getConstant(Val: 1ull << (ElemWidth - 1), DL: dl, VT: ResTy);
3290 SDValue M7F = DAG.getConstant(Val: (1ull << (ElemWidth - 1)) - 1, DL: dl, VT: ResTy);
3291 SDValue One = DAG.getConstant(Val: 1, DL: dl, VT: ResTy);
3292 SDValue Exp00 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: ResTy, Ops: {Inp, One});
3293 SDValue Exp01 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {Exp00, M80});
3294 SDValue MNE = DAG.getConstant(Val: ElemWidth - ExpWidth, DL: dl, VT: ResTy);
3295 SDValue ExpM1 = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: ResTy, Ops: {Exp01, MNE});
3296
3297 SDValue ExpW = DAG.getConstant(Val: ExpWidth, DL: dl, VT: ResTy);
3298 SDValue Frc00 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: ResTy, Ops: {Inp, ExpW});
3299 SDValue Frc01 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ResTy, Ops: {Frc00, M80});
3300
3301 SDValue MN2 = DAG.getConstant(Val: ElemWidth - 2, DL: dl, VT: ResTy);
3302 SDValue Rsh00 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {MN2, ExpM1});
3303 SDValue MW = DAG.getConstant(Val: ElemWidth, DL: dl, VT: ResTy);
3304 SDValue Rsh01 = DAG.getNode(Opcode: ISD::SMIN, DL: dl, VT: ResTy, Ops: {Rsh00, MW});
3305 SDValue Frc02 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ResTy, Ops: {Frc01, Rsh01});
3306
3307 SDValue Int;
3308
3309 if (Opc == ISD::FP_TO_SINT) {
3310 SDValue Bnd = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, Ops: {Neg, M80, M7F});
3311 SDValue Pos = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Rsh01, RHS: Zero, Cond: ISD::SETGT);
3312 SDValue Frc13 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {Zero, Frc02});
3313 SDValue Frc14 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, Ops: {Neg, Frc13, Frc02});
3314 Int = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, Ops: {Pos, Frc14, Bnd});
3315 } else {
3316 assert(Opc == ISD::FP_TO_UINT);
3317 SDValue Rsn = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Rsh01, RHS: Zero, Cond: ISD::SETLT);
3318 SDValue Frc23 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, N1: Rsn, N2: M7F, N3: Frc02);
3319 Int = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, N1: Neg, N2: Zero, N3: Frc23);
3320 }
3321
3322 return Int;
3323}
3324
3325SDValue
3326HexagonTargetLowering::ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
3327 unsigned Opc = Op.getOpcode();
3328 assert(Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP);
3329
3330 const SDLoc &dl(Op);
3331 SDValue Op0 = Op.getOperand(i: 0);
3332 MVT InpTy = ty(Op: Op0);
3333 MVT ResTy = ty(Op);
3334 assert(ResTy.changeTypeToInteger() == InpTy);
3335
3336 // uint32_t vnoc1_rnd(int32_t w) {
3337 // int32_t iszero = w == 0;
3338 // int32_t isneg = w < 0;
3339 // uint32_t u = __builtin_HEXAGON_A2_abs(w);
3340 //
3341 // uint32_t norm_left = __builtin_HEXAGON_S2_cl0(u) + 1;
3342 // uint32_t frac0 = (uint64_t)u << norm_left;
3343 //
3344 // // Rounding:
3345 // uint32_t frac1 = frac0 + ((1 << 8) - 1);
3346 // uint32_t renorm = (frac0 > frac1);
3347 // uint32_t rup = (int)(frac0 << 22) < 0;
3348 //
3349 // uint32_t frac2 = frac0 >> 8;
3350 // uint32_t frac3 = frac1 >> 8;
3351 // uint32_t frac = (frac2 != frac3) ? frac3 >> 1 : (frac3 + rup) >> 1;
3352 //
3353 // int32_t exp = 32 - norm_left + renorm + 127;
3354 // exp <<= 23;
3355 //
3356 // uint32_t sign = 0x80000000 * isneg;
3357 // uint32_t f = sign | exp | frac;
3358 // return iszero ? 0 : f;
3359 // }
3360
3361 MVT PredTy = MVT::getVectorVT(VT: MVT::i1, EC: InpTy.getVectorElementCount());
3362 bool Signed = Opc == ISD::SINT_TO_FP;
3363
3364 auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(Ty: ResTy);
3365 unsigned ElemWidth = 1 + ExpWidth + FracWidth;
3366
3367 SDValue Zero = getZero(dl, Ty: InpTy, DAG);
3368 SDValue One = DAG.getConstant(Val: 1, DL: dl, VT: InpTy);
3369 SDValue IsZero = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Op0, RHS: Zero, Cond: ISD::SETEQ);
3370 SDValue Abs = Signed ? DAG.getNode(Opcode: ISD::ABS, DL: dl, VT: InpTy, Operand: Op0) : Op0;
3371 SDValue Clz = DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT: InpTy, Operand: Abs);
3372 SDValue NLeft = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: InpTy, Ops: {Clz, One});
3373 SDValue Frac0 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: InpTy, Ops: {Abs, NLeft});
3374
3375 auto [Frac, Ovf] = emitHvxShiftRightRnd(Val: Frac0, Amt: ExpWidth + 1, Signed: false, DAG);
3376 if (Signed) {
3377 SDValue IsNeg = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Op0, RHS: Zero, Cond: ISD::SETLT);
3378 SDValue M80 = DAG.getConstant(Val: 1ull << (ElemWidth - 1), DL: dl, VT: InpTy);
3379 SDValue Sign = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: InpTy, Ops: {IsNeg, M80, Zero});
3380 Frac = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: InpTy, Ops: {Sign, Frac});
3381 }
3382
3383 SDValue Rnrm = DAG.getZExtOrTrunc(Op: Ovf, DL: dl, VT: InpTy);
3384 SDValue Exp0 = DAG.getConstant(Val: ElemWidth + ExpBias, DL: dl, VT: InpTy);
3385 SDValue Exp1 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: InpTy, Ops: {Rnrm, Exp0});
3386 SDValue Exp2 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: InpTy, Ops: {Exp1, NLeft});
3387 SDValue Exp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: InpTy,
3388 Ops: {Exp2, DAG.getConstant(Val: FracWidth, DL: dl, VT: InpTy)});
3389 SDValue Flt0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: InpTy, Ops: {Frac, Exp3});
3390 SDValue Flt1 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: InpTy, Ops: {IsZero, Zero, Flt0});
3391 SDValue Flt = DAG.getBitcast(VT: ResTy, V: Flt1);
3392
3393 return Flt;
3394}
3395
3396SDValue
3397HexagonTargetLowering::CreateTLWrapper(SDValue Op, SelectionDAG &DAG) const {
3398 unsigned Opc = Op.getOpcode();
3399 unsigned TLOpc;
3400 switch (Opc) {
3401 case ISD::ANY_EXTEND:
3402 case ISD::SIGN_EXTEND:
3403 case ISD::ZERO_EXTEND:
3404 TLOpc = HexagonISD::TL_EXTEND;
3405 break;
3406 case ISD::TRUNCATE:
3407 TLOpc = HexagonISD::TL_TRUNCATE;
3408 break;
3409#ifndef NDEBUG
3410 Op.dump(&DAG);
3411#endif
3412 llvm_unreachable("Unexpected operator");
3413 }
3414
3415 const SDLoc &dl(Op);
3416 return DAG.getNode(Opcode: TLOpc, DL: dl, VT: ty(Op), N1: Op.getOperand(i: 0),
3417 N2: DAG.getUNDEF(VT: MVT::i128), // illegal type
3418 N3: DAG.getConstant(Val: Opc, DL: dl, VT: MVT::i32));
3419}
3420
3421SDValue
3422HexagonTargetLowering::RemoveTLWrapper(SDValue Op, SelectionDAG &DAG) const {
3423 assert(Op.getOpcode() == HexagonISD::TL_EXTEND ||
3424 Op.getOpcode() == HexagonISD::TL_TRUNCATE);
3425 unsigned Opc = Op.getConstantOperandVal(i: 2);
3426 return DAG.getNode(Opcode: Opc, DL: SDLoc(Op), VT: ty(Op), Operand: Op.getOperand(i: 0));
3427}
3428
3429HexagonTargetLowering::VectorPair
3430HexagonTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const {
3431 assert(!Op.isMachineOpcode());
3432 SmallVector<SDValue, 2> OpsL, OpsH;
3433 const SDLoc &dl(Op);
3434
3435 auto SplitVTNode = [&DAG, this](const VTSDNode *N) {
3436 MVT Ty = typeSplit(VecTy: N->getVT().getSimpleVT()).first;
3437 SDValue TV = DAG.getValueType(Ty);
3438 return std::make_pair(x&: TV, y&: TV);
3439 };
3440
3441 for (SDValue A : Op.getNode()->ops()) {
3442 auto [Lo, Hi] =
3443 ty(Op: A).isVector() ? opSplit(Vec: A, dl, DAG) : std::make_pair(x&: A, y&: A);
3444 // Special case for type operand.
3445 switch (Op.getOpcode()) {
3446 case ISD::SIGN_EXTEND_INREG:
3447 case HexagonISD::SSAT:
3448 case HexagonISD::USAT:
3449 if (const auto *N = dyn_cast<const VTSDNode>(Val: A.getNode()))
3450 std::tie(args&: Lo, args&: Hi) = SplitVTNode(N);
3451 break;
3452 }
3453 OpsL.push_back(Elt: Lo);
3454 OpsH.push_back(Elt: Hi);
3455 }
3456
3457 MVT ResTy = ty(Op);
3458 MVT HalfTy = typeSplit(VecTy: ResTy).first;
3459 SDValue L = DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT: HalfTy, Ops: OpsL);
3460 SDValue H = DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT: HalfTy, Ops: OpsH);
3461 return {L, H};
3462}
3463
3464SDValue
3465HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
3466 auto *MemN = cast<MemSDNode>(Val: Op.getNode());
3467 unsigned MemOpc = MemN->getOpcode();
3468 EVT MemTy = MemN->getMemoryVT();
3469
3470 if ((MemOpc == ISD::STORE || MemOpc == ISD::LOAD) &&
3471 (!MemTy.isSimple() || !isHvxPairTy(Ty: MemTy.getSimpleVT())))
3472 return Op;
3473
3474 EVT ValueType;
3475 if (MemOpc == ISD::STORE)
3476 ValueType = ty(Op: cast<StoreSDNode>(Val&: Op)->getValue());
3477 else if (MemOpc == ISD::MSTORE)
3478 ValueType = ty(Op: cast<MaskedStoreSDNode>(Val&: Op)->getValue());
3479 else // ISD::LOAD, ISD::MLOAD.
3480 ValueType = MemN->getValueType(ResNo: 0);
3481
3482 EVT LoVT, HiVT;
3483 std::tie(args&: LoVT, args&: HiVT) = DAG.GetSplitDestVTs(VT: ValueType);
3484
3485 EVT LoMemVT, HiMemVT;
3486 bool HiIsEmpty = false;
3487 std::tie(args&: LoMemVT, args&: HiMemVT) =
3488 DAG.GetDependentSplitDestVTs(VT: MemTy, EnvVT: LoVT, HiIsEmpty: &HiIsEmpty);
3489
3490 uint64_t LoSize = LoMemVT.getSizeInBits().getFixedValue() / 8;
3491 uint64_t HiSize = HiMemVT.getSizeInBits().getFixedValue() / 8;
3492
3493 const SDLoc &dl(Op);
3494 SDValue Chain = MemN->getChain();
3495 SDValue Base0 = MemN->getBasePtr();
3496 SDValue Base1 =
3497 DAG.getMemBasePlusOffset(Base: Base0, Offset: TypeSize::getFixed(ExactSize: LoSize), DL: dl);
3498
3499 MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
3500 if (MachineMemOperand *MMO = MemN->getMemOperand()) {
3501 MachineFunction &MF = DAG.getMachineFunction();
3502 auto MemSize = [=](uint64_t Size) {
3503 return (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
3504 ? (uint64_t)MemoryLocation::UnknownSize
3505 : Size;
3506 };
3507 // MOp1 will not be used if HiIsEmpty for masked loads and stores (MLOAD and
3508 // MSTORE). Non-masked loads and store are always of double-vector size (see
3509 // isHvxPairTy() check above).
3510 MOp0 = MF.getMachineMemOperand(MMO, Offset: 0, Size: MemSize(LoSize));
3511 MOp1 = MF.getMachineMemOperand(MMO, Offset: LoSize, Size: MemSize(HiSize));
3512 }
3513
3514 if (MemOpc == ISD::LOAD) {
3515 assert(cast<LoadSDNode>(Op)->isUnindexed());
3516 SDValue Load0 = DAG.getLoad(VT: LoVT, dl, Chain, Ptr: Base0, MMO: MOp0);
3517 SDValue Load1 = DAG.getLoad(VT: HiVT, dl, Chain, Ptr: Base1, MMO: MOp1);
3518 return DAG.getMergeValues(
3519 Ops: {DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: MemN->getValueType(ResNo: 0), N1: Load0,
3520 N2: Load1),
3521 DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Load0.getValue(R: 1),
3522 N2: Load1.getValue(R: 1))},
3523 dl);
3524 }
3525 if (MemOpc == ISD::STORE) {
3526 assert(cast<StoreSDNode>(Op)->isUnindexed());
3527 VectorPair Vals = opSplit(Vec: cast<StoreSDNode>(Val&: Op)->getValue(), dl, DAG);
3528 SDValue Store0 = DAG.getStore(Chain, dl, Val: Vals.first, Ptr: Base0, MMO: MOp0);
3529 SDValue Store1 = DAG.getStore(Chain, dl, Val: Vals.second, Ptr: Base1, MMO: MOp1);
3530 return DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Store0, N2: Store1);
3531 }
3532
3533 assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE);
3534
3535 auto MaskN = cast<MaskedLoadStoreSDNode>(Val&: Op);
3536 assert(MaskN->isUnindexed());
3537 VectorPair Masks = opSplit(Vec: MaskN->getMask(), dl, DAG);
3538 SDValue Offset = DAG.getUNDEF(VT: MVT::i32);
3539
3540 if (MemOpc == ISD::MLOAD) {
3541 VectorPair Thru =
3542 opSplit(Vec: cast<MaskedLoadSDNode>(Val&: Op)->getPassThru(), dl, DAG);
3543 SDValue MLoad0 = DAG.getMaskedLoad(VT: LoVT, dl, Chain, Base: Base0, Offset,
3544 Mask: Masks.first, Src0: Thru.first, MemVT: LoMemVT, MMO: MOp0,
3545 AM: ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding: false);
3546
3547 // The hi masked load has zero storage size. We therefore simply set it to
3548 // the low masked load and rely on subsequent removal from the chain as it
3549 // is unused. See DAGTypeLegalizer::SplitVecRes_MLOAD() for the same logic.
3550 SDValue MLoad1 =
3551 HiIsEmpty ? MLoad0
3552 : DAG.getMaskedLoad(VT: HiVT, dl, Chain, Base: Base1, Offset,
3553 Mask: Masks.second, Src0: Thru.second, MemVT: HiMemVT, MMO: MOp1,
3554 AM: ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding: false);
3555 return DAG.getMergeValues(
3556 Ops: {DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: MemN->getValueType(ResNo: 0), N1: MLoad0,
3557 N2: MLoad1),
3558 DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: MLoad0.getValue(R: 1),
3559 N2: MLoad1.getValue(R: 1))},
3560 dl);
3561 }
3562 if (MemOpc == ISD::MSTORE) {
3563 VectorPair Vals = opSplit(Vec: cast<MaskedStoreSDNode>(Val&: Op)->getValue(), dl, DAG);
3564 SDValue MStore0 =
3565 DAG.getMaskedStore(Chain, dl, Val: Vals.first, Base: Base0, Offset, Mask: Masks.first,
3566 MemVT: LoMemVT, MMO: MOp0, AM: ISD::UNINDEXED, IsTruncating: false, IsCompressing: false);
3567 if (HiIsEmpty)
3568 return MStore0;
3569 SDValue MStore1 =
3570 DAG.getMaskedStore(Chain, dl, Val: Vals.second, Base: Base1, Offset, Mask: Masks.second,
3571 MemVT: HiMemVT, MMO: MOp1, AM: ISD::UNINDEXED, IsTruncating: false, IsCompressing: false);
3572 return DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: MStore0, N2: MStore1);
3573 }
3574
3575 std::string Name = "Unexpected operation: " + Op->getOperationName(G: &DAG);
3576 llvm_unreachable(Name.c_str());
3577}
3578
3579SDValue
3580HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
3581 const SDLoc &dl(Op);
3582 auto *LoadN = cast<LoadSDNode>(Val: Op.getNode());
3583 assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
3584 assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3585 "Not widening loads of i1 yet");
3586
3587 SDValue Chain = LoadN->getChain();
3588 SDValue Base = LoadN->getBasePtr();
3589 SDValue Offset = DAG.getUNDEF(VT: MVT::i32);
3590
3591 MVT ResTy = ty(Op);
3592 unsigned HwLen = Subtarget.getVectorLength();
3593 unsigned ResLen = ResTy.getStoreSize();
3594 assert(ResLen < HwLen && "vsetq(v1) prerequisite");
3595
3596 MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen);
3597 SDValue Mask = getInstr(MachineOpc: Hexagon::V6_pred_scalar2, dl, Ty: BoolTy,
3598 Ops: {DAG.getConstant(Val: ResLen, DL: dl, VT: MVT::i32)}, DAG);
3599
3600 MVT LoadTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
3601 MachineFunction &MF = DAG.getMachineFunction();
3602 auto *MemOp = MF.getMachineMemOperand(MMO: LoadN->getMemOperand(), Offset: 0, Size: HwLen);
3603
3604 SDValue Load = DAG.getMaskedLoad(VT: LoadTy, dl, Chain, Base, Offset, Mask,
3605 Src0: DAG.getUNDEF(VT: LoadTy), MemVT: LoadTy, MMO: MemOp,
3606 AM: ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding: false);
3607 SDValue Value = opCastElem(Vec: Load, ElemTy: ResTy.getVectorElementType(), DAG);
3608 return DAG.getMergeValues(Ops: {Value, Load.getValue(R: 1)}, dl);
3609}
3610
3611SDValue
3612HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
3613 const SDLoc &dl(Op);
3614 auto *StoreN = cast<StoreSDNode>(Val: Op.getNode());
3615 assert(StoreN->isUnindexed() && "Not widening indexed stores yet");
3616 assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3617 "Not widening stores of i1 yet");
3618
3619 SDValue Chain = StoreN->getChain();
3620 SDValue Base = StoreN->getBasePtr();
3621 SDValue Offset = DAG.getUNDEF(VT: MVT::i32);
3622
3623 SDValue Value = opCastElem(Vec: StoreN->getValue(), ElemTy: MVT::i8, DAG);
3624 MVT ValueTy = ty(Op: Value);
3625 unsigned ValueLen = ValueTy.getVectorNumElements();
3626 unsigned HwLen = Subtarget.getVectorLength();
3627 assert(isPowerOf2_32(ValueLen));
3628
3629 for (unsigned Len = ValueLen; Len < HwLen; ) {
3630 Value = opJoin(Ops: {Value, DAG.getUNDEF(VT: ty(Op: Value))}, dl, DAG);
3631 Len = ty(Op: Value).getVectorNumElements(); // This is Len *= 2
3632 }
3633 assert(ty(Value).getVectorNumElements() == HwLen); // Paranoia
3634
3635 assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
3636 MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen);
3637 SDValue Mask = getInstr(MachineOpc: Hexagon::V6_pred_scalar2, dl, Ty: BoolTy,
3638 Ops: {DAG.getConstant(Val: ValueLen, DL: dl, VT: MVT::i32)}, DAG);
3639 MachineFunction &MF = DAG.getMachineFunction();
3640 auto *MemOp = MF.getMachineMemOperand(MMO: StoreN->getMemOperand(), Offset: 0, Size: HwLen);
3641 return DAG.getMaskedStore(Chain, dl, Val: Value, Base, Offset, Mask, MemVT: ty(Op: Value),
3642 MMO: MemOp, AM: ISD::UNINDEXED, IsTruncating: false, IsCompressing: false);
3643}
3644
3645SDValue
3646HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
3647 const SDLoc &dl(Op);
3648 SDValue Op0 = Op.getOperand(i: 0), Op1 = Op.getOperand(i: 1);
3649 MVT ElemTy = ty(Op: Op0).getVectorElementType();
3650 unsigned HwLen = Subtarget.getVectorLength();
3651
3652 unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits();
3653 assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen);
3654 MVT WideOpTy = MVT::getVectorVT(VT: ElemTy, NumElements: WideOpLen);
3655 if (!Subtarget.isHVXVectorType(VecTy: WideOpTy, IncludeBool: true))
3656 return SDValue();
3657
3658 SDValue WideOp0 = appendUndef(Val: Op0, ResTy: WideOpTy, DAG);
3659 SDValue WideOp1 = appendUndef(Val: Op1, ResTy: WideOpTy, DAG);
3660 EVT ResTy =
3661 getSetCCResultType(DAG.getDataLayout(), C&: *DAG.getContext(), VT: WideOpTy);
3662 SDValue SetCC = DAG.getNode(Opcode: ISD::SETCC, DL: dl, VT: ResTy,
3663 Ops: {WideOp0, WideOp1, Op.getOperand(i: 2)});
3664
3665 EVT RetTy = typeLegalize(Ty: ty(Op), DAG);
3666 return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL: dl, VT: RetTy,
3667 Ops: {SetCC, getZero(dl, Ty: MVT::i32, DAG)});
3668}
3669
3670SDValue HexagonTargetLowering::WidenHvxTruncateToBool(SDValue Op,
3671 SelectionDAG &DAG) const {
3672 // Handle truncation to boolean vector where the result boolean type
3673 // needs widening (e.g., v16i32 -> v16i1 where v16i1 is not a standard
3674 // HVX predicate type, or v16i8 -> v16i1 in 128-byte mode).
3675 // Widen the input to HVX width, perform the truncate to the widened
3676 // boolean type, then extract the result.
3677 const SDLoc &dl(Op);
3678 SDValue Inp = Op.getOperand(i: 0);
3679 MVT InpTy = ty(Op: Inp);
3680 MVT ResTy = ty(Op);
3681
3682 assert(ResTy.getVectorElementType() == MVT::i1 &&
3683 "Expected boolean result type");
3684
3685 MVT ElemTy = InpTy.getVectorElementType();
3686 unsigned HwLen = Subtarget.getVectorLength();
3687
3688 // Calculate the widened input type that fills the HVX register.
3689 unsigned WideLen = (8 * HwLen) / ElemTy.getSizeInBits();
3690 MVT WideInpTy = MVT::getVectorVT(VT: ElemTy, NumElements: WideLen);
3691 if (!Subtarget.isHVXVectorType(VecTy: WideInpTy, IncludeBool: false))
3692 return SDValue();
3693
3694 // Widen the input to HVX width.
3695 SDValue WideInp = appendUndef(Val: Inp, ResTy: WideInpTy, DAG);
3696
3697 // Perform the truncate to widened boolean type.
3698 MVT WideBoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: WideLen);
3699 SDValue WideTrunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: WideBoolTy, Operand: WideInp);
3700
3701 // Extract the result.
3702 EVT RetTy = typeLegalize(Ty: ResTy, DAG);
3703 return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL: dl, VT: RetTy,
3704 Ops: {WideTrunc, getZero(dl, Ty: MVT::i32, DAG)});
3705}
3706
3707SDValue
3708HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
3709 unsigned Opc = Op.getOpcode();
3710 bool IsPairOp = isHvxPairTy(Ty: ty(Op)) ||
3711 llvm::any_of(Range: Op.getNode()->ops(), P: [this] (SDValue V) {
3712 return isHvxPairTy(Ty: ty(Op: V));
3713 });
3714
3715 if (IsPairOp) {
3716 switch (Opc) {
3717 default:
3718 break;
3719 case ISD::LOAD:
3720 case ISD::STORE:
3721 case ISD::MLOAD:
3722 case ISD::MSTORE:
3723 return SplitHvxMemOp(Op, DAG);
3724 case ISD::SINT_TO_FP:
3725 case ISD::UINT_TO_FP:
3726 case ISD::FP_TO_SINT:
3727 case ISD::FP_TO_UINT:
3728 if (ty(Op).getSizeInBits() == ty(Op: Op.getOperand(i: 0)).getSizeInBits())
3729 return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc(Op), DAG);
3730 break;
3731 case ISD::ABS:
3732 case ISD::CTPOP:
3733 case ISD::CTLZ:
3734 case ISD::CTTZ:
3735 case ISD::MUL:
3736 case ISD::FADD:
3737 case ISD::FSUB:
3738 case ISD::FMUL:
3739 case ISD::FMINIMUMNUM:
3740 case ISD::FMAXIMUMNUM:
3741 case ISD::MULHS:
3742 case ISD::MULHU:
3743 case ISD::AND:
3744 case ISD::OR:
3745 case ISD::XOR:
3746 case ISD::SRA:
3747 case ISD::SHL:
3748 case ISD::SRL:
3749 case ISD::FSHL:
3750 case ISD::FSHR:
3751 case ISD::SMIN:
3752 case ISD::SMAX:
3753 case ISD::UMIN:
3754 case ISD::UMAX:
3755 case ISD::SETCC:
3756 case ISD::VSELECT:
3757 case ISD::SIGN_EXTEND_INREG:
3758 case ISD::SPLAT_VECTOR:
3759 return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc(Op), DAG);
3760 case ISD::SIGN_EXTEND:
3761 case ISD::ZERO_EXTEND:
3762 // In general, sign- and zero-extends can't be split and still
3763 // be legal. The only exception is extending bool vectors.
3764 if (ty(Op: Op.getOperand(i: 0)).getVectorElementType() == MVT::i1)
3765 return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc(Op), DAG);
3766 break;
3767 }
3768 }
3769
3770 switch (Opc) {
3771 default:
3772 break;
3773 // clang-format off
3774 case ISD::BUILD_VECTOR: return LowerHvxBuildVector(Op, DAG);
3775 case ISD::SPLAT_VECTOR: return LowerHvxSplatVector(Op, DAG);
3776 case ISD::CONCAT_VECTORS: return LowerHvxConcatVectors(Op, DAG);
3777 case ISD::INSERT_SUBVECTOR: return LowerHvxInsertSubvector(Op, DAG);
3778 case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG);
3779 case ISD::EXTRACT_SUBVECTOR: return LowerHvxExtractSubvector(Op, DAG);
3780 case ISD::EXTRACT_VECTOR_ELT: return LowerHvxExtractElement(Op, DAG);
3781 case ISD::BITCAST: return LowerHvxBitcast(Op, DAG);
3782 case ISD::ANY_EXTEND: return LowerHvxAnyExt(Op, DAG);
3783 case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG);
3784 case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG);
3785 case ISD::CTTZ: return LowerHvxCttz(Op, DAG);
3786 case ISD::SELECT: return LowerHvxSelect(Op, DAG);
3787 case ISD::SRA:
3788 case ISD::SHL:
3789 case ISD::SRL: return LowerHvxShift(Op, DAG);
3790 case ISD::FSHL:
3791 case ISD::FSHR: return LowerHvxFunnelShift(Op, DAG);
3792 case ISD::MULHS:
3793 case ISD::MULHU: return LowerHvxMulh(Op, DAG);
3794 case ISD::SMUL_LOHI:
3795 case ISD::UMUL_LOHI: return LowerHvxMulLoHi(Op, DAG);
3796 case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
3797 case ISD::SETCC:
3798 case ISD::INTRINSIC_VOID: return Op;
3799 case ISD::INTRINSIC_WO_CHAIN: return LowerHvxIntrinsic(Op, DAG);
3800 case ISD::MLOAD:
3801 case ISD::MSTORE: return LowerHvxMaskedOp(Op, DAG);
3802 // Unaligned loads will be handled by the default lowering.
3803 case ISD::LOAD: return LowerHvxLoad(Op, DAG);
3804 case ISD::STORE: return LowerHvxStore(Op, DAG);
3805 case ISD::FP_EXTEND: return LowerHvxFpExtend(Op, DAG);
3806 case ISD::FP_TO_SINT:
3807 case ISD::FP_TO_UINT: return LowerHvxFpToInt(Op, DAG);
3808 case ISD::SINT_TO_FP:
3809 case ISD::UINT_TO_FP: return LowerHvxIntToFp(Op, DAG);
3810
3811 // Special nodes:
3812 case HexagonISD::SMUL_LOHI:
3813 case HexagonISD::UMUL_LOHI:
3814 case HexagonISD::USMUL_LOHI: return LowerHvxMulLoHi(Op, DAG);
3815
3816 case ISD::PARTIAL_REDUCE_SMLA:
3817 case ISD::PARTIAL_REDUCE_UMLA:
3818 case ISD::PARTIAL_REDUCE_SUMLA:
3819 return LowerHvxPartialReduceMLA(Op, DAG);
3820 // clang-format on
3821 }
3822#ifndef NDEBUG
3823 Op.dumpr(&DAG);
3824#endif
3825 llvm_unreachable("Unhandled HVX operation");
3826}
3827
3828SDValue
3829HexagonTargetLowering::ExpandHvxResizeIntoSteps(SDValue Op, SelectionDAG &DAG)
3830 const {
3831 // Rewrite the extension/truncation/saturation op into steps where each
3832 // step changes the type widths by a factor of 2.
3833 // E.g. i8 -> i16 remains unchanged, but i8 -> i32 ==> i8 -> i16 -> i32.
3834 //
3835 // Some of the vector types in Op may not be legal.
3836
3837 unsigned Opc = Op.getOpcode();
3838 switch (Opc) {
3839 case HexagonISD::SSAT:
3840 case HexagonISD::USAT:
3841 case HexagonISD::TL_EXTEND:
3842 case HexagonISD::TL_TRUNCATE:
3843 break;
3844 case ISD::ANY_EXTEND:
3845 case ISD::ZERO_EXTEND:
3846 case ISD::SIGN_EXTEND:
3847 case ISD::TRUNCATE:
3848 llvm_unreachable("ISD:: ops will be auto-folded");
3849 break;
3850#ifndef NDEBUG
3851 Op.dump(&DAG);
3852#endif
3853 llvm_unreachable("Unexpected operation");
3854 }
3855
3856 SDValue Inp = Op.getOperand(i: 0);
3857 MVT InpTy = ty(Op: Inp);
3858 MVT ResTy = ty(Op);
3859
3860 unsigned InpWidth = InpTy.getVectorElementType().getSizeInBits();
3861 unsigned ResWidth = ResTy.getVectorElementType().getSizeInBits();
3862 assert(InpWidth != ResWidth);
3863
3864 if (InpWidth == 2 * ResWidth || ResWidth == 2 * InpWidth)
3865 return Op;
3866
3867 const SDLoc &dl(Op);
3868 unsigned NumElems = InpTy.getVectorNumElements();
3869 assert(NumElems == ResTy.getVectorNumElements());
3870
3871 auto repeatOp = [&](unsigned NewWidth, SDValue Arg) {
3872 MVT Ty = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: NewWidth), NumElements: NumElems);
3873 switch (Opc) {
3874 case HexagonISD::SSAT:
3875 case HexagonISD::USAT:
3876 return DAG.getNode(Opcode: Opc, DL: dl, VT: Ty, Ops: {Arg, DAG.getValueType(Ty)});
3877 case HexagonISD::TL_EXTEND:
3878 case HexagonISD::TL_TRUNCATE:
3879 return DAG.getNode(Opcode: Opc, DL: dl, VT: Ty, Ops: {Arg, Op.getOperand(i: 1), Op.getOperand(i: 2)});
3880 default:
3881 llvm_unreachable("Unexpected opcode");
3882 }
3883 };
3884
3885 SDValue S = Inp;
3886 if (InpWidth < ResWidth) {
3887 assert(ResWidth % InpWidth == 0 && isPowerOf2_32(ResWidth / InpWidth));
3888 while (InpWidth * 2 <= ResWidth)
3889 S = repeatOp(InpWidth *= 2, S);
3890 } else {
3891 // InpWidth > ResWidth
3892 assert(InpWidth % ResWidth == 0 && isPowerOf2_32(InpWidth / ResWidth));
3893 while (InpWidth / 2 >= ResWidth)
3894 S = repeatOp(InpWidth /= 2, S);
3895 }
3896 return S;
3897}
3898
3899SDValue
3900HexagonTargetLowering::LegalizeHvxResize(SDValue Op, SelectionDAG &DAG) const {
3901 SDValue Inp0 = Op.getOperand(i: 0);
3902 MVT InpTy = ty(Op: Inp0);
3903 MVT ResTy = ty(Op);
3904 unsigned InpWidth = InpTy.getSizeInBits();
3905 unsigned ResWidth = ResTy.getSizeInBits();
3906 unsigned Opc = Op.getOpcode();
3907
3908 if (shouldWidenToHvx(Ty: InpTy, DAG) || shouldWidenToHvx(Ty: ResTy, DAG)) {
3909 // First, make sure that the narrower type is widened to HVX.
3910 // This may cause the result to be wider than what the legalizer
3911 // expects, so insert EXTRACT_SUBVECTOR to bring it back to the
3912 // desired type.
3913 auto [WInpTy, WResTy] =
3914 InpWidth < ResWidth ? typeWidenToWider(Ty0: typeWidenToHvx(Ty: InpTy), Ty1: ResTy)
3915 : typeWidenToWider(Ty0: InpTy, Ty1: typeWidenToHvx(Ty: ResTy));
3916 SDValue W = appendUndef(Val: Inp0, ResTy: WInpTy, DAG);
3917 SDValue S;
3918 if (Opc == HexagonISD::TL_EXTEND || Opc == HexagonISD::TL_TRUNCATE) {
3919 S = DAG.getNode(Opcode: Opc, DL: SDLoc(Op), VT: WResTy, N1: W, N2: Op.getOperand(i: 1),
3920 N3: Op.getOperand(i: 2));
3921 } else {
3922 S = DAG.getNode(Opcode: Opc, DL: SDLoc(Op), VT: WResTy, N1: W, N2: DAG.getValueType(WResTy));
3923 }
3924 SDValue T = ExpandHvxResizeIntoSteps(Op: S, DAG);
3925 return extractSubvector(Vec: T, SubTy: typeLegalize(Ty: ResTy, DAG), SubIdx: 0, DAG);
3926 } else if (shouldSplitToHvx(Ty: InpWidth < ResWidth ? ResTy : InpTy, DAG)) {
3927 // For multi-step extends/truncates (e.g., i8->i32), expand into
3928 // single-step operations first. Splitting a multi-step TL_EXTEND
3929 // would halve the operand type to a sub-HVX size (e.g., v128i8 ->
3930 // v64i8), creating illegal types that cause issues in the type
3931 // legalizer's map tracking. Single-step operations (e.g., i16->i32)
3932 // are safe to split because their halved operand types remain legal.
3933 SDValue T = ExpandHvxResizeIntoSteps(Op, DAG);
3934 if (T != Op)
3935 return T;
3936 return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc(Op), DAG);
3937 } else {
3938 assert(isTypeLegal(InpTy) && isTypeLegal(ResTy));
3939 return RemoveTLWrapper(Op, DAG);
3940 }
3941 llvm_unreachable("Unexpected situation");
3942}
3943
3944void
3945HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
3946 SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
3947 unsigned Opc = N->getOpcode();
3948 SDValue Op(N, 0);
3949 SDValue Inp0; // Optional first argument.
3950 if (N->getNumOperands() > 0)
3951 Inp0 = Op.getOperand(i: 0);
3952
3953 switch (Opc) {
3954 case ISD::ANY_EXTEND:
3955 case ISD::SIGN_EXTEND:
3956 case ISD::ZERO_EXTEND:
3957 if (Subtarget.isHVXElementType(Ty: ty(Op)) &&
3958 Subtarget.isHVXElementType(Ty: ty(Op: Inp0))) {
3959 Results.push_back(Elt: CreateTLWrapper(Op, DAG));
3960 }
3961 break;
3962 case ISD::TRUNCATE:
3963 // Handle truncate to boolean vector when the input is not a
3964 // standard HVX vector type (single or pair). This covers cases
3965 // where the input needs widening (e.g., v64i8 -> v64i1 in
3966 // 128-byte mode) and cases where the result boolean type itself
3967 // needs widening (e.g., v16i32 -> v16i1). When the input is
3968 // already an HVX type, tablegen patterns handle the truncation
3969 // directly (e.g., v64i16 -> v64i1 via V6_vandvrt).
3970 if (ty(Op).getVectorElementType() == MVT::i1 &&
3971 !Subtarget.isHVXVectorType(VecTy: ty(Op: Inp0), IncludeBool: false)) {
3972 if (SDValue T = WidenHvxTruncateToBool(Op, DAG))
3973 Results.push_back(Elt: T);
3974 } else if (Subtarget.isHVXElementType(Ty: ty(Op)) &&
3975 Subtarget.isHVXElementType(Ty: ty(Op: Inp0))) {
3976 Results.push_back(Elt: CreateTLWrapper(Op, DAG));
3977 }
3978 break;
3979 case ISD::SETCC:
3980 if (shouldWidenToHvx(Ty: ty(Op: Inp0), DAG)) {
3981 if (SDValue T = WidenHvxSetCC(Op, DAG))
3982 Results.push_back(Elt: T);
3983 }
3984 break;
3985 case ISD::STORE: {
3986 if (shouldWidenToHvx(Ty: ty(Op: cast<StoreSDNode>(Val: N)->getValue()), DAG)) {
3987 SDValue Store = WidenHvxStore(Op, DAG);
3988 Results.push_back(Elt: Store);
3989 }
3990 break;
3991 }
3992 case ISD::MLOAD:
3993 if (isHvxPairTy(Ty: ty(Op))) {
3994 SDValue S = SplitHvxMemOp(Op, DAG);
3995 assert(S->getOpcode() == ISD::MERGE_VALUES);
3996 Results.push_back(Elt: S.getOperand(i: 0));
3997 Results.push_back(Elt: S.getOperand(i: 1));
3998 }
3999 break;
4000 case ISD::MSTORE:
4001 if (isHvxPairTy(Ty: ty(Op: Op->getOperand(Num: 1)))) { // Stored value
4002 SDValue S = SplitHvxMemOp(Op, DAG);
4003 Results.push_back(Elt: S);
4004 }
4005 break;
4006 case ISD::SINT_TO_FP:
4007 case ISD::UINT_TO_FP:
4008 case ISD::FP_TO_SINT:
4009 case ISD::FP_TO_UINT:
4010 if (ty(Op).getSizeInBits() != ty(Op: Inp0).getSizeInBits()) {
4011 SDValue T = EqualizeFpIntConversion(Op, DAG);
4012 Results.push_back(Elt: T);
4013 }
4014 break;
4015 case HexagonISD::SSAT:
4016 case HexagonISD::USAT:
4017 case HexagonISD::TL_EXTEND:
4018 case HexagonISD::TL_TRUNCATE:
4019 Results.push_back(Elt: LegalizeHvxResize(Op, DAG));
4020 break;
4021 default:
4022 break;
4023 }
4024}
4025
4026void
4027HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
4028 SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
4029 unsigned Opc = N->getOpcode();
4030 SDValue Op(N, 0);
4031 SDValue Inp0; // Optional first argument.
4032 if (N->getNumOperands() > 0)
4033 Inp0 = Op.getOperand(i: 0);
4034
4035 switch (Opc) {
4036 case ISD::ANY_EXTEND:
4037 case ISD::SIGN_EXTEND:
4038 case ISD::ZERO_EXTEND:
4039 if (Subtarget.isHVXElementType(Ty: ty(Op)) &&
4040 Subtarget.isHVXElementType(Ty: ty(Op: Inp0))) {
4041 Results.push_back(Elt: CreateTLWrapper(Op, DAG));
4042 }
4043 break;
4044 case ISD::TRUNCATE:
4045 // Handle truncate to boolean vector when the input is not a
4046 // standard HVX vector type. See comment in LowerHvxOperationWrapper.
4047 if (ty(Op).getVectorElementType() == MVT::i1 &&
4048 !Subtarget.isHVXVectorType(VecTy: ty(Op: Inp0), IncludeBool: false)) {
4049 if (SDValue T = WidenHvxTruncateToBool(Op, DAG))
4050 Results.push_back(Elt: T);
4051 } else if (Subtarget.isHVXElementType(Ty: ty(Op)) &&
4052 Subtarget.isHVXElementType(Ty: ty(Op: Inp0))) {
4053 Results.push_back(Elt: CreateTLWrapper(Op, DAG));
4054 }
4055 break;
4056 case ISD::SETCC:
4057 if (shouldWidenToHvx(Ty: ty(Op), DAG)) {
4058 if (SDValue T = WidenHvxSetCC(Op, DAG))
4059 Results.push_back(Elt: T);
4060 }
4061 break;
4062 case ISD::LOAD: {
4063 if (shouldWidenToHvx(Ty: ty(Op), DAG)) {
4064 SDValue Load = WidenHvxLoad(Op, DAG);
4065 assert(Load->getOpcode() == ISD::MERGE_VALUES);
4066 Results.push_back(Elt: Load.getOperand(i: 0));
4067 Results.push_back(Elt: Load.getOperand(i: 1));
4068 }
4069 break;
4070 }
4071 case ISD::BITCAST:
4072 if (isHvxBoolTy(Ty: ty(Op: Inp0))) {
4073 SDValue C = LowerHvxBitcast(Op, DAG);
4074 Results.push_back(Elt: C);
4075 }
4076 break;
4077 case ISD::FP_TO_SINT:
4078 case ISD::FP_TO_UINT:
4079 if (ty(Op).getSizeInBits() != ty(Op: Inp0).getSizeInBits()) {
4080 SDValue T = EqualizeFpIntConversion(Op, DAG);
4081 Results.push_back(Elt: T);
4082 }
4083 break;
4084 case HexagonISD::SSAT:
4085 case HexagonISD::USAT:
4086 case HexagonISD::TL_EXTEND:
4087 case HexagonISD::TL_TRUNCATE:
4088 Results.push_back(Elt: LegalizeHvxResize(Op, DAG));
4089 break;
4090 default:
4091 break;
4092 }
4093}
4094
4095SDValue
4096HexagonTargetLowering::combineTruncateBeforeLegal(SDValue Op,
4097 DAGCombinerInfo &DCI) const {
4098 // Simplify V:v2NiB --(bitcast)--> vNi2B --(truncate)--> vNiB
4099 // to extract-subvector (shuffle V, pick even, pick odd)
4100
4101 assert(Op.getOpcode() == ISD::TRUNCATE);
4102 SelectionDAG &DAG = DCI.DAG;
4103 const SDLoc &dl(Op);
4104
4105 if (Op.getOperand(i: 0).getOpcode() == ISD::BITCAST)
4106 return SDValue();
4107 SDValue Cast = Op.getOperand(i: 0);
4108 SDValue Src = Cast.getOperand(i: 0);
4109
4110 EVT TruncTy = Op.getValueType();
4111 EVT CastTy = Cast.getValueType();
4112 EVT SrcTy = Src.getValueType();
4113 if (SrcTy.isSimple())
4114 return SDValue();
4115 if (SrcTy.getVectorElementType() != TruncTy.getVectorElementType())
4116 return SDValue();
4117 unsigned SrcLen = SrcTy.getVectorNumElements();
4118 unsigned CastLen = CastTy.getVectorNumElements();
4119 if (2 * CastLen != SrcLen)
4120 return SDValue();
4121
4122 SmallVector<int, 128> Mask(SrcLen);
4123 for (int i = 0; i != static_cast<int>(CastLen); ++i) {
4124 Mask[i] = 2 * i;
4125 Mask[i + CastLen] = 2 * i + 1;
4126 }
4127 SDValue Deal =
4128 DAG.getVectorShuffle(VT: SrcTy, dl, N1: Src, N2: DAG.getUNDEF(VT: SrcTy), Mask);
4129 return opSplit(Vec: Deal, dl, DAG).first;
4130}
4131
4132SDValue
4133HexagonTargetLowering::combineConcatOfShuffles(SDValue Op,
4134 SelectionDAG &DAG) const {
4135 // Fold
4136 // concat (shuffle x, y, m1), (shuffle x, y, m2)
4137 // into
4138 // shuffle (concat x, y), undef, m3
4139 if (Op.getNumOperands() != 2)
4140 return SDValue();
4141
4142 const SDLoc &dl(Op);
4143 SDValue V0 = Op.getOperand(i: 0);
4144 SDValue V1 = Op.getOperand(i: 1);
4145
4146 if (V0.getOpcode() != ISD::VECTOR_SHUFFLE)
4147 return SDValue();
4148 if (V1.getOpcode() != ISD::VECTOR_SHUFFLE)
4149 return SDValue();
4150
4151 SetVector<SDValue> Order;
4152 Order.insert(X: V0.getOperand(i: 0));
4153 Order.insert(X: V0.getOperand(i: 1));
4154 Order.insert(X: V1.getOperand(i: 0));
4155 Order.insert(X: V1.getOperand(i: 1));
4156
4157 if (Order.size() > 2)
4158 return SDValue();
4159
4160 // In ISD::VECTOR_SHUFFLE, the types of each input and the type of the
4161 // result must be the same.
4162 EVT InpTy = V0.getValueType();
4163 assert(InpTy.isVector());
4164 unsigned InpLen = InpTy.getVectorNumElements();
4165
4166 SmallVector<int, 128> LongMask;
4167 auto AppendToMask = [&](SDValue Shuffle) {
4168 auto *SV = cast<ShuffleVectorSDNode>(Val: Shuffle.getNode());
4169 ArrayRef<int> Mask = SV->getMask();
4170 SDValue X = Shuffle.getOperand(i: 0);
4171 SDValue Y = Shuffle.getOperand(i: 1);
4172 for (int M : Mask) {
4173 if (M == -1) {
4174 LongMask.push_back(Elt: M);
4175 continue;
4176 }
4177 SDValue Src = static_cast<unsigned>(M) < InpLen ? X : Y;
4178 if (static_cast<unsigned>(M) >= InpLen)
4179 M -= InpLen;
4180
4181 int OutOffset = Order[0] == Src ? 0 : InpLen;
4182 LongMask.push_back(Elt: M + OutOffset);
4183 }
4184 };
4185
4186 AppendToMask(V0);
4187 AppendToMask(V1);
4188
4189 SDValue C0 = Order.front();
4190 SDValue C1 = Order.back(); // Can be same as front
4191 EVT LongTy = InpTy.getDoubleNumVectorElementsVT(Context&: *DAG.getContext());
4192
4193 SDValue Cat = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: LongTy, Ops: {C0, C1});
4194 return DAG.getVectorShuffle(VT: LongTy, dl, N1: Cat, N2: DAG.getUNDEF(VT: LongTy), Mask: LongMask);
4195}
4196
4197// Reassociate concat(p1, p2, ...) into
4198// concat(concat(p1, ...), concat(pi, ...), ...)
4199// where each inner concat produces a predicate where each bit corresponds
4200// to at most BitBytes bytes.
4201// Concatenating predicates decreases the number of bytes per each predicate
4202// bit.
4203SDValue
4204HexagonTargetLowering::combineConcatOfScalarPreds(SDValue Op, unsigned BitBytes,
4205 SelectionDAG &DAG) const {
4206 const SDLoc &dl(Op);
4207 SmallVector<SDValue> Ops(Op->ops());
4208 MVT ResTy = ty(Op);
4209 MVT InpTy = ty(Op: Ops[0]);
4210 unsigned InpLen = InpTy.getVectorNumElements(); // Scalar predicate
4211 unsigned ResLen = ResTy.getVectorNumElements(); // HVX vector predicate
4212 assert(InpLen <= 8 && "Too long for scalar predicate");
4213 assert(ResLen > 8 && "Too short for HVX vector predicate");
4214
4215 unsigned Bytes = 8 / InpLen; // Bytes-per-bit in input
4216
4217 // Already in the right form?
4218 if (Bytes <= BitBytes)
4219 return Op;
4220
4221 ArrayRef<SDValue> Inputs(Ops);
4222 unsigned SliceLen = Bytes / BitBytes;
4223
4224 SmallVector<SDValue> Cats;
4225 // (8 / BitBytes) is the desired length of the result of the inner concat.
4226 MVT InnerTy = MVT::getVectorVT(VT: MVT::i1, NumElements: 8 / BitBytes);
4227 for (unsigned i = 0; i != ResLen / (8 / BitBytes); ++i) {
4228 SDValue Cat = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: InnerTy,
4229 Ops: Inputs.slice(N: SliceLen * i, M: SliceLen));
4230 Cats.push_back(Elt: Cat);
4231 }
4232
4233 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: ResTy, Ops: Cats);
4234}
4235
4236SDValue HexagonTargetLowering::combineConcatVectorsBeforeLegal(
4237 SDValue Op, DAGCombinerInfo &DCI) const {
4238 MVT ResTy = ty(Op);
4239 MVT ElemTy = ResTy.getVectorElementType();
4240
4241 if (ElemTy != MVT::i1) {
4242 return combineConcatOfShuffles(Op, DAG&: DCI.DAG);
4243 }
4244 return SDValue();
4245}
4246
4247// Create the inner partial reduction MLA that can be efficiently lowered. This
4248// function is used by partial and full reductions.
4249SDValue HexagonTargetLowering::createExtendingPartialReduceMLA(
4250 unsigned Opcode, EVT AccEltType, unsigned AccNumElements, EVT InputType,
4251 const SDValue &A, const SDValue &B, unsigned &RemainingReductionRatio,
4252 const SDLoc &DL, SelectionDAG &DAG) const {
4253 const auto &Subtarget = DAG.getSubtarget<HexagonSubtarget>();
4254 if (!Subtarget.useHVXOps())
4255 return SDValue();
4256
4257 EVT InputEltType = InputType.getVectorElementType();
4258
4259 // Find if an optimized instruction for the sub-reduction is available.
4260 unsigned NativeRatio;
4261 if (AccEltType == MVT::i32 && InputEltType == MVT::i8)
4262 NativeRatio = 4;
4263 else
4264 return SDValue();
4265
4266 // We only handle the case when additional reduction will be needed, i.e.
4267 // input is longer by a larger factor than the result.
4268 ElementCount InputEC = InputType.getVectorElementCount();
4269 if (!InputEC.isKnownMultipleOf(RHS: AccNumElements * NativeRatio))
4270 return SDValue();
4271
4272 unsigned InputNumElements = InputEC.getFixedValue();
4273 RemainingReductionRatio = InputNumElements / (AccNumElements * NativeRatio);
4274 if (RemainingReductionRatio == 1)
4275 return SDValue();
4276
4277 // Create a reduction by the natively supported factor.
4278 EVT IntermediateType = EVT::getVectorVT(Context&: *DAG.getContext(), VT: AccEltType,
4279 NumElements: InputNumElements / NativeRatio);
4280
4281 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: IntermediateType);
4282 return DAG.getNode(Opcode, DL, VT: IntermediateType, N1: Zero, N2: A, N3: B);
4283}
4284
4285static bool DetectExtendingMultiply(const SDValue &N, EVT ScalarType,
4286 unsigned &Opcode, SDValue &A, SDValue &B) {
4287 SDValue Mul = N;
4288 EVT AccType = Mul.getValueType(); // Vector input type after extension.
4289 if (ScalarType != AccType.getVectorElementType())
4290 return false;
4291 bool swap = false;
4292 if (Mul->getOpcode() != ISD::MUL)
4293 return false;
4294 A = Mul->getOperand(Num: 0);
4295 B = Mul->getOperand(Num: 1);
4296 if (A.getOpcode() == ISD::ZERO_EXTEND) {
4297 if (B.getOpcode() == ISD::ZERO_EXTEND)
4298 Opcode = ISD::PARTIAL_REDUCE_UMLA;
4299 else if (B.getOpcode() == ISD::SIGN_EXTEND) {
4300 swap = true;
4301 Opcode = ISD::PARTIAL_REDUCE_SUMLA;
4302 } else
4303 return false;
4304 } else if (A.getOpcode() == ISD::SIGN_EXTEND) {
4305 if (B.getOpcode() == ISD::ZERO_EXTEND)
4306 Opcode = ISD::PARTIAL_REDUCE_SUMLA;
4307 else if (B.getOpcode() == ISD::SIGN_EXTEND)
4308 Opcode = ISD::PARTIAL_REDUCE_SMLA;
4309 else
4310 return false;
4311 } else
4312 return false;
4313
4314 // Get multiplication arguments before extension.
4315 A = A->getOperand(Num: 0);
4316 B = B->getOperand(Num: 0);
4317 if (A.getValueType() != B.getValueType())
4318 return false;
4319
4320 if (swap)
4321 std::swap(a&: A, b&: B);
4322
4323 return true;
4324}
4325
4326SDValue HexagonTargetLowering::splitVecReduceAdd(SDNode *N,
4327 SelectionDAG &DAG) const {
4328 if (!Subtarget.useHVXOps())
4329 return SDValue();
4330
4331 EVT ScalarType = N->getValueType(ResNo: 0);
4332 unsigned Opcode;
4333 SDValue A, B;
4334 if (!DetectExtendingMultiply(N: N->getOperand(Num: 0), ScalarType, Opcode, A, B))
4335 return SDValue();
4336
4337 SDLoc DL(N);
4338 unsigned RemainingReductionRatio;
4339 SDValue Partial =
4340 createExtendingPartialReduceMLA(Opcode, AccEltType: ScalarType, AccNumElements: 1, InputType: A.getValueType(),
4341 A, B, RemainingReductionRatio, DL, DAG);
4342 if (!Partial)
4343 return SDValue();
4344
4345 // We could have inserted a trivial MLA and rely on the folding action,
4346 // similar to how vector_partial_reduce_add is lowered to an MLA in
4347 // SelectionDAGBuilder. However, we just replace the final result since we
4348 // have analyzed the input completely.
4349 return DAG.getNode(Opcode: ISD::VECREDUCE_ADD, DL, VT: ScalarType, Operand: Partial);
4350}
4351
4352// When possible, separate an MLA reduction with extended operands but
4353// unsupported reduction factor into an extending partial reduction that
4354// can be efficiently lowered, and a follow-up partial reduction.
4355// partial_reduce_mla(a, x, y) ->
4356// partial_reduce_mla(a, partial_reduce_mla(0, x, y), 1)
4357SDValue
4358HexagonTargetLowering::splitExtendingPartialReduceMLA(SDNode *N,
4359 SelectionDAG &DAG) const {
4360 if (!Subtarget.useHVXOps())
4361 return SDValue();
4362
4363 SDValue Acc = N->getOperand(Num: 0);
4364 SDValue A = N->getOperand(Num: 1);
4365 SDValue B = N->getOperand(Num: 2);
4366 if (A.getValueType() != B.getValueType())
4367 return SDValue();
4368
4369 // The types should be declared as custom, but do not split already legal
4370 // operation.
4371 EVT AccType = Acc.getValueType();
4372 EVT InputType = A.getValueType();
4373 if (getPartialReduceMLAAction(Opc: N->getOpcode(), AccVT: AccType, InputVT: InputType) != Custom)
4374 return SDValue();
4375
4376 SDLoc DL(N);
4377 unsigned RemainingReductionRatio;
4378 SDValue Partial = createExtendingPartialReduceMLA(
4379 Opcode: N->getOpcode(), AccEltType: AccType.getVectorElementType(),
4380 AccNumElements: AccType.getVectorNumElements(), InputType, A, B, RemainingReductionRatio,
4381 DL, DAG);
4382 if (!Partial)
4383 return SDValue();
4384 assert(RemainingReductionRatio <= MaxExpandMLA);
4385
4386 // Create the reduction for the remaining ratio.
4387 EVT IntermediateType = Partial->getOperand(Num: 0).getValueType();
4388 SDValue One = DAG.getConstant(Val: 1, DL, VT: IntermediateType);
4389 return DAG.getNode(Opcode: N->getOpcode() == ISD::PARTIAL_REDUCE_UMLA
4390 ? ISD::PARTIAL_REDUCE_UMLA
4391 : ISD::PARTIAL_REDUCE_SUMLA,
4392 DL, VT: AccType, N1: Acc, N2: Partial, N3: One);
4393}
4394
4395SDValue
4396HexagonTargetLowering::LowerHvxPartialReduceMLA(SDValue Op,
4397 SelectionDAG &DAG) const {
4398 const SDLoc &DL(Op);
4399 SDValue Acc = Op.getOperand(i: 0);
4400 SDValue A = Op.getOperand(i: 1);
4401 SDValue B = Op.getOperand(i: 2);
4402
4403 // Split the input vectors into units of one HVX vector length.
4404 unsigned HwVectorSizeInBits = Subtarget.getVectorLength() * 8;
4405
4406 EVT AccType = Acc.getValueType();
4407 EVT AccEltType = AccType.getVectorElementType();
4408 unsigned AccSubvectorNumElements =
4409 HwVectorSizeInBits / AccEltType.getSizeInBits();
4410 EVT AccSubvectorType =
4411 EVT::getVectorVT(Context&: *DAG.getContext(), VT: AccEltType, NumElements: AccSubvectorNumElements);
4412
4413 EVT InputType = A.getValueType();
4414 assert(InputType.getSizeInBits() % HwVectorSizeInBits == 0);
4415 EVT InputEltType = InputType.getVectorElementType();
4416 unsigned InputSubvectorNumElements =
4417 HwVectorSizeInBits / InputEltType.getSizeInBits();
4418 EVT InputSubvectorType = EVT::getVectorVT(Context&: *DAG.getContext(), VT: InputEltType,
4419 NumElements: InputSubvectorNumElements);
4420
4421 unsigned SubvectorNum = InputType.getFixedSizeInBits() / HwVectorSizeInBits;
4422 SmallVector<SDValue, MaxExpandMLA> Subvectors;
4423
4424 for (unsigned I = 0; I != SubvectorNum; ++I) {
4425 SDValue SubvectorAcc = DAG.getExtractSubvector(DL, VT: AccSubvectorType, Vec: Acc,
4426 Idx: I * AccSubvectorNumElements);
4427 SDValue SubvectorA = DAG.getExtractSubvector(DL, VT: InputSubvectorType, Vec: A,
4428 Idx: I * InputSubvectorNumElements);
4429 SDValue SubvectorB = DAG.getExtractSubvector(DL, VT: InputSubvectorType, Vec: B,
4430 Idx: I * InputSubvectorNumElements);
4431 SDValue SubvectorMLA = DAG.getNode(Opcode: Op.getOpcode(), DL, VT: AccSubvectorType,
4432 N1: SubvectorAcc, N2: SubvectorA, N3: SubvectorB);
4433 Subvectors.push_back(Elt: SubvectorMLA);
4434 }
4435
4436 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: AccType, Ops: Subvectors);
4437}
4438
4439SDValue
4440HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
4441 const {
4442 const SDLoc &dl(N);
4443 SelectionDAG &DAG = DCI.DAG;
4444 SDValue Op(N, 0);
4445 unsigned Opc = Op.getOpcode();
4446
4447 SmallVector<SDValue, 4> Ops(N->ops());
4448
4449 if (Opc == ISD::TRUNCATE)
4450 return combineTruncateBeforeLegal(Op, DCI);
4451 if (Opc == ISD::CONCAT_VECTORS)
4452 return combineConcatVectorsBeforeLegal(Op, DCI);
4453
4454 if (DCI.isBeforeLegalizeOps())
4455 return SDValue();
4456
4457 switch (Opc) {
4458 case HexagonISD::V2Q:
4459 if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) {
4460 if (const auto *C = dyn_cast<ConstantSDNode>(Val: Ops[0].getOperand(i: 0)))
4461 return C->isZero() ? DAG.getNode(Opcode: HexagonISD::QFALSE, DL: dl, VT: ty(Op))
4462 : DAG.getNode(Opcode: HexagonISD::QTRUE, DL: dl, VT: ty(Op));
4463 }
4464 break;
4465 case HexagonISD::Q2V:
4466 if (Ops[0].getOpcode() == HexagonISD::QTRUE)
4467 return DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ty(Op),
4468 Operand: DAG.getAllOnesConstant(DL: dl, VT: MVT::i32));
4469 if (Ops[0].getOpcode() == HexagonISD::QFALSE)
4470 return getZero(dl, Ty: ty(Op), DAG);
4471 break;
4472 case HexagonISD::VINSERTW0:
4473 if (isUndef(Op: Ops[1]))
4474 return Ops[0];
4475 break;
4476 case HexagonISD::VROR: {
4477 if (Ops[0].getOpcode() == HexagonISD::VROR) {
4478 SDValue Vec = Ops[0].getOperand(i: 0);
4479 SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(i: 1);
4480 SDValue Rot = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ty(Op: Rot0), Ops: {Rot0, Rot1});
4481 return DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ty(Op), Ops: {Vec, Rot});
4482 }
4483 break;
4484 }
4485 }
4486
4487 return SDValue();
4488}
4489
4490bool
4491HexagonTargetLowering::shouldSplitToHvx(MVT Ty, SelectionDAG &DAG) const {
4492 if (Subtarget.isHVXVectorType(VecTy: Ty, IncludeBool: true))
4493 return false;
4494 auto Action = getPreferredHvxVectorAction(VecTy: Ty);
4495 if (Action == TargetLoweringBase::TypeSplitVector)
4496 return Subtarget.isHVXVectorType(VecTy: typeLegalize(Ty, DAG), IncludeBool: true);
4497 return false;
4498}
4499
4500bool
4501HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const {
4502 if (Subtarget.isHVXVectorType(VecTy: Ty, IncludeBool: true))
4503 return false;
4504 auto Action = getPreferredHvxVectorAction(VecTy: Ty);
4505 if (Action == TargetLoweringBase::TypeWidenVector)
4506 return Subtarget.isHVXVectorType(VecTy: typeLegalize(Ty, DAG), IncludeBool: true);
4507 return false;
4508}
4509
4510bool
4511HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const {
4512 if (!Subtarget.useHVXOps())
4513 return false;
4514 // If the type of any result, or any operand type are HVX vector types,
4515 // this is an HVX operation.
4516 auto IsHvxTy = [this](EVT Ty) {
4517 return Ty.isSimple() && Subtarget.isHVXVectorType(VecTy: Ty.getSimpleVT(), IncludeBool: true);
4518 };
4519 auto IsHvxOp = [this](SDValue Op) {
4520 return Op.getValueType().isSimple() &&
4521 Subtarget.isHVXVectorType(VecTy: ty(Op), IncludeBool: true);
4522 };
4523 if (llvm::any_of(Range: N->values(), P: IsHvxTy) || llvm::any_of(Range: N->ops(), P: IsHvxOp))
4524 return true;
4525
4526 // Check if this could be an HVX operation after type widening.
4527 auto IsWidenedToHvx = [this, &DAG](SDValue Op) {
4528 if (!Op.getValueType().isSimple())
4529 return false;
4530 MVT ValTy = ty(Op);
4531 return ValTy.isVector() && shouldWidenToHvx(Ty: ValTy, DAG);
4532 };
4533
4534 for (int i = 0, e = N->getNumValues(); i != e; ++i) {
4535 if (IsWidenedToHvx(SDValue(N, i)))
4536 return true;
4537 }
4538 return llvm::any_of(Range: N->ops(), P: IsWidenedToHvx);
4539}
4540