1//===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "HexagonISelLowering.h"
10#include "HexagonRegisterInfo.h"
11#include "HexagonSubtarget.h"
12#include "llvm/ADT/SetVector.h"
13#include "llvm/ADT/SmallVector.h"
14#include "llvm/Analysis/MemoryLocation.h"
15#include "llvm/CodeGen/MachineBasicBlock.h"
16#include "llvm/CodeGen/MachineFunction.h"
17#include "llvm/CodeGen/MachineInstr.h"
18#include "llvm/CodeGen/MachineOperand.h"
19#include "llvm/CodeGen/MachineRegisterInfo.h"
20#include "llvm/CodeGen/TargetInstrInfo.h"
21#include "llvm/IR/IntrinsicsHexagon.h"
22#include "llvm/Support/CommandLine.h"
23
24#include <algorithm>
25#include <string>
26#include <utility>
27
28using namespace llvm;
29
// Command-line override for the short-vector widening threshold consulted in
// getPreferredHvxVectorAction() (bytes; explicit occurrences take precedence
// over the built-in half-vector heuristic).
static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
  cl::Hidden, cl::init(Val: 16),
  cl::desc("Lower threshold (in bytes) for widening to HVX vectors"));

// When set, enables the fast (non-default) FP conversion lowering path.
static cl::opt<bool>
    EnableFpFastConvert("hexagon-fp-fast-convert", cl::Hidden, cl::init(Val: false),
                        cl::desc("Enable FP fast conversion routine."));

// Legal single-vector (V) and vector-pair (W) integer types for the 64-byte
// and 128-byte HVX modes; selected in initializeHVXLowering().
static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 };
static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };

// Upper bound on the combined concat/reduction expansion factor allowed for
// custom partial-reduction MLA lowering (see initializeHVXLowering()).
static const unsigned MaxExpandMLA = 8;
44
45static std::tuple<unsigned, unsigned, unsigned> getIEEEProperties(MVT Ty) {
46 // For a float scalar type, return (exp-bits, exp-bias, fraction-bits)
47 MVT ElemTy = Ty.getScalarType();
48 switch (ElemTy.SimpleTy) {
49 case MVT::f16:
50 return std::make_tuple(args: 5, args: 15, args: 10);
51 case MVT::f32:
52 return std::make_tuple(args: 8, args: 127, args: 23);
53 case MVT::f64:
54 return std::make_tuple(args: 11, args: 1023, args: 52);
55 default:
56 break;
57 }
58 llvm_unreachable(("Unexpected type: " + EVT(ElemTy).getEVTString()).c_str());
59}
60
61void
62HexagonTargetLowering::initializeHVXLowering() {
63 if (Subtarget.useHVX64BOps()) {
64 addRegisterClass(VT: MVT::v64i8, RC: &Hexagon::HvxVRRegClass);
65 addRegisterClass(VT: MVT::v32i16, RC: &Hexagon::HvxVRRegClass);
66 addRegisterClass(VT: MVT::v16i32, RC: &Hexagon::HvxVRRegClass);
67 addRegisterClass(VT: MVT::v128i8, RC: &Hexagon::HvxWRRegClass);
68 addRegisterClass(VT: MVT::v64i16, RC: &Hexagon::HvxWRRegClass);
69 addRegisterClass(VT: MVT::v32i32, RC: &Hexagon::HvxWRRegClass);
70 // These "short" boolean vector types should be legal because
71 // they will appear as results of vector compares. If they were
72 // not legal, type legalization would try to make them legal
73 // and that would require using operations that do not use or
74 // produce such types. That, in turn, would imply using custom
75 // nodes, which would be unoptimizable by the DAG combiner.
76 // The idea is to rely on target-independent operations as much
77 // as possible.
78 addRegisterClass(VT: MVT::v16i1, RC: &Hexagon::HvxQRRegClass);
79 addRegisterClass(VT: MVT::v32i1, RC: &Hexagon::HvxQRRegClass);
80 addRegisterClass(VT: MVT::v64i1, RC: &Hexagon::HvxQRRegClass);
81 } else if (Subtarget.useHVX128BOps()) {
82 addRegisterClass(VT: MVT::v128i8, RC: &Hexagon::HvxVRRegClass);
83 addRegisterClass(VT: MVT::v64i16, RC: &Hexagon::HvxVRRegClass);
84 addRegisterClass(VT: MVT::v32i32, RC: &Hexagon::HvxVRRegClass);
85 addRegisterClass(VT: MVT::v256i8, RC: &Hexagon::HvxWRRegClass);
86 addRegisterClass(VT: MVT::v128i16, RC: &Hexagon::HvxWRRegClass);
87 addRegisterClass(VT: MVT::v64i32, RC: &Hexagon::HvxWRRegClass);
88 addRegisterClass(VT: MVT::v32i1, RC: &Hexagon::HvxQRRegClass);
89 addRegisterClass(VT: MVT::v64i1, RC: &Hexagon::HvxQRRegClass);
90 addRegisterClass(VT: MVT::v128i1, RC: &Hexagon::HvxQRRegClass);
91 if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) {
92 addRegisterClass(VT: MVT::v32f32, RC: &Hexagon::HvxVRRegClass);
93 addRegisterClass(VT: MVT::v64f16, RC: &Hexagon::HvxVRRegClass);
94 addRegisterClass(VT: MVT::v64f32, RC: &Hexagon::HvxWRRegClass);
95 addRegisterClass(VT: MVT::v128f16, RC: &Hexagon::HvxWRRegClass);
96 }
97 if (Subtarget.useHVXV81Ops()) {
98 addRegisterClass(VT: MVT::v64bf16, RC: &Hexagon::HvxVRRegClass);
99 addRegisterClass(VT: MVT::v128bf16, RC: &Hexagon::HvxWRRegClass);
100 }
101 }
102
103 // Set up operation actions.
104
105 bool Use64b = Subtarget.useHVX64BOps();
106 ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
107 ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
108 MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8;
109 MVT WordV = Use64b ? MVT::v16i32 : MVT::v32i32;
110 MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;
111
112 auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
113 setOperationAction(Op: Opc, VT: FromTy, Action: Promote);
114 AddPromotedToType(Opc, OrigVT: FromTy, DestVT: ToTy);
115 };
116
117 // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
118 // Note: v16i1 -> i16 is handled in type legalization instead of op
119 // legalization.
120 setOperationAction(Op: ISD::BITCAST, VT: MVT::i16, Action: Custom);
121 setOperationAction(Op: ISD::BITCAST, VT: MVT::i32, Action: Custom);
122 setOperationAction(Op: ISD::BITCAST, VT: MVT::i64, Action: Custom);
123 setOperationAction(Op: ISD::BITCAST, VT: MVT::v16i1, Action: Custom);
124 setOperationAction(Op: ISD::BITCAST, VT: MVT::v128i1, Action: Custom);
125 setOperationAction(Op: ISD::BITCAST, VT: MVT::i128, Action: Custom);
126 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: ByteV, Action: Legal);
127 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: ByteW, Action: Legal);
128 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom);
129
130 if (Subtarget.useHVX128BOps()) {
131 setOperationAction(Op: ISD::BITCAST, VT: MVT::v32i1, Action: Custom);
132 setOperationAction(Op: ISD::BITCAST, VT: MVT::v64i1, Action: Custom);
133 setOperationAction(Op: ISD::STORE, VT: MVT::v32i1, Action: Custom);
134 setOperationAction(Op: ISD::LOAD, VT: MVT::v32i1, Action: Custom);
135 setOperationAction(Op: ISD::STORE, VT: MVT::v64i1, Action: Custom);
136 setOperationAction(Op: ISD::LOAD, VT: MVT::v64i1, Action: Custom);
137 setOperationAction(Op: ISD::STORE, VT: MVT::v128i1, Action: Custom);
138 setOperationAction(Op: ISD::LOAD, VT: MVT::v128i1, Action: Custom);
139 }
140 if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
141 Subtarget.useHVXFloatingPoint()) {
142
143 static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 };
144 static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 };
145
146 for (MVT T : FloatV) {
147 setOperationAction(Op: ISD::FADD, VT: T, Action: Legal);
148 setOperationAction(Op: ISD::FSUB, VT: T, Action: Legal);
149 setOperationAction(Op: ISD::FMUL, VT: T, Action: Legal);
150 setOperationAction(Op: ISD::FMINIMUMNUM, VT: T, Action: Legal);
151 setOperationAction(Op: ISD::FMAXIMUMNUM, VT: T, Action: Legal);
152
153 setOperationAction(Op: ISD::INSERT_SUBVECTOR, VT: T, Action: Custom);
154 setOperationAction(Op: ISD::EXTRACT_SUBVECTOR, VT: T, Action: Custom);
155
156 setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Legal);
157 setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Legal);
158
159 setOperationAction(Op: ISD::MLOAD, VT: T, Action: Custom);
160 setOperationAction(Op: ISD::MSTORE, VT: T, Action: Custom);
161 // Custom-lower BUILD_VECTOR. The standard (target-independent)
162 // handling of it would convert it to a load, which is not always
163 // the optimal choice.
164 setOperationAction(Op: ISD::BUILD_VECTOR, VT: T, Action: Custom);
165 }
166
167
168 // BUILD_VECTOR with f16 operands cannot be promoted without
169 // promoting the result, so lower the node to vsplat or constant pool
170 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::f16, Action: Custom);
171 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::f16, Action: Custom);
172 setOperationAction(Op: ISD::SPLAT_VECTOR, VT: MVT::f16, Action: Custom);
173
174 // Vector shuffle is always promoted to ByteV and a bitcast to f16 is
175 // generated.
176 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);
177 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
178 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
179 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);
180
181 if (Subtarget.useHVXV81Ops()) {
182 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128bf16, ByteW);
183 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64bf16, ByteV);
184 setPromoteTo(ISD::SETCC, MVT::v64bf16, MVT::v64f32);
185 setPromoteTo(ISD::FADD, MVT::v64bf16, MVT::v64f32);
186 setPromoteTo(ISD::FSUB, MVT::v64bf16, MVT::v64f32);
187 setPromoteTo(ISD::FMUL, MVT::v64bf16, MVT::v64f32);
188 setPromoteTo(ISD::FMINNUM, MVT::v64bf16, MVT::v64f32);
189 setPromoteTo(ISD::FMAXNUM, MVT::v64bf16, MVT::v64f32);
190
191 setOperationAction(Op: ISD::SPLAT_VECTOR, VT: MVT::v64bf16, Action: Legal);
192 setOperationAction(Op: ISD::INSERT_SUBVECTOR, VT: MVT::v64bf16, Action: Custom);
193 setOperationAction(Op: ISD::EXTRACT_SUBVECTOR, VT: MVT::v64bf16, Action: Custom);
194
195 setOperationAction(Op: ISD::LOAD, VT: MVT::v128bf16, Action: Custom);
196 setOperationAction(Op: ISD::STORE, VT: MVT::v128bf16, Action: Custom);
197
198 setOperationAction(Op: ISD::MLOAD, VT: MVT::v64bf16, Action: Custom);
199 setOperationAction(Op: ISD::MSTORE, VT: MVT::v64bf16, Action: Custom);
200 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v64bf16, Action: Custom);
201 setOperationAction(Op: ISD::CONCAT_VECTORS, VT: MVT::v64bf16, Action: Custom);
202
203 setOperationAction(Op: ISD::MLOAD, VT: MVT::v128bf16, Action: Custom);
204 setOperationAction(Op: ISD::MSTORE, VT: MVT::v128bf16, Action: Custom);
205 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v128bf16, Action: Custom);
206 setOperationAction(Op: ISD::CONCAT_VECTORS, VT: MVT::v128bf16, Action: Custom);
207
208 setOperationAction(Op: ISD::SPLAT_VECTOR, VT: MVT::bf16, Action: Custom);
209 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::bf16, Action: Custom);
210 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::bf16, Action: Custom);
211 }
212
213 for (MVT P : FloatW) {
214 setOperationAction(Op: ISD::LOAD, VT: P, Action: Custom);
215 setOperationAction(Op: ISD::STORE, VT: P, Action: Custom);
216 setOperationAction(Op: ISD::FADD, VT: P, Action: Custom);
217 setOperationAction(Op: ISD::FSUB, VT: P, Action: Custom);
218 setOperationAction(Op: ISD::FMUL, VT: P, Action: Custom);
219 setOperationAction(Op: ISD::FMINIMUMNUM, VT: P, Action: Custom);
220 setOperationAction(Op: ISD::FMAXIMUMNUM, VT: P, Action: Custom);
221 setOperationAction(Op: ISD::SETCC, VT: P, Action: Custom);
222 setOperationAction(Op: ISD::VSELECT, VT: P, Action: Custom);
223
224 // Custom-lower BUILD_VECTOR. The standard (target-independent)
225 // handling of it would convert it to a load, which is not always
226 // the optimal choice.
227 setOperationAction(Op: ISD::BUILD_VECTOR, VT: P, Action: Custom);
228 // Make concat-vectors custom to handle concats of more than 2 vectors.
229 setOperationAction(Op: ISD::CONCAT_VECTORS, VT: P, Action: Custom);
230
231 setOperationAction(Op: ISD::MLOAD, VT: P, Action: Custom);
232 setOperationAction(Op: ISD::MSTORE, VT: P, Action: Custom);
233 }
234
235 if (Subtarget.useHVXQFloatOps()) {
236 setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::v64f32, Action: Custom);
237 setOperationAction(Op: ISD::FP_ROUND, VT: MVT::v64f16, Action: Legal);
238 } else if (Subtarget.useHVXIEEEFPOps()) {
239 setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::v64f32, Action: Legal);
240 setOperationAction(Op: ISD::FP_ROUND, VT: MVT::v64f16, Action: Legal);
241 }
242 }
243
244 for (MVT T : LegalV) {
245 setIndexedLoadAction(IdxModes: ISD::POST_INC, VT: T, Action: Legal);
246 setIndexedStoreAction(IdxModes: ISD::POST_INC, VT: T, Action: Legal);
247
248 setOperationAction(Op: ISD::ABS, VT: T, Action: Legal);
249 setOperationAction(Op: ISD::AND, VT: T, Action: Legal);
250 setOperationAction(Op: ISD::OR, VT: T, Action: Legal);
251 setOperationAction(Op: ISD::XOR, VT: T, Action: Legal);
252 setOperationAction(Op: ISD::ADD, VT: T, Action: Legal);
253 setOperationAction(Op: ISD::SUB, VT: T, Action: Legal);
254 setOperationAction(Op: ISD::MUL, VT: T, Action: Legal);
255 setOperationAction(Op: ISD::CTPOP, VT: T, Action: Legal);
256 setOperationAction(Op: ISD::CTLZ, VT: T, Action: Legal);
257 setOperationAction(Op: ISD::SELECT, VT: T, Action: Legal);
258 setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Legal);
259 setOperationAction(Op: ISD::UADDSAT, VT: T, Action: Legal);
260 setOperationAction(Op: ISD::SADDSAT, VT: T, Action: Legal);
261 setOperationAction(Op: ISD::USUBSAT, VT: T, Action: Legal);
262 setOperationAction(Op: ISD::SSUBSAT, VT: T, Action: Legal);
263 if (T != ByteV) {
264 setOperationAction(Op: ISD::SIGN_EXTEND_VECTOR_INREG, VT: T, Action: Legal);
265 setOperationAction(Op: ISD::ZERO_EXTEND_VECTOR_INREG, VT: T, Action: Legal);
266 setOperationAction(Op: ISD::BSWAP, VT: T, Action: Legal);
267 }
268
269 setOperationAction(Op: ISD::SMIN, VT: T, Action: Legal);
270 setOperationAction(Op: ISD::SMAX, VT: T, Action: Legal);
271 if (T.getScalarType() != MVT::i32) {
272 setOperationAction(Op: ISD::UMIN, VT: T, Action: Legal);
273 setOperationAction(Op: ISD::UMAX, VT: T, Action: Legal);
274 }
275
276 setOperationAction(Op: ISD::CTTZ, VT: T, Action: Custom);
277 setOperationAction(Op: ISD::LOAD, VT: T, Action: Custom);
278 setOperationAction(Op: ISD::MLOAD, VT: T, Action: Custom);
279 setOperationAction(Op: ISD::MSTORE, VT: T, Action: Custom);
280 if (T.getScalarType() != MVT::i32) {
281 setOperationAction(Op: ISD::MULHS, VT: T, Action: Legal);
282 setOperationAction(Op: ISD::MULHU, VT: T, Action: Legal);
283 }
284
285 setOperationAction(Op: ISD::BUILD_VECTOR, VT: T, Action: Custom);
286 // Make concat-vectors custom to handle concats of more than 2 vectors.
287 setOperationAction(Op: ISD::CONCAT_VECTORS, VT: T, Action: Custom);
288 setOperationAction(Op: ISD::INSERT_SUBVECTOR, VT: T, Action: Custom);
289 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: T, Action: Custom);
290 setOperationAction(Op: ISD::EXTRACT_SUBVECTOR, VT: T, Action: Custom);
291 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: T, Action: Custom);
292 setOperationAction(Op: ISD::ANY_EXTEND, VT: T, Action: Custom);
293 setOperationAction(Op: ISD::SIGN_EXTEND, VT: T, Action: Custom);
294 setOperationAction(Op: ISD::ZERO_EXTEND, VT: T, Action: Custom);
295 setOperationAction(Op: ISD::FSHL, VT: T, Action: Custom);
296 setOperationAction(Op: ISD::FSHR, VT: T, Action: Custom);
297 if (T != ByteV) {
298 setOperationAction(Op: ISD::ANY_EXTEND_VECTOR_INREG, VT: T, Action: Custom);
299 // HVX only has shifts of words and halfwords.
300 setOperationAction(Op: ISD::SRA, VT: T, Action: Custom);
301 setOperationAction(Op: ISD::SHL, VT: T, Action: Custom);
302 setOperationAction(Op: ISD::SRL, VT: T, Action: Custom);
303
304 // Promote all shuffles to operate on vectors of bytes.
305 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
306 }
307
308 if (Subtarget.useHVXFloatingPoint()) {
309 // Same action for both QFloat and IEEE.
310 setOperationAction(Op: ISD::SINT_TO_FP, VT: T, Action: Custom);
311 setOperationAction(Op: ISD::UINT_TO_FP, VT: T, Action: Custom);
312 setOperationAction(Op: ISD::FP_TO_SINT, VT: T, Action: Custom);
313 setOperationAction(Op: ISD::FP_TO_UINT, VT: T, Action: Custom);
314 }
315
316 setCondCodeAction(CCs: ISD::SETNE, VT: T, Action: Expand);
317 setCondCodeAction(CCs: ISD::SETLE, VT: T, Action: Expand);
318 setCondCodeAction(CCs: ISD::SETGE, VT: T, Action: Expand);
319 setCondCodeAction(CCs: ISD::SETLT, VT: T, Action: Expand);
320 setCondCodeAction(CCs: ISD::SETULE, VT: T, Action: Expand);
321 setCondCodeAction(CCs: ISD::SETUGE, VT: T, Action: Expand);
322 setCondCodeAction(CCs: ISD::SETULT, VT: T, Action: Expand);
323 }
324
325 for (MVT T : LegalW) {
326 // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
327 // independent) handling of it would convert it to a load, which is
328 // not always the optimal choice.
329 setOperationAction(Op: ISD::BUILD_VECTOR, VT: T, Action: Custom);
330 // Make concat-vectors custom to handle concats of more than 2 vectors.
331 setOperationAction(Op: ISD::CONCAT_VECTORS, VT: T, Action: Custom);
332
333 // Custom-lower these operations for pairs. Expand them into a concat
334 // of the corresponding operations on individual vectors.
335 setOperationAction(Op: ISD::ANY_EXTEND, VT: T, Action: Custom);
336 setOperationAction(Op: ISD::SIGN_EXTEND, VT: T, Action: Custom);
337 setOperationAction(Op: ISD::ZERO_EXTEND, VT: T, Action: Custom);
338 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: T, Action: Custom);
339 setOperationAction(Op: ISD::ANY_EXTEND_VECTOR_INREG, VT: T, Action: Custom);
340 setOperationAction(Op: ISD::SIGN_EXTEND_VECTOR_INREG, VT: T, Action: Legal);
341 setOperationAction(Op: ISD::ZERO_EXTEND_VECTOR_INREG, VT: T, Action: Legal);
342 setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Custom);
343
344 setOperationAction(Op: ISD::LOAD, VT: T, Action: Custom);
345 setOperationAction(Op: ISD::STORE, VT: T, Action: Custom);
346 setOperationAction(Op: ISD::MLOAD, VT: T, Action: Custom);
347 setOperationAction(Op: ISD::MSTORE, VT: T, Action: Custom);
348 setOperationAction(Op: ISD::ABS, VT: T, Action: Custom);
349 setOperationAction(Op: ISD::CTLZ, VT: T, Action: Custom);
350 setOperationAction(Op: ISD::CTTZ, VT: T, Action: Custom);
351 setOperationAction(Op: ISD::CTPOP, VT: T, Action: Custom);
352
353 setOperationAction(Op: ISD::ADD, VT: T, Action: Legal);
354 setOperationAction(Op: ISD::UADDSAT, VT: T, Action: Legal);
355 setOperationAction(Op: ISD::SADDSAT, VT: T, Action: Legal);
356 setOperationAction(Op: ISD::SUB, VT: T, Action: Legal);
357 setOperationAction(Op: ISD::USUBSAT, VT: T, Action: Legal);
358 setOperationAction(Op: ISD::SSUBSAT, VT: T, Action: Legal);
359 setOperationAction(Op: ISD::MUL, VT: T, Action: Custom);
360 setOperationAction(Op: ISD::MULHS, VT: T, Action: Custom);
361 setOperationAction(Op: ISD::MULHU, VT: T, Action: Custom);
362 setOperationAction(Op: ISD::AND, VT: T, Action: Custom);
363 setOperationAction(Op: ISD::OR, VT: T, Action: Custom);
364 setOperationAction(Op: ISD::XOR, VT: T, Action: Custom);
365 setOperationAction(Op: ISD::SETCC, VT: T, Action: Custom);
366 setOperationAction(Op: ISD::VSELECT, VT: T, Action: Custom);
367 if (T != ByteW) {
368 setOperationAction(Op: ISD::SRA, VT: T, Action: Custom);
369 setOperationAction(Op: ISD::SHL, VT: T, Action: Custom);
370 setOperationAction(Op: ISD::SRL, VT: T, Action: Custom);
371
372 // Promote all shuffles to operate on vectors of bytes.
373 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
374 }
375 setOperationAction(Op: ISD::FSHL, VT: T, Action: Custom);
376 setOperationAction(Op: ISD::FSHR, VT: T, Action: Custom);
377
378 setOperationAction(Op: ISD::SMIN, VT: T, Action: Custom);
379 setOperationAction(Op: ISD::SMAX, VT: T, Action: Custom);
380 if (T.getScalarType() != MVT::i32) {
381 setOperationAction(Op: ISD::UMIN, VT: T, Action: Custom);
382 setOperationAction(Op: ISD::UMAX, VT: T, Action: Custom);
383 }
384
385 if (Subtarget.useHVXFloatingPoint()) {
386 // Same action for both QFloat and IEEE.
387 setOperationAction(Op: ISD::SINT_TO_FP, VT: T, Action: Custom);
388 setOperationAction(Op: ISD::UINT_TO_FP, VT: T, Action: Custom);
389 setOperationAction(Op: ISD::FP_TO_SINT, VT: T, Action: Custom);
390 setOperationAction(Op: ISD::FP_TO_UINT, VT: T, Action: Custom);
391 }
392 }
393
394 // Legalize all of these to HexagonISD::[SU]MUL_LOHI.
395 setOperationAction(Op: ISD::MULHS, VT: WordV, Action: Custom); // -> _LOHI
396 setOperationAction(Op: ISD::MULHU, VT: WordV, Action: Custom); // -> _LOHI
397 setOperationAction(Op: ISD::SMUL_LOHI, VT: WordV, Action: Custom);
398 setOperationAction(Op: ISD::UMUL_LOHI, VT: WordV, Action: Custom);
399
400 setCondCodeAction(CCs: ISD::SETNE, VT: MVT::v64f16, Action: Expand);
401 setCondCodeAction(CCs: ISD::SETLE, VT: MVT::v64f16, Action: Expand);
402 setCondCodeAction(CCs: ISD::SETGE, VT: MVT::v64f16, Action: Expand);
403 setCondCodeAction(CCs: ISD::SETLT, VT: MVT::v64f16, Action: Expand);
404 setCondCodeAction(CCs: ISD::SETONE, VT: MVT::v64f16, Action: Expand);
405 setCondCodeAction(CCs: ISD::SETOLE, VT: MVT::v64f16, Action: Expand);
406 setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::v64f16, Action: Expand);
407 setCondCodeAction(CCs: ISD::SETOLT, VT: MVT::v64f16, Action: Expand);
408 setCondCodeAction(CCs: ISD::SETUNE, VT: MVT::v64f16, Action: Expand);
409 setCondCodeAction(CCs: ISD::SETULE, VT: MVT::v64f16, Action: Expand);
410 setCondCodeAction(CCs: ISD::SETUGE, VT: MVT::v64f16, Action: Expand);
411 setCondCodeAction(CCs: ISD::SETULT, VT: MVT::v64f16, Action: Expand);
412 setCondCodeAction(CCs: ISD::SETUO, VT: MVT::v64f16, Action: Expand);
413 setCondCodeAction(CCs: ISD::SETO, VT: MVT::v64f16, Action: Expand);
414
415 setCondCodeAction(CCs: ISD::SETNE, VT: MVT::v32f32, Action: Expand);
416 setCondCodeAction(CCs: ISD::SETLE, VT: MVT::v32f32, Action: Expand);
417 setCondCodeAction(CCs: ISD::SETGE, VT: MVT::v32f32, Action: Expand);
418 setCondCodeAction(CCs: ISD::SETLT, VT: MVT::v32f32, Action: Expand);
419 setCondCodeAction(CCs: ISD::SETONE, VT: MVT::v32f32, Action: Expand);
420 setCondCodeAction(CCs: ISD::SETOLE, VT: MVT::v32f32, Action: Expand);
421 setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::v32f32, Action: Expand);
422 setCondCodeAction(CCs: ISD::SETOLT, VT: MVT::v32f32, Action: Expand);
423 setCondCodeAction(CCs: ISD::SETUNE, VT: MVT::v32f32, Action: Expand);
424 setCondCodeAction(CCs: ISD::SETULE, VT: MVT::v32f32, Action: Expand);
425 setCondCodeAction(CCs: ISD::SETUGE, VT: MVT::v32f32, Action: Expand);
426 setCondCodeAction(CCs: ISD::SETULT, VT: MVT::v32f32, Action: Expand);
427 setCondCodeAction(CCs: ISD::SETUO, VT: MVT::v32f32, Action: Expand);
428 setCondCodeAction(CCs: ISD::SETO, VT: MVT::v32f32, Action: Expand);
429
430 // Boolean vectors.
431
432 for (MVT T : LegalW) {
433 // Boolean types for vector pairs will overlap with the boolean
434 // types for single vectors, e.g.
435 // v64i8 -> v64i1 (single)
436 // v64i16 -> v64i1 (pair)
437 // Set these actions first, and allow the single actions to overwrite
438 // any duplicates.
439 MVT BoolW = MVT::getVectorVT(VT: MVT::i1, NumElements: T.getVectorNumElements());
440 setOperationAction(Op: ISD::SETCC, VT: BoolW, Action: Custom);
441 setOperationAction(Op: ISD::AND, VT: BoolW, Action: Custom);
442 setOperationAction(Op: ISD::OR, VT: BoolW, Action: Custom);
443 setOperationAction(Op: ISD::XOR, VT: BoolW, Action: Custom);
444 // Masked load/store takes a mask that may need splitting.
445 setOperationAction(Op: ISD::MLOAD, VT: BoolW, Action: Custom);
446 setOperationAction(Op: ISD::MSTORE, VT: BoolW, Action: Custom);
447 }
448
449 for (MVT T : LegalV) {
450 MVT BoolV = MVT::getVectorVT(VT: MVT::i1, NumElements: T.getVectorNumElements());
451 setOperationAction(Op: ISD::BUILD_VECTOR, VT: BoolV, Action: Custom);
452 setOperationAction(Op: ISD::CONCAT_VECTORS, VT: BoolV, Action: Custom);
453 setOperationAction(Op: ISD::INSERT_SUBVECTOR, VT: BoolV, Action: Custom);
454 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: BoolV, Action: Custom);
455 setOperationAction(Op: ISD::EXTRACT_SUBVECTOR, VT: BoolV, Action: Custom);
456 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: BoolV, Action: Custom);
457 setOperationAction(Op: ISD::SELECT, VT: BoolV, Action: Custom);
458 setOperationAction(Op: ISD::AND, VT: BoolV, Action: Legal);
459 setOperationAction(Op: ISD::OR, VT: BoolV, Action: Legal);
460 setOperationAction(Op: ISD::XOR, VT: BoolV, Action: Legal);
461 }
462
463 if (Use64b) {
464 for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
465 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: T, Action: Legal);
466 } else {
467 for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
468 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: T, Action: Legal);
469 }
470
471 // Handle store widening for short vectors.
472 unsigned HwLen = Subtarget.getVectorLength();
473 for (MVT ElemTy : Subtarget.getHVXElementTypes()) {
474 if (ElemTy == MVT::i1)
475 continue;
476 int ElemWidth = ElemTy.getFixedSizeInBits();
477 int MaxElems = (8*HwLen) / ElemWidth;
478 for (int N = 2; N < MaxElems; N *= 2) {
479 MVT VecTy = MVT::getVectorVT(VT: ElemTy, NumElements: N);
480 auto Action = getPreferredVectorAction(VT: VecTy);
481 if (Action == TargetLoweringBase::TypeWidenVector) {
482 setOperationAction(Op: ISD::LOAD, VT: VecTy, Action: Custom);
483 setOperationAction(Op: ISD::STORE, VT: VecTy, Action: Custom);
484 setOperationAction(Op: ISD::SETCC, VT: VecTy, Action: Custom);
485 setOperationAction(Op: ISD::TRUNCATE, VT: VecTy, Action: Custom);
486 setOperationAction(Op: ISD::ANY_EXTEND, VT: VecTy, Action: Custom);
487 setOperationAction(Op: ISD::SIGN_EXTEND, VT: VecTy, Action: Custom);
488 setOperationAction(Op: ISD::ZERO_EXTEND, VT: VecTy, Action: Custom);
489 if (Subtarget.useHVXFloatingPoint()) {
490 setOperationAction(Op: ISD::FP_TO_SINT, VT: VecTy, Action: Custom);
491 setOperationAction(Op: ISD::FP_TO_UINT, VT: VecTy, Action: Custom);
492 setOperationAction(Op: ISD::SINT_TO_FP, VT: VecTy, Action: Custom);
493 setOperationAction(Op: ISD::UINT_TO_FP, VT: VecTy, Action: Custom);
494 }
495
496 MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: N);
497 if (!isTypeLegal(VT: BoolTy))
498 setOperationAction(Op: ISD::SETCC, VT: BoolTy, Action: Custom);
499 }
500 }
501 }
502
503 // Include cases which are not hander earlier
504 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v32i1, Action: Custom);
505 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v64i1, Action: Custom);
506 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v32i1, Action: Custom);
507
508 setTargetDAGCombine({ISD::CONCAT_VECTORS, ISD::TRUNCATE, ISD::VSELECT});
509
510 setTargetDAGCombine({ISD::PARTIAL_REDUCE_SMLA, ISD::PARTIAL_REDUCE_UMLA,
511 ISD::PARTIAL_REDUCE_SUMLA});
512
513 // Partial MLA reductions.
514 {
515 static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
516 ISD::PARTIAL_REDUCE_UMLA,
517 ISD::PARTIAL_REDUCE_SUMLA};
518
519 auto HvxType = [=](MVT ScalarT, unsigned Factor = 1) {
520 return MVT::getVectorVT(VT: ScalarT, NumElements: Subtarget.getVectorLength() * Factor *
521 8 / ScalarT.getSizeInBits());
522 };
523
524 // Tuple of (Acc element type, input element type, vector pair).
525 // The assumption is both the input and reduction result are of the same
526 // size so the reduction ratio is the same as the ratio of element type
527 // sizes. This may not hold for all available instructions.
528 typedef std::tuple<MVT, MVT, bool> ReductionSignature;
529
530 static const std::vector<ReductionSignature> NativeReductions = {
531 {MVT::i32, MVT::i8, false},
532 };
533
534 for (const auto &R : NativeReductions) {
535
536 MVT AccType = std::get<0>(t: R);
537 MVT InputType = std::get<1>(t: R);
538 unsigned Factor = std::get<2>(t: R) ? 2 : 1;
539
540 // The native size is legal.
541 setPartialReduceMLAAction(Opcodes: MLAOps, AccVT: HvxType(AccType), InputVT: HvxType(InputType),
542 Action: Legal);
543
544 // Allow custom partial MLA reductions on larger vectors than legally
545 // supported. These reduction must be declared as Custom (or Legal)
546 // for foldPartialReduceMLAMulOp() to fold the multiply by one pattern
547 // inserted when the partial reduction intrinsic is converted to
548 // PARTIAL_REDUCE_U/S/SUMLA. Otherwise, the Split action will apply
549 // on the original pattern, including the extensions and multiplies,
550 // which will make it impossible to match.
551 // There are two independent ways to extend the
552 // input size: 1. to concatenate the result - output vector is
553 // proportionally extended, 2) to reduce the result - the output vector
554 // size stays the same. We limit allowed combinations so that the total
555 // number of generated reduction instructions is limited by a constant
556 // number. This limit is arbitrary and can be revised. On one hand, it is
557 // convenient to have more choices; on the other hand, there is a
558 // diminishing benefit of very long sequences, which should probably be
559 // written as loops instead.
560 for (unsigned ConcatFactor = 1; ConcatFactor <= MaxExpandMLA;
561 ConcatFactor <<= 1)
562 for (unsigned ReductionFactor = 1; ReductionFactor <= MaxExpandMLA;
563 ReductionFactor <<= 1)
564 if (ConcatFactor * ReductionFactor != 1 &&
565 ConcatFactor * ReductionFactor <= MaxExpandMLA)
566 setPartialReduceMLAAction(
567 Opcodes: MLAOps, AccVT: HvxType(AccType, Factor * ConcatFactor),
568 InputVT: HvxType(InputType, Factor * ConcatFactor * ReductionFactor),
569 Action: Custom);
570 }
571 }
572}
573
574unsigned
575HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const {
576 // Early exit for invalid input types
577 if (!VecTy.isVector())
578 return ~0u;
579
580 MVT ElemTy = VecTy.getVectorElementType();
581 unsigned VecLen = VecTy.getVectorNumElements();
582 unsigned HwLen = Subtarget.getVectorLength();
583
584 // Split vectors of i1 that exceed byte vector length.
585 if (ElemTy == MVT::i1 && VecLen > HwLen)
586 return TargetLoweringBase::TypeSplitVector;
587
588 ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
589 // For shorter vectors of i1, widen them if any of the corresponding
590 // vectors of integers needs to be widened.
591 if (ElemTy == MVT::i1) {
592 for (MVT T : Tys) {
593 assert(T != MVT::i1);
594 auto A = getPreferredHvxVectorAction(VecTy: MVT::getVectorVT(VT: T, NumElements: VecLen));
595 if (A != ~0u)
596 return A;
597 }
598 return ~0u;
599 }
600
601 // If the size of VecTy is at least half of the vector length,
602 // widen the vector. Note: the threshold was not selected in
603 // any scientific way.
604 if (llvm::is_contained(Range&: Tys, Element: ElemTy)) {
605 unsigned VecWidth = VecTy.getSizeInBits();
606 unsigned HwWidth = 8*HwLen;
607 if (VecWidth > 2*HwWidth)
608 return TargetLoweringBase::TypeSplitVector;
609
610 bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0;
611 if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth)
612 return TargetLoweringBase::TypeWidenVector;
613 if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
614 return TargetLoweringBase::TypeWidenVector;
615 }
616
617 // Defer to default.
618 return ~0u;
619}
620
621unsigned
622HexagonTargetLowering::getCustomHvxOperationAction(SDNode &Op) const {
623 unsigned Opc = Op.getOpcode();
624 switch (Opc) {
625 case HexagonISD::SMUL_LOHI:
626 case HexagonISD::UMUL_LOHI:
627 case HexagonISD::USMUL_LOHI:
628 return TargetLoweringBase::Custom;
629 }
630 return TargetLoweringBase::Legal;
631}
632
633SDValue
634HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
635 const SDLoc &dl, SelectionDAG &DAG) const {
636 SmallVector<SDValue,4> IntOps;
637 IntOps.push_back(Elt: DAG.getConstant(Val: IntId, DL: dl, VT: MVT::i32));
638 append_range(C&: IntOps, R&: Ops);
639 return DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL: dl, VT: ResTy, Ops: IntOps);
640}
641
642MVT
643HexagonTargetLowering::typeJoin(const TypePair &Tys) const {
644 assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType());
645
646 MVT ElemTy = Tys.first.getVectorElementType();
647 return MVT::getVectorVT(VT: ElemTy, NumElements: Tys.first.getVectorNumElements() +
648 Tys.second.getVectorNumElements());
649}
650
651HexagonTargetLowering::TypePair
652HexagonTargetLowering::typeSplit(MVT VecTy) const {
653 assert(VecTy.isVector());
654 unsigned NumElem = VecTy.getVectorNumElements();
655 assert((NumElem % 2) == 0 && "Expecting even-sized vector type");
656 MVT HalfTy = MVT::getVectorVT(VT: VecTy.getVectorElementType(), NumElements: NumElem/2);
657 return { HalfTy, HalfTy };
658}
659
660MVT
661HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const {
662 MVT ElemTy = VecTy.getVectorElementType();
663 MVT NewElemTy = MVT::getIntegerVT(BitWidth: ElemTy.getSizeInBits() * Factor);
664 return MVT::getVectorVT(VT: NewElemTy, NumElements: VecTy.getVectorNumElements());
665}
666
667MVT
668HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const {
669 MVT ElemTy = VecTy.getVectorElementType();
670 MVT NewElemTy = MVT::getIntegerVT(BitWidth: ElemTy.getSizeInBits() / Factor);
671 return MVT::getVectorVT(VT: NewElemTy, NumElements: VecTy.getVectorNumElements());
672}
673
674SDValue
675HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy,
676 SelectionDAG &DAG) const {
677 if (ty(Op: Vec).getVectorElementType() == ElemTy)
678 return Vec;
679 MVT CastTy = tyVector(Ty: Vec.getValueType().getSimpleVT(), ElemTy);
680 return DAG.getBitcast(VT: CastTy, V: Vec);
681}
682
683SDValue
684HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl,
685 SelectionDAG &DAG) const {
686 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: typeJoin(Tys: ty(Ops)),
687 N1: Ops.first, N2: Ops.second);
688}
689
690HexagonTargetLowering::VectorPair
691HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
692 SelectionDAG &DAG) const {
693 TypePair Tys = typeSplit(VecTy: ty(Op: Vec));
694 if (Vec.getOpcode() == HexagonISD::QCAT)
695 return VectorPair(Vec.getOperand(i: 0), Vec.getOperand(i: 1));
696 return DAG.SplitVector(N: Vec, DL: dl, LoVT: Tys.first, HiVT: Tys.second);
697}
698
699bool
700HexagonTargetLowering::isHvxSingleTy(MVT Ty) const {
701 return Subtarget.isHVXVectorType(VecTy: Ty) &&
702 Ty.getSizeInBits() == 8 * Subtarget.getVectorLength();
703}
704
705bool
706HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
707 return Subtarget.isHVXVectorType(VecTy: Ty) &&
708 Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
709}
710
711bool
712HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
713 return Subtarget.isHVXVectorType(VecTy: Ty, IncludeBool: true) &&
714 Ty.getVectorElementType() == MVT::i1;
715}
716
717bool HexagonTargetLowering::allowsHvxMemoryAccess(
718 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
719 // Bool vectors are excluded by default, but make it explicit to
720 // emphasize that bool vectors cannot be loaded or stored.
721 // Also, disallow double vector stores (to prevent unnecessary
722 // store widening in DAG combiner).
723 if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())
724 return false;
725 if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
726 return false;
727 if (Fast)
728 *Fast = 1;
729 return true;
730}
731
732bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
733 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
734 if (!Subtarget.isHVXVectorType(VecTy))
735 return false;
736 // XXX Should this be false? vmemu are a bit slower than vmem.
737 if (Fast)
738 *Fast = 1;
739 return true;
740}
741
// Expand the PS_vsplat* pseudo-instructions after instruction selection.
// On HVX v62+ the native byte/half splats (V6_lvsplatb/V6_lvsplath) are
// used directly; on older targets the value is first replicated into a
// full 32-bit scalar and then splatted with V6_lvsplatw. The pseudo MI is
// either erased (new instructions built in its place) or rewritten
// in-place (the vsplat[ir]w cases).
void HexagonTargetLowering::AdjustHvxInstrPostInstrSelection(
    MachineInstr &MI, SDNode *Node) const {
  unsigned Opc = MI.getOpcode();
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineBasicBlock &MB = *MI.getParent();
  MachineFunction &MF = *MB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  // New instructions are inserted before MI; MI itself is erased below
  // when fully replaced.
  auto At = MI.getIterator();

  switch (Opc) {
  case Hexagon::PS_vsplatib:
    if (Subtarget.useHVXV62Ops()) {
      // SplatV = A2_tfrsi #imm
      // OutV = V6_lvsplatb SplatV
      Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV)
        .add(MO: MI.getOperand(i: 1));
      Register OutV = MI.getOperand(i: 0).getReg();
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatb), DestReg: OutV)
        .addReg(RegNo: SplatV);
    } else {
      // SplatV = A2_tfrsi #imm:#imm:#imm:#imm
      // OutV = V6_lvsplatw SplatV
      Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
      const MachineOperand &InpOp = MI.getOperand(i: 1);
      assert(InpOp.isImm());
      // Replicate the low byte into all four byte lanes of the scalar.
      uint32_t V = InpOp.getImm() & 0xFF;
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV)
          .addImm(Val: V << 24 | V << 16 | V << 8 | V);
      Register OutV = MI.getOperand(i: 0).getReg();
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatw), DestReg: OutV).addReg(RegNo: SplatV);
    }
    MB.erase(I: At);
    break;
  case Hexagon::PS_vsplatrb:
    if (Subtarget.useHVXV62Ops()) {
      // OutV = V6_lvsplatb Inp
      Register OutV = MI.getOperand(i: 0).getReg();
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatb), DestReg: OutV)
        .add(MO: MI.getOperand(i: 1));
    } else {
      // Replicate the byte across the scalar with S2_vsplatrb, then splat
      // the word.
      Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
      const MachineOperand &InpOp = MI.getOperand(i: 1);
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::S2_vsplatrb), DestReg: SplatV)
          .addReg(RegNo: InpOp.getReg(), Flags: {}, SubReg: InpOp.getSubReg());
      Register OutV = MI.getOperand(i: 0).getReg();
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatw), DestReg: OutV)
          .addReg(RegNo: SplatV);
    }
    MB.erase(I: At);
    break;
  case Hexagon::PS_vsplatih:
    if (Subtarget.useHVXV62Ops()) {
      // SplatV = A2_tfrsi #imm
      // OutV = V6_lvsplath SplatV
      Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV)
        .add(MO: MI.getOperand(i: 1));
      Register OutV = MI.getOperand(i: 0).getReg();
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplath), DestReg: OutV)
        .addReg(RegNo: SplatV);
    } else {
      // SplatV = A2_tfrsi #imm:#imm
      // OutV = V6_lvsplatw SplatV
      Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
      const MachineOperand &InpOp = MI.getOperand(i: 1);
      assert(InpOp.isImm());
      // Replicate the low halfword into both halves of the scalar.
      uint32_t V = InpOp.getImm() & 0xFFFF;
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV)
          .addImm(Val: V << 16 | V);
      Register OutV = MI.getOperand(i: 0).getReg();
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatw), DestReg: OutV).addReg(RegNo: SplatV);
    }
    MB.erase(I: At);
    break;
  case Hexagon::PS_vsplatrh:
    if (Subtarget.useHVXV62Ops()) {
      // OutV = V6_lvsplath Inp
      Register OutV = MI.getOperand(i: 0).getReg();
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplath), DestReg: OutV)
        .add(MO: MI.getOperand(i: 1));
    } else {
      // SplatV = A2_combine_ll Inp, Inp
      // OutV = V6_lvsplatw SplatV
      Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
      const MachineOperand &InpOp = MI.getOperand(i: 1);
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_combine_ll), DestReg: SplatV)
          .addReg(RegNo: InpOp.getReg(), Flags: {}, SubReg: InpOp.getSubReg())
          .addReg(RegNo: InpOp.getReg(), Flags: {}, SubReg: InpOp.getSubReg());
      Register OutV = MI.getOperand(i: 0).getReg();
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatw), DestReg: OutV).addReg(RegNo: SplatV);
    }
    MB.erase(I: At);
    break;
  case Hexagon::PS_vsplatiw:
  case Hexagon::PS_vsplatrw:
    if (Opc == Hexagon::PS_vsplatiw) {
      // SplatV = A2_tfrsi #imm
      Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV)
        .add(MO: MI.getOperand(i: 1));
      MI.getOperand(i: 1).ChangeToRegister(Reg: SplatV, isDef: false);
    }
    // OutV = V6_lvsplatw SplatV/Inp
    // Rewrite the pseudo in place instead of erasing it.
    MI.setDesc(TII.get(Opcode: Hexagon::V6_lvsplatw));
    break;
  }
}
851
852SDValue
853HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
854 SelectionDAG &DAG) const {
855 if (ElemIdx.getValueType().getSimpleVT() != MVT::i32)
856 ElemIdx = DAG.getBitcast(VT: MVT::i32, V: ElemIdx);
857
858 unsigned ElemWidth = ElemTy.getSizeInBits();
859 if (ElemWidth == 8)
860 return ElemIdx;
861
862 unsigned L = Log2_32(Value: ElemWidth/8);
863 const SDLoc &dl(ElemIdx);
864 return DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: MVT::i32,
865 Ops: {ElemIdx, DAG.getConstant(Val: L, DL: dl, VT: MVT::i32)});
866}
867
868SDValue
869HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy,
870 SelectionDAG &DAG) const {
871 unsigned ElemWidth = ElemTy.getSizeInBits();
872 assert(ElemWidth >= 8 && ElemWidth <= 32);
873 if (ElemWidth == 32)
874 return Idx;
875
876 if (ty(Op: Idx) != MVT::i32)
877 Idx = DAG.getBitcast(VT: MVT::i32, V: Idx);
878 const SDLoc &dl(Idx);
879 SDValue Mask = DAG.getConstant(Val: 32/ElemWidth - 1, DL: dl, VT: MVT::i32);
880 SDValue SubIdx = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, Ops: {Idx, Mask});
881 return SubIdx;
882}
883
884SDValue
885HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0,
886 SDValue Op1, ArrayRef<int> Mask,
887 SelectionDAG &DAG) const {
888 MVT OpTy = ty(Op: Op0);
889 assert(OpTy == ty(Op1));
890
891 MVT ElemTy = OpTy.getVectorElementType();
892 if (ElemTy == MVT::i8)
893 return DAG.getVectorShuffle(VT: OpTy, dl, N1: Op0, N2: Op1, Mask);
894 assert(ElemTy.getSizeInBits() >= 8);
895
896 MVT ResTy = tyVector(Ty: OpTy, ElemTy: MVT::i8);
897 unsigned ElemSize = ElemTy.getSizeInBits() / 8;
898
899 SmallVector<int,128> ByteMask;
900 for (int M : Mask) {
901 if (M < 0) {
902 for (unsigned I = 0; I != ElemSize; ++I)
903 ByteMask.push_back(Elt: -1);
904 } else {
905 int NewM = M*ElemSize;
906 for (unsigned I = 0; I != ElemSize; ++I)
907 ByteMask.push_back(Elt: NewM+I);
908 }
909 }
910 assert(ResTy.getVectorNumElements() == ByteMask.size());
911 return DAG.getVectorShuffle(VT: ResTy, dl, N1: opCastElem(Vec: Op0, ElemTy: MVT::i8, DAG),
912 N2: opCastElem(Vec: Op1, ElemTy: MVT::i8, DAG), Mask: ByteMask);
913}
914
// Materialize a BUILD_VECTOR into a single HVX vector register.
// Strategy, in order of preference:
//   1. splat (SPLAT_VECTOR) if all defined words are equal,
//   2. constant-pool load if all elements are constants,
//   3. shuffle, if all elements are extracts from one source vector,
//   4. general case: seed with the most common word, then insert the
//      remaining words via rotate+VINSERTW0 on two halves in parallel.
SDValue
HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
                                         const SDLoc &dl, MVT VecTy,
                                         SelectionDAG &DAG) const {
  unsigned VecLen = Values.size();
  MachineFunction &MF = DAG.getMachineFunction();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();
  unsigned HwLen = Subtarget.getVectorLength();

  unsigned ElemSize = ElemWidth / 8;
  assert(ElemSize*VecLen == HwLen);
  SmallVector<SDValue,32> Words;

  // Group the elements into 32-bit words: sub-word elements are packed
  // (4 bytes or 2 halfwords per word); i32/f32 elements map one-to-one.
  if (VecTy.getVectorElementType() != MVT::i32 &&
      !(Subtarget.useHVXFloatingPoint() &&
        VecTy.getVectorElementType() == MVT::f32)) {
    assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size");
    unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2;
    MVT PartVT = MVT::getVectorVT(VT: VecTy.getVectorElementType(), NumElements: OpsPerWord);
    for (unsigned i = 0; i != VecLen; i += OpsPerWord) {
      SDValue W = buildVector32(Elem: Values.slice(N: i, M: OpsPerWord), dl, VecTy: PartVT, DAG);
      Words.push_back(Elt: DAG.getBitcast(VT: MVT::i32, V: W));
    }
  } else {
    for (SDValue V : Values)
      Words.push_back(Elt: DAG.getBitcast(VT: MVT::i32, V));
  }
  // Return true if all defined values are the same; SplatV is set to that
  // value (or to Values[0] when everything is undef).
  auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) {
    unsigned NumValues = Values.size();
    assert(NumValues > 0);
    bool IsUndef = true;
    for (unsigned i = 0; i != NumValues; ++i) {
      if (Values[i].isUndef())
        continue;
      IsUndef = false;
      if (!SplatV.getNode())
        SplatV = Values[i];
      else if (SplatV != Values[i])
        return false;
    }
    if (IsUndef)
      SplatV = Values[0];
    return true;
  };

  unsigned NumWords = Words.size();
  SDValue SplatV;
  bool IsSplat = isSplat(Words, SplatV);
  if (IsSplat && isUndef(Op: SplatV))
    return DAG.getUNDEF(VT: VecTy);
  if (IsSplat) {
    assert(SplatV.getNode());
    if (isNullConstant(V: SplatV))
      return getZero(dl, Ty: VecTy, DAG);
    MVT WordTy = MVT::getVectorVT(VT: MVT::i32, NumElements: HwLen/4);
    SDValue S = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: WordTy, Operand: SplatV);
    return DAG.getBitcast(VT: VecTy, V: S);
  }

  // Delay recognizing constant vectors until here, so that we can generate
  // a vsplat.
  SmallVector<ConstantInt*, 128> Consts(VecLen);
  bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
  if (AllConst) {
    // All-constant vector: emit a vector-aligned constant-pool load.
    ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
                            (Constant**)Consts.end());
    Constant *CV = ConstantVector::get(V: Tmp);
    Align Alignment(HwLen);
    SDValue CP = LowerConstantPool(
        Op: DAG.getConstantPool(C: CV, VT: getPointerTy(DL: DAG.getDataLayout()), Align: Alignment),
        DAG);
    return DAG.getLoad(VT: VecTy, dl, Chain: DAG.getEntryNode(), Ptr: CP,
                       PtrInfo: MachinePointerInfo::getConstantPool(MF), Alignment);
  }

  // A special case is a situation where the vector is built entirely from
  // elements extracted from another vector. This could be done via a shuffle
  // more efficiently, but typically, the size of the source vector will not
  // match the size of the vector being built (which precludes the use of a
  // shuffle directly).
  // This only handles a single source vector, and the vector being built
  // should be of a sub-vector type of the source vector type.
  auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec,
                                             SmallVectorImpl<int> &SrcIdx) {
    SDValue Vec;
    for (SDValue V : Values) {
      if (isUndef(Op: V)) {
        SrcIdx.push_back(Elt: -1);
        continue;
      }
      if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
        return false;
      // All extracts should come from the same vector.
      SDValue T = V.getOperand(i: 0);
      if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode())
        return false;
      Vec = T;
      // The extract index must be a compile-time constant.
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: V.getOperand(i: 1));
      if (C == nullptr)
        return false;
      int I = C->getSExtValue();
      assert(I >= 0 && "Negative element index");
      SrcIdx.push_back(Elt: I);
    }
    SrcVec = Vec;
    return true;
  };

  SmallVector<int,128> ExtIdx;
  SDValue ExtVec;
  if (IsBuildFromExtracts(ExtVec, ExtIdx)) {
    MVT ExtTy = ty(Op: ExtVec);
    unsigned ExtLen = ExtTy.getVectorNumElements();
    if (ExtLen == VecLen || ExtLen == 2*VecLen) {
      // Construct a new shuffle mask that will produce a vector with the same
      // number of elements as the input vector, and such that the vector we
      // want will be the initial subvector of it.
      SmallVector<int,128> Mask;
      BitVector Used(ExtLen);

      for (int M : ExtIdx) {
        Mask.push_back(Elt: M);
        if (M >= 0)
          Used.set(M);
      }
      // Fill the rest of the mask with the unused elements of ExtVec in hopes
      // that it will result in a permutation of ExtVec's elements. It's still
      // fine if it doesn't (e.g. if undefs are present, or elements are
      // repeated), but permutations can always be done efficiently via vdelta
      // and vrdelta.
      for (unsigned I = 0; I != ExtLen; ++I) {
        if (Mask.size() == ExtLen)
          break;
        if (!Used.test(Idx: I))
          Mask.push_back(Elt: I);
      }

      SDValue S = DAG.getVectorShuffle(VT: ExtTy, dl, N1: ExtVec,
                                       N2: DAG.getUNDEF(VT: ExtTy), Mask);
      // If the source was a pair-sized vector, the result is its low half.
      return ExtLen == VecLen ? S : LoHalf(V: S, DAG);
    }
  }

  // Find most common element to initialize vector with. This is to avoid
  // unnecessary vinsert/valign for cases where the same value is present
  // many times. Creates a histogram of the vector's elements to find the
  // most common element n.
  assert(4*Words.size() == Subtarget.getVectorLength());
  int VecHist[32];
  int n = 0;
  for (unsigned i = 0; i != NumWords; ++i) {
    VecHist[i] = 0;
    if (Words[i].isUndef())
      continue;
    // Count occurrences of Words[i] in the remaining suffix; the earliest
    // position of the most common word ends up with the highest count.
    for (unsigned j = i; j != NumWords; ++j)
      if (Words[i] == Words[j])
        VecHist[i]++;

    if (VecHist[i] > VecHist[n])
      n = i;
  }

  SDValue HalfV = getZero(dl, Ty: VecTy, DAG);
  if (VecHist[n] > 1) {
    // Pre-fill with the most common word; only the differing words need
    // explicit insertion below.
    SDValue SplatV = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: VecTy, Operand: Words[n]);
    HalfV = DAG.getNode(Opcode: HexagonISD::VALIGN, DL: dl, VT: VecTy,
                        Ops: {HalfV, SplatV, DAG.getConstant(Val: HwLen/2, DL: dl, VT: MVT::i32)});
  }
  SDValue HalfV0 = HalfV;
  SDValue HalfV1 = HalfV;

  // Construct two halves in parallel, then or them together. Rn and Rm count
  // number of rotations needed before the next element. One last rotation is
  // performed post-loop to position the last element.
  int Rn = 0, Rm = 0;
  SDValue Sn, Sm;
  SDValue N = HalfV0;
  SDValue M = HalfV1;
  for (unsigned i = 0; i != NumWords/2; ++i) {
    // Rotate by element count since last insertion.
    if (Words[i] != Words[n] || VecHist[n] <= 1) {
      Sn = DAG.getConstant(Val: Rn, DL: dl, VT: MVT::i32);
      HalfV0 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {N, Sn});
      N = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: VecTy,
                      Ops: {HalfV0, Words[i]});
      Rn = 0;
    }
    if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) {
      Sm = DAG.getConstant(Val: Rm, DL: dl, VT: MVT::i32);
      HalfV1 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {M, Sm});
      M = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: VecTy,
                      Ops: {HalfV1, Words[i+NumWords/2]});
      Rm = 0;
    }
    Rn += 4;
    Rm += 4;
  }
  // Perform last rotation.
  Sn = DAG.getConstant(Val: Rn+HwLen/2, DL: dl, VT: MVT::i32);
  Sm = DAG.getConstant(Val: Rm, DL: dl, VT: MVT::i32);
  HalfV0 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {N, Sn});
  HalfV1 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {M, Sm});

  // The two halves occupy disjoint word positions; combine them with OR.
  SDValue T0 = DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: MVT::i32), V: HalfV0);
  SDValue T1 = DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: MVT::i32), V: HalfV1);

  SDValue DstV = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ty(Op: T0), Ops: {T0, T1});

  SDValue OutV =
      DAG.getBitcast(VT: tyVector(Ty: ty(Op: DstV), ElemTy: VecTy.getVectorElementType()), V: DstV);
  return OutV;
}
1128
// Convert the predicate value PredV into a byte vector in which the bits
// of the predicate occupy the initial (prefix) portion, with BitBytes
// bytes per original predicate bit. If ZeroFill is set, the bytes beyond
// that prefix are zeroed, otherwise they are undefined.
SDValue
HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
      unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const {
  MVT PredTy = ty(Op: PredV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);

  if (Subtarget.isHVXVectorType(VecTy: PredTy, IncludeBool: true)) {
    // Move the vector predicate SubV to a vector register, and scale it
    // down to match the representation (bytes per type element) that VecV
    // uses. The scaling down will pick every 2nd or 4th (every Scale-th
    // in general) element and put them at the front of the resulting
    // vector. This subvector will then be inserted into the Q2V of VecV.
    // To avoid having an operation that generates an illegal type (short
    // vector), generate a full size vector.
    //
    SDValue T = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: PredV);
    SmallVector<int,128> Mask(HwLen);
    // Scale = BitBytes(PredV) / Given BitBytes.
    unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes);
    unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes;

    // Distribute source byte i to position BlockLen*(i%Scale) + i/Scale:
    // every Scale-th byte lands in the initial BlockLen-byte block.
    for (unsigned i = 0; i != HwLen; ++i) {
      unsigned Num = i % Scale;
      unsigned Off = i / Scale;
      Mask[BlockLen*Num + Off] = i;
    }
    SDValue S = DAG.getVectorShuffle(VT: ByteTy, dl, N1: T, N2: DAG.getUNDEF(VT: ByteTy), Mask);
    if (!ZeroFill)
      return S;
    // Fill the bytes beyond BlockLen with 0s.
    // V6_pred_scalar2 cannot fill the entire predicate, so it only works
    // when BlockLen < HwLen.
    assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
    MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen);
    SDValue Q = getInstr(MachineOpc: Hexagon::V6_pred_scalar2, dl, Ty: BoolTy,
                         Ops: {DAG.getConstant(Val: BlockLen, DL: dl, VT: MVT::i32)}, DAG);
    // Mask off everything past the BlockLen prefix.
    SDValue M = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: Q);
    return DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ByteTy, N1: S, N2: M);
  }

  // Make sure that this is a valid scalar predicate.
  assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1);

  // Bytes per predicate bit in the 8-byte scalar representation.
  unsigned Bytes = 8 / PredTy.getVectorNumElements();
  // Double-buffer of word lists; IdxW selects the current buffer.
  SmallVector<SDValue,4> Words[2];
  unsigned IdxW = 0;

  // P2D expands the scalar predicate into a 64-bit value.
  SDValue W0 = isUndef(Op: PredV)
                  ? DAG.getUNDEF(VT: MVT::i64)
                  : DAG.getNode(Opcode: HexagonISD::P2D, DL: dl, VT: MVT::i64, Operand: PredV);
  Words[IdxW].push_back(Elt: HiHalf(V: W0, DAG));
  Words[IdxW].push_back(Elt: LoHalf(V: W0, DAG));

  // Repeatedly double the byte count per bit until it reaches BitBytes.
  while (Bytes < BitBytes) {
    IdxW ^= 1;
    Words[IdxW].clear();

    if (Bytes < 4) {
      // Each word still holds multiple predicate lanes: expand each lane
      // to twice the width.
      for (const SDValue &W : Words[IdxW ^ 1]) {
        SDValue T = expandPredicate(Vec32: W, dl, DAG);
        Words[IdxW].push_back(Elt: HiHalf(V: T, DAG));
        Words[IdxW].push_back(Elt: LoHalf(V: T, DAG));
      }
    } else {
      // Each word is already a single lane: duplicate it.
      for (const SDValue &W : Words[IdxW ^ 1]) {
        Words[IdxW].push_back(Elt: W);
        Words[IdxW].push_back(Elt: W);
      }
    }
    Bytes *= 2;
  }

  assert(Bytes == BitBytes);
  // Insert the words into the vector front via rotate + VINSERTW0.
  SDValue Vec = ZeroFill ? getZero(dl, Ty: ByteTy, DAG) : DAG.getUNDEF(VT: ByteTy);
  SDValue S4 = DAG.getConstant(Val: HwLen-4, DL: dl, VT: MVT::i32);
  for (const SDValue &W : Words[IdxW]) {
    Vec = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: Vec, N2: S4);
    Vec = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: ByteTy, N1: Vec, N2: W);
  }

  return Vec;
}
1212
// Materialize a BUILD_VECTOR of i1 values as an HVX vector predicate.
SDValue
HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
                                          const SDLoc &dl, MVT VecTy,
                                          SelectionDAG &DAG) const {
  // Construct a vector V of bytes, such that a comparison V >u 0 would
  // produce the required vector predicate.
  unsigned VecLen = Values.size();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(VecLen <= HwLen || VecLen == 8*HwLen);
  SmallVector<SDValue,128> Bytes;
  // Track whether every (defined) value is a constant true / false, so
  // the all-same cases can use QTRUE/QFALSE directly.
  bool AllT = true, AllF = true;

  auto IsTrue = [] (SDValue V) {
    if (const auto *N = dyn_cast<ConstantSDNode>(Val: V.getNode()))
      return !N->isZero();
    return false;
  };
  auto IsFalse = [] (SDValue V) {
    if (const auto *N = dyn_cast<ConstantSDNode>(Val: V.getNode()))
      return N->isZero();
    return false;
  };

  if (VecLen <= HwLen) {
    // In the hardware, each bit of a vector predicate corresponds to a byte
    // of a vector register. Calculate how many bytes does a bit of VecTy
    // correspond to.
    assert(HwLen % VecLen == 0);
    unsigned BitBytes = HwLen / VecLen;
    for (SDValue V : Values) {
      AllT &= IsTrue(V);
      AllF &= IsFalse(V);

      // Replicate each i1 value into BitBytes consecutive bytes.
      SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(Op: V, DL: dl, VT: MVT::i8)
                                 : DAG.getUNDEF(VT: MVT::i8);
      for (unsigned B = 0; B != BitBytes; ++B)
        Bytes.push_back(Elt: Ext);
    }
  } else {
    // There are as many i1 values, as there are bits in a vector register.
    // Divide the values into groups of 8 and check that each group consists
    // of the same value (ignoring undefs).
    for (unsigned I = 0; I != VecLen; I += 8) {
      unsigned B = 0;
      // Find the first non-undef value in this group.
      for (; B != 8; ++B) {
        if (!Values[I+B].isUndef())
          break;
      }
      SDValue F = Values[I+B];
      AllT &= IsTrue(F);
      AllF &= IsFalse(F);

      // B == 8 means the whole group was undef.
      SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(Op: F, DL: dl, VT: MVT::i8)
                            : DAG.getUNDEF(VT: MVT::i8);
      Bytes.push_back(Elt: Ext);
      // Verify that the rest of values in the group are the same as the
      // first.
      for (; B != 8; ++B)
        assert(Values[I+B].isUndef() || Values[I+B] == F);
    }
  }

  if (AllT)
    return DAG.getNode(Opcode: HexagonISD::QTRUE, DL: dl, VT: VecTy);
  if (AllF)
    return DAG.getNode(Opcode: HexagonISD::QFALSE, DL: dl, VT: VecTy);

  // General case: build the byte vector and convert it to a predicate.
  MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
  SDValue ByteVec = buildHvxVectorReg(Values: Bytes, dl, VecTy: ByteTy, DAG);
  return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: VecTy, Operand: ByteVec);
}
1285
1286SDValue
1287HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV,
1288 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1289 MVT ElemTy = ty(Op: VecV).getVectorElementType();
1290
1291 unsigned ElemWidth = ElemTy.getSizeInBits();
1292 assert(ElemWidth >= 8 && ElemWidth <= 32);
1293 (void)ElemWidth;
1294
1295 SDValue ByteIdx = convertToByteIndex(ElemIdx: IdxV, ElemTy, DAG);
1296 SDValue ExWord = DAG.getNode(Opcode: HexagonISD::VEXTRACTW, DL: dl, VT: MVT::i32,
1297 Ops: {VecV, ByteIdx});
1298 if (ElemTy == MVT::i32)
1299 return ExWord;
1300
1301 // Have an extracted word, need to extract the smaller element out of it.
1302 // 1. Extract the bits of (the original) IdxV that correspond to the index
1303 // of the desired element in the 32-bit word.
1304 SDValue SubIdx = getIndexInWord32(Idx: IdxV, ElemTy, DAG);
1305 // 2. Extract the element from the word.
1306 SDValue ExVec = DAG.getBitcast(VT: tyVector(Ty: ty(Op: ExWord), ElemTy), V: ExWord);
1307 return extractVector(VecV: ExVec, IdxV: SubIdx, dl, ValTy: ElemTy, ResTy: MVT::i32, DAG);
1308}
1309
1310SDValue
1311HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV,
1312 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1313 // Implement other return types if necessary.
1314 assert(ResTy == MVT::i1);
1315
1316 unsigned HwLen = Subtarget.getVectorLength();
1317 MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
1318 SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV);
1319
1320 unsigned Scale = HwLen / ty(Op: VecV).getVectorNumElements();
1321 SDValue ScV = DAG.getConstant(Val: Scale, DL: dl, VT: MVT::i32);
1322 IdxV = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV, N2: ScV);
1323
1324 SDValue ExtB = extractHvxElementReg(VecV: ByteVec, IdxV, dl, ResTy: MVT::i32, DAG);
1325 SDValue Zero = DAG.getTargetConstant(Val: 0, DL: dl, VT: MVT::i32);
1326 return getInstr(MachineOpc: Hexagon::C2_cmpgtui, dl, Ty: MVT::i1, Ops: {ExtB, Zero}, DAG);
1327}
1328
// Insert a single element ValV at index IdxV into the HVX vector VecV.
// Word-sized elements are inserted with rotate + VINSERTW0 + rotate-back;
// sub-word elements first read-modify-write the containing 32-bit word.
SDValue
HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV,
      SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
  MVT ElemTy = ty(Op: VecV).getVectorElementType();

  unsigned ElemWidth = ElemTy.getSizeInBits();
  assert(ElemWidth >= 8 && ElemWidth <= 32);
  (void)ElemWidth;

  // Insert a 32-bit word at the word-aligned byte index ByteIdxV:
  // rotate the target word to position 0, insert it with VINSERTW0,
  // then rotate back by the complementary amount.
  auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV,
                                     SDValue ByteIdxV) {
    MVT VecTy = ty(Op: VecV);
    unsigned HwLen = Subtarget.getVectorLength();
    // Align the byte index down to a word boundary (clear low 2 bits).
    SDValue MaskV =
        DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32,
                    Ops: {ByteIdxV, DAG.getSignedConstant(Val: -4, DL: dl, VT: MVT::i32)});
    SDValue RotV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {VecV, MaskV});
    SDValue InsV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: VecTy, Ops: {RotV, ValV});
    SDValue SubV = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32,
                               Ops: {DAG.getConstant(Val: HwLen, DL: dl, VT: MVT::i32), MaskV});
    SDValue TorV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {InsV, SubV});
    return TorV;
  };

  SDValue ByteIdx = convertToByteIndex(ElemIdx: IdxV, ElemTy, DAG);
  if (ElemTy == MVT::i32)
    return InsertWord(VecV, ValV, ByteIdx);

  // If this is not inserting a 32-bit word, convert it into such a thing.
  // 1. Extract the existing word from the target vector.
  SDValue WordIdx = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i32,
                                Ops: {ByteIdx, DAG.getConstant(Val: 2, DL: dl, VT: MVT::i32)});
  SDValue Ext = extractHvxElementReg(VecV: opCastElem(Vec: VecV, ElemTy: MVT::i32, DAG), IdxV: WordIdx,
                                     dl, ResTy: MVT::i32, DAG);

  // 2. Treating the extracted word as a 32-bit vector, insert the given
  //    value into it.
  SDValue SubIdx = getIndexInWord32(Idx: IdxV, ElemTy, DAG);
  MVT SubVecTy = tyVector(Ty: ty(Op: Ext), ElemTy);
  SDValue Ins = insertVector(VecV: DAG.getBitcast(VT: SubVecTy, V: Ext),
                             ValV, IdxV: SubIdx, dl, ValTy: ElemTy, DAG);

  // 3. Insert the 32-bit word back into the original vector.
  return InsertWord(VecV, Ins, ByteIdx);
}
1374
1375SDValue
1376HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV,
1377 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1378 unsigned HwLen = Subtarget.getVectorLength();
1379 MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
1380 SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV);
1381
1382 unsigned Scale = HwLen / ty(Op: VecV).getVectorNumElements();
1383 SDValue ScV = DAG.getConstant(Val: Scale, DL: dl, VT: MVT::i32);
1384 IdxV = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV, N2: ScV);
1385 ValV = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MVT::i32, Operand: ValV);
1386
1387 SDValue InsV = insertHvxElementReg(VecV: ByteVec, IdxV, ValV, dl, DAG);
1388 return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ty(Op: VecV), Operand: InsV);
1389}
1390
// Extract a subvector (of type ResTy) starting at constant index IdxV
// from the HVX vector VecV. A pair is first reduced to the single vector
// containing the subvector; a scalar-sized result is then assembled from
// one or two extracted 32-bit words.
SDValue
HexagonTargetLowering::extractHvxSubvectorReg(SDValue OrigOp, SDValue VecV,
      SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
  MVT VecTy = ty(Op: VecV);
  unsigned HwLen = Subtarget.getVectorLength();
  // IdxV is required to be a constant.
  unsigned Idx = IdxV.getNode()->getAsZExtVal();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();

  // If the source vector is a vector pair, get the single vector containing
  // the subvector of interest. The subvector will never overlap two single
  // vectors.
  if (isHvxPairTy(Ty: VecTy)) {
    unsigned SubIdx = Hexagon::vsub_lo;
    if (Idx * ElemWidth >= 8 * HwLen) {
      // Subvector lies in the high half; rebase the index to it.
      SubIdx = Hexagon::vsub_hi;
      Idx -= VecTy.getVectorNumElements() / 2;
    }

    VecTy = typeSplit(VecTy).first;
    VecV = DAG.getTargetExtractSubreg(SRIdx: SubIdx, DL: dl, VT: VecTy, Operand: VecV);
    if (VecTy == ResTy)
      return VecV;
  }

  // The only meaningful subvectors of a single HVX vector are those that
  // fit in a scalar register.
  assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64);

  // View the vector as words and extract the word(s) covering the
  // requested range.
  MVT WordTy = tyVector(Ty: VecTy, ElemTy: MVT::i32);
  SDValue WordVec = DAG.getBitcast(VT: WordTy, V: VecV);
  unsigned WordIdx = (Idx*ElemWidth) / 32;

  SDValue W0Idx = DAG.getConstant(Val: WordIdx, DL: dl, VT: MVT::i32);
  SDValue W0 = extractHvxElementReg(VecV: WordVec, IdxV: W0Idx, dl, ResTy: MVT::i32, DAG);
  if (ResTy.getSizeInBits() == 32)
    return DAG.getBitcast(VT: ResTy, V: W0);

  // 64-bit result: combine two consecutive words (W1 is the high half).
  SDValue W1Idx = DAG.getConstant(Val: WordIdx+1, DL: dl, VT: MVT::i32);
  SDValue W1 = extractHvxElementReg(VecV: WordVec, IdxV: W1Idx, dl, ResTy: MVT::i32, DAG);
  SDValue WW = getCombine(Hi: W1, Lo: W0, dl, ResTy: MVT::i64, DAG);
  return DAG.getBitcast(VT: ResTy, V: WW);
}
1434
SDValue
HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV,
      const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
  // Extract the predicate subvector ResTy from the HVX vector predicate
  // VecV at constant element index IdxV.
  MVT VecTy = ty(Op: VecV);
  unsigned HwLen = Subtarget.getVectorLength();
  // Expand the predicate into a byte vector: one byte per predicate byte.
  MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
  SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV);
  // IdxV is required to be a constant.
  unsigned Idx = IdxV.getNode()->getAsZExtVal();

  unsigned ResLen = ResTy.getVectorNumElements();
  // Number of bytes in ByteVec corresponding to one i1 element of VecTy.
  unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
  // Byte offset of the first element of interest within ByteVec.
  unsigned Offset = Idx * BitBytes;
  SDValue Undef = DAG.getUNDEF(VT: ByteTy);
  SmallVector<int,128> Mask;

  if (Subtarget.isHVXVectorType(VecTy: ResTy, IncludeBool: true)) {
    // Converting between two vector predicates. Since the result is shorter
    // than the source, it will correspond to a vector predicate with the
    // relevant bits replicated. The replication count is the ratio of the
    // source and target vector lengths.
    unsigned Rep = VecTy.getVectorNumElements() / ResLen;
    assert(isPowerOf2_32(Rep) && HwLen % Rep == 0);
    for (unsigned i = 0; i != HwLen/Rep; ++i) {
      // Each source byte i+Offset is replicated Rep times in the result.
      for (unsigned j = 0; j != Rep; ++j)
        Mask.push_back(Elt: i + Offset);
    }
    SDValue ShuffV = DAG.getVectorShuffle(VT: ByteTy, dl, N1: ByteVec, N2: Undef, Mask);
    return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ResTy, Operand: ShuffV);
  }

  // Converting between a vector predicate and a scalar predicate. In the
  // vector predicate, a group of BitBytes bits will correspond to a single
  // i1 element of the source vector type. Those bits will all have the same
  // value. The same will be true for ByteVec, where each byte corresponds
  // to a bit in the vector predicate.
  // The algorithm is to traverse the ByteVec, going over the i1 values from
  // the source vector, and generate the corresponding representation in an
  // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
  // elements so that the interesting 8 bytes will be in the low end of the
  // vector.
  unsigned Rep = 8 / ResLen;
  // Make sure the output fill the entire vector register, so repeat the
  // 8-byte groups as many times as necessary.
  for (unsigned r = 0; r != HwLen / 8; ++r) {
    // This will generate the indexes of the 8 interesting bytes.
    for (unsigned i = 0; i != ResLen; ++i) {
      for (unsigned j = 0; j != Rep; ++j)
        Mask.push_back(Elt: Offset + i*BitBytes);
    }
  }

  SDValue Zero = getZero(dl, Ty: MVT::i32, DAG);
  SDValue ShuffV = DAG.getVectorShuffle(VT: ByteTy, dl, N1: ByteVec, N2: Undef, Mask);
  // Combine the two low words from ShuffV into a v8i8, and byte-compare
  // them against 0.
  SDValue W0 = DAG.getNode(Opcode: HexagonISD::VEXTRACTW, DL: dl, VT: MVT::i32, Ops: {ShuffV, Zero});
  SDValue W1 = DAG.getNode(Opcode: HexagonISD::VEXTRACTW, DL: dl, VT: MVT::i32,
                           Ops: {ShuffV, DAG.getConstant(Val: 4, DL: dl, VT: MVT::i32)});
  SDValue Vec64 = getCombine(Hi: W1, Lo: W0, dl, ResTy: MVT::v8i8, DAG);
  // Any nonzero byte compares unsigned-greater-than 0, yielding the final
  // scalar predicate.
  return getInstr(MachineOpc: Hexagon::A4_vcmpbgtui, dl, Ty: ResTy,
                  Ops: {Vec64, DAG.getTargetConstant(Val: 0, DL: dl, VT: MVT::i32)}, DAG);
}
1498
SDValue
HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
      SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
  // Insert SubV into VecV at element index IdxV (which need not be a
  // constant). SubV is either a single HVX vector (inserted into a pair),
  // or a 32/64-bit value inserted into a single vector.
  MVT VecTy = ty(Op: VecV);
  MVT SubTy = ty(Op: SubV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();

  bool IsPair = isHvxPairTy(Ty: VecTy);
  // Type of one single HVX vector with VecTy's element type.
  MVT SingleTy = MVT::getVectorVT(VT: ElemTy, NumElements: (8*HwLen)/ElemWidth);
  // The two single vectors that VecV consists of, if it's a pair.
  SDValue V0, V1;
  SDValue SingleV = VecV;
  SDValue PickHi;

  if (IsPair) {
    V0 = LoHalf(V: VecV, DAG);
    V1 = HiHalf(V: VecV, DAG);

    SDValue HalfV = DAG.getConstant(Val: SingleTy.getVectorNumElements(),
                                    DL: dl, VT: MVT::i32);
    // NOTE(review): SETUGT classifies IdxV == HalfV as "low half"; for a
    // non-constant IdxV that equals HalfV at runtime, the selects below
    // would pick the low-half variants -- confirm SETUGE was not intended.
    PickHi = DAG.getSetCC(DL: dl, VT: MVT::i1, LHS: IdxV, RHS: HalfV, Cond: ISD::SETUGT);
    if (isHvxSingleTy(Ty: SubTy)) {
      // Inserting a whole single vector into a pair: this is a subregister
      // insert when the index is a known constant (0 or half).
      if (const auto *CN = dyn_cast<const ConstantSDNode>(Val: IdxV.getNode())) {
        unsigned Idx = CN->getZExtValue();
        assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2);
        unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi;
        return DAG.getTargetInsertSubreg(SRIdx: SubIdx, DL: dl, VT: VecTy, Operand: VecV, Subreg: SubV);
      }
      // If IdxV is not a constant, generate the two variants: with the
      // SubV as the high and as the low subregister, and select the right
      // pair based on the IdxV.
      SDValue InLo = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {SubV, V1});
      SDValue InHi = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {V0, SubV});
      return DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: VecTy, N1: PickHi, N2: InHi, N3: InLo);
    }
    // The subvector being inserted must be entirely contained in one of
    // the vectors V0 or V1. Set SingleV to the correct one, and update
    // IdxV to be the index relative to the beginning of that vector.
    SDValue S = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32, N1: IdxV, N2: HalfV);
    IdxV = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: MVT::i32, N1: PickHi, N2: S, N3: IdxV);
    SingleV = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: SingleTy, N1: PickHi, N2: V1, N3: V0);
  }

  // The only meaningful subvectors of a single HVX vector are those that
  // fit in a scalar register.
  assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64);
  // Convert IdxV to be index in bytes.
  auto *IdxN = dyn_cast<ConstantSDNode>(Val: IdxV.getNode());
  if (!IdxN || !IdxN->isZero()) {
    // Rotate the insertion point down to byte 0, insert there, and rotate
    // back afterwards.
    IdxV = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV,
                       N2: DAG.getConstant(Val: ElemWidth/8, DL: dl, VT: MVT::i32));
    SingleV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: SingleTy, N1: SingleV, N2: IdxV);
  }
  // When inserting a single word, the rotation back to the original position
  // would be by HwLen-Idx, but if two words are inserted, it will need to be
  // by (HwLen-4)-Idx.
  unsigned RolBase = HwLen;
  if (SubTy.getSizeInBits() == 32) {
    // Insert a single 32-bit word at byte 0.
    SDValue V = DAG.getBitcast(VT: MVT::i32, V: SubV);
    SingleV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: SingleTy, N1: SingleV, N2: V);
  } else {
    // Insert a 64-bit value as two words: insert the low word, rotate by 4
    // bytes, then insert the high word.
    SDValue V = DAG.getBitcast(VT: MVT::i64, V: SubV);
    SDValue R0 = LoHalf(V, DAG);
    SDValue R1 = HiHalf(V, DAG);
    SingleV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: SingleTy, N1: SingleV, N2: R0);
    SingleV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: SingleTy, N1: SingleV,
                          N2: DAG.getConstant(Val: 4, DL: dl, VT: MVT::i32));
    SingleV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: SingleTy, N1: SingleV, N2: R1);
    RolBase = HwLen-4;
  }
  // If the vector wasn't ror'ed, don't ror it back.
  // NOTE(review): RolBase is always HwLen or HwLen-4 (and HwLen is 64/128),
  // so "RolBase != 4" is always true and the rotate-back is always emitted.
  // Presumably "RolBase != HwLen" was intended -- harmless, but verify.
  if (RolBase != 4 || !IdxN || !IdxN->isZero()) {
    SDValue RolV = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32,
                               N1: DAG.getConstant(Val: RolBase, DL: dl, VT: MVT::i32), N2: IdxV);
    SingleV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: SingleTy, N1: SingleV, N2: RolV);
  }

  if (IsPair) {
    // Recombine the updated single vector with the untouched half of the
    // original pair, selecting the placement based on PickHi.
    SDValue InLo = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {SingleV, V1});
    SDValue InHi = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {V0, SingleV});
    return DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: VecTy, N1: PickHi, N2: InHi, N3: InLo);
  }
  return SingleV;
}
1585
SDValue
HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
      SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
  // Insert the predicate subvector SubV into the HVX vector predicate VecV
  // at element index IdxV (not necessarily a constant).
  MVT VecTy = ty(Op: VecV);
  MVT SubTy = ty(Op: SubV);
  assert(Subtarget.isHVXVectorType(VecTy, true));
  // VecV is an HVX vector predicate. SubV may be either an HVX vector
  // predicate as well, or it can be a scalar predicate.

  unsigned VecLen = VecTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(HwLen % VecLen == 0 && "Unexpected vector type");

  // Scale: how many SubTy subvectors fit into VecTy.
  unsigned Scale = VecLen / SubTy.getVectorNumElements();
  // BitBytes: bytes (in the byte-expanded form) per single i1 element.
  unsigned BitBytes = HwLen / VecLen;
  // BlockLen: bytes occupied by the inserted subvector.
  unsigned BlockLen = HwLen / Scale;

  // Work on the byte-vector form of the predicate.
  MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
  SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV);
  SDValue ByteSub = createHvxPrefixPred(PredV: SubV, dl, BitBytes, ZeroFill: false, DAG);
  SDValue ByteIdx;

  auto *IdxN = dyn_cast<ConstantSDNode>(Val: IdxV.getNode());
  if (!IdxN || !IdxN->isZero()) {
    // Rotate the insertion point down to byte 0.
    ByteIdx = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV,
                          N2: DAG.getConstant(Val: BitBytes, DL: dl, VT: MVT::i32));
    ByteVec = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: ByteVec, N2: ByteIdx);
  }

  // ByteVec is the target vector VecV rotated in such a way that the
  // subvector should be inserted at index 0. Generate a predicate mask
  // and use vmux to do the insertion.
  assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
  MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen);
  // Q is true for the first BlockLen byte lanes.
  SDValue Q = getInstr(MachineOpc: Hexagon::V6_pred_scalar2, dl, Ty: BoolTy,
                       Ops: {DAG.getConstant(Val: BlockLen, DL: dl, VT: MVT::i32)}, DAG);
  ByteVec = getInstr(MachineOpc: Hexagon::V6_vmux, dl, Ty: ByteTy, Ops: {Q, ByteSub, ByteVec}, DAG);
  // Rotate ByteVec back, and convert to a vector predicate.
  if (!IdxN || !IdxN->isZero()) {
    SDValue HwLenV = DAG.getConstant(Val: HwLen, DL: dl, VT: MVT::i32);
    SDValue ByteXdi = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32, N1: HwLenV, N2: ByteIdx);
    ByteVec = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: ByteVec, N2: ByteXdi);
  }
  return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: VecTy, Operand: ByteVec);
}
1631
1632SDValue
1633HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
1634 MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const {
1635 // Sign- and any-extending of a vector predicate to a vector register is
1636 // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
1637 // a vector of 1s (where the 1s are of type matching the vector type).
1638 assert(Subtarget.isHVXVectorType(ResTy));
1639 if (!ZeroExt)
1640 return DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ResTy, Operand: VecV);
1641
1642 assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements());
1643 SDValue True = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ResTy,
1644 Operand: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
1645 SDValue False = getZero(dl, Ty: ResTy, DAG);
1646 return DAG.getSelect(DL: dl, VT: ResTy, Cond: VecV, LHS: True, RHS: False);
1647}
1648
SDValue
HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
      MVT ResTy, SelectionDAG &DAG) const {
  // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
  // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
  // vector register. The remaining bits of the vector register are
  // unspecified.

  MachineFunction &MF = DAG.getMachineFunction();
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
  MVT PredTy = ty(Op: VecQ);
  unsigned PredLen = PredTy.getVectorNumElements();
  assert(HwLen % PredLen == 0);
  // VecTy has PredLen elements wide enough to cover the whole register,
  // so the select below operates on one element per predicate bit.
  MVT VecTy = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: 8*HwLen/PredLen), NumElements: PredLen);

  Type *Int8Ty = Type::getInt8Ty(C&: *DAG.getContext());
  SmallVector<Constant*, 128> Tmp;
  // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
  // These are bytes with the LSB rotated left with respect to their index.
  for (unsigned i = 0; i != HwLen/8; ++i) {
    for (unsigned j = 0; j != 8; ++j)
      Tmp.push_back(Elt: ConstantInt::get(Ty: Int8Ty, V: 1ull << j));
  }
  Constant *CV = ConstantVector::get(V: Tmp);
  Align Alignment(HwLen);
  // Materialize the bit-mask vector from the constant pool.
  SDValue CP = LowerConstantPool(
      Op: DAG.getConstantPool(C: CV, VT: getPointerTy(DL: DAG.getDataLayout()), Align: Alignment),
      DAG);
  SDValue Bytes =
      DAG.getLoad(VT: ByteTy, dl, Chain: DAG.getEntryNode(), Ptr: CP,
                  PtrInfo: MachinePointerInfo::getConstantPool(MF), Alignment);

  // Select the bytes that correspond to true bits in the vector predicate.
  SDValue Sel = DAG.getSelect(DL: dl, VT: VecTy, Cond: VecQ, LHS: DAG.getBitcast(VT: VecTy, V: Bytes),
                              RHS: getZero(dl, Ty: VecTy, DAG));
  // Calculate the OR of all bytes in each group of 8. That will compress
  // all the individual bits into a single byte.
  // First, OR groups of 4, via vrmpy with 0x01010101.
  SDValue All1 =
      DAG.getSplatBuildVector(VT: MVT::v4i8, DL: dl, Op: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
  SDValue Vrmpy = getInstr(MachineOpc: Hexagon::V6_vrmpyub, dl, Ty: ByteTy, Ops: {Sel, All1}, DAG);
  // Then rotate the accumulated vector by 4 bytes, and do the final OR.
  SDValue Rot = getInstr(MachineOpc: Hexagon::V6_valignbi, dl, Ty: ByteTy,
                         Ops: {Vrmpy, Vrmpy, DAG.getTargetConstant(Val: 4, DL: dl, VT: MVT::i32)}, DAG);
  SDValue Vor = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ByteTy, Ops: {Vrmpy, Rot});

  // Pick every 8th byte and coalesce them at the beginning of the output.
  // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
  // byte and so on.
  SmallVector<int,128> Mask;
  for (unsigned i = 0; i != HwLen; ++i)
    Mask.push_back(Elt: (8*i) % HwLen + i/(HwLen/8));
  SDValue Collect =
      DAG.getVectorShuffle(VT: ByteTy, dl, N1: Vor, N2: DAG.getUNDEF(VT: ByteTy), Mask);
  return DAG.getBitcast(VT: ResTy, V: Collect);
}
1706
1707SDValue
1708HexagonTargetLowering::resizeToWidth(SDValue VecV, MVT ResTy, bool Signed,
1709 const SDLoc &dl, SelectionDAG &DAG) const {
1710 // Take a vector and resize the element type to match the given type.
1711 MVT InpTy = ty(Op: VecV);
1712 if (InpTy == ResTy)
1713 return VecV;
1714
1715 unsigned InpWidth = InpTy.getSizeInBits();
1716 unsigned ResWidth = ResTy.getSizeInBits();
1717
1718 if (InpTy.isFloatingPoint()) {
1719 return InpWidth < ResWidth
1720 ? DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: ResTy, Operand: VecV)
1721 : DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: ResTy, N1: VecV,
1722 N2: DAG.getTargetConstant(Val: 0, DL: dl, VT: MVT::i32));
1723 }
1724
1725 assert(InpTy.isInteger());
1726
1727 if (InpWidth < ResWidth) {
1728 unsigned ExtOpc = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1729 return DAG.getNode(Opcode: ExtOpc, DL: dl, VT: ResTy, Operand: VecV);
1730 } else {
1731 unsigned NarOpc = Signed ? HexagonISD::SSAT : HexagonISD::USAT;
1732 return DAG.getNode(Opcode: NarOpc, DL: dl, VT: ResTy, N1: VecV, N2: DAG.getValueType(ResTy));
1733 }
1734}
1735
1736SDValue
1737HexagonTargetLowering::extractSubvector(SDValue Vec, MVT SubTy, unsigned SubIdx,
1738 SelectionDAG &DAG) const {
1739 assert(ty(Vec).getSizeInBits() % SubTy.getSizeInBits() == 0);
1740
1741 const SDLoc &dl(Vec);
1742 unsigned ElemIdx = SubIdx * SubTy.getVectorNumElements();
1743 return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL: dl, VT: SubTy,
1744 Ops: {Vec, DAG.getConstant(Val: ElemIdx, DL: dl, VT: MVT::i32)});
1745}
1746
1747SDValue
1748HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
1749 const {
1750 const SDLoc &dl(Op);
1751 MVT VecTy = ty(Op);
1752
1753 unsigned Size = Op.getNumOperands();
1754 SmallVector<SDValue,128> Ops;
1755 for (unsigned i = 0; i != Size; ++i)
1756 Ops.push_back(Elt: Op.getOperand(i));
1757
1758 if (VecTy.getVectorElementType() == MVT::i1)
1759 return buildHvxVectorPred(Values: Ops, dl, VecTy, DAG);
1760
1761 // In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is
1762 // not a legal type, just bitcast the node to use i16
1763 // types and bitcast the result back to f16
1764 if (VecTy.getVectorElementType() == MVT::f16 ||
1765 VecTy.getVectorElementType() == MVT::bf16) {
1766 SmallVector<SDValue, 64> NewOps;
1767 for (unsigned i = 0; i != Size; i++)
1768 NewOps.push_back(Elt: DAG.getBitcast(VT: MVT::i16, V: Ops[i]));
1769
1770 SDValue T0 =
1771 DAG.getNode(Opcode: ISD::BUILD_VECTOR, DL: dl, VT: tyVector(Ty: VecTy, ElemTy: MVT::i16), Ops: NewOps);
1772 return DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: VecTy.getVectorElementType()), V: T0);
1773 }
1774
1775 // First, split the BUILD_VECTOR for vector pairs. We could generate
1776 // some pairs directly (via splat), but splats should be generated
1777 // by the combiner prior to getting here.
1778 if (VecTy.getSizeInBits() == 16 * Subtarget.getVectorLength()) {
1779 ArrayRef<SDValue> A(Ops);
1780 MVT SingleTy = typeSplit(VecTy).first;
1781 SDValue V0 = buildHvxVectorReg(Values: A.take_front(N: Size / 2), dl, VecTy: SingleTy, DAG);
1782 SDValue V1 = buildHvxVectorReg(Values: A.drop_front(N: Size / 2), dl, VecTy: SingleTy, DAG);
1783 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, N1: V0, N2: V1);
1784 }
1785
1786 return buildHvxVectorReg(Values: Ops, dl, VecTy, DAG);
1787}
1788
1789SDValue
1790HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG)
1791 const {
1792 const SDLoc &dl(Op);
1793 MVT VecTy = ty(Op);
1794 MVT ArgTy = ty(Op: Op.getOperand(i: 0));
1795
1796 if (ArgTy == MVT::f16 || ArgTy == MVT::bf16) {
1797 MVT SplatTy = MVT::getVectorVT(VT: MVT::i16, NumElements: VecTy.getVectorNumElements());
1798 SDValue ToInt16 = DAG.getBitcast(VT: MVT::i16, V: Op.getOperand(i: 0));
1799 SDValue ToInt32 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: MVT::i32, Operand: ToInt16);
1800 SDValue Splat = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: SplatTy, Operand: ToInt32);
1801 return DAG.getBitcast(VT: VecTy, V: Splat);
1802 }
1803
1804 return SDValue();
1805}
1806
SDValue
HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
      const {
  // Vector concatenation of two integer (non-bool) vectors does not need
  // special lowering. Custom-lower concats of bool vectors and expand
  // concats of more than 2 vectors.
  MVT VecTy = ty(Op);
  const SDLoc &dl(Op);
  unsigned NumOp = Op.getNumOperands();
  if (VecTy.getVectorElementType() != MVT::i1) {
    if (NumOp == 2)
      return Op;
    // Expand the other cases into a build-vector.
    SmallVector<SDValue,8> Elems;
    for (SDValue V : Op.getNode()->ops())
      DAG.ExtractVectorElements(Op: V, Args&: Elems);
    // A vector of i16 will be broken up into a build_vector of i16's.
    // This is a problem, since at the time of operation legalization,
    // all operations are expected to be type-legalized, and i16 is not
    // a legal type. If any of the extracted elements is not of a valid
    // type, sign-extend it to a valid one.
    for (SDValue &V : Elems) {
      MVT Ty = ty(Op: V);
      if (!isTypeLegal(VT: Ty)) {
        MVT NTy = typeLegalize(Ty, DAG);
        if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
          // Re-extract at the legal type and sign-extend in-register from
          // the original (illegal) width.
          V = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT: NTy,
                          N1: DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: NTy,
                                          N1: V.getOperand(i: 0), N2: V.getOperand(i: 1)),
                          N2: DAG.getValueType(Ty));
          continue;
        }
        // A few less complicated cases.
        switch (V.getOpcode()) {
          case ISD::Constant:
            V = DAG.getSExtOrTrunc(Op: V, DL: dl, VT: NTy);
            break;
          case ISD::UNDEF:
            V = DAG.getUNDEF(VT: NTy);
            break;
          case ISD::TRUNCATE:
            // Drop the truncate; the wider operand already has a legal type.
            V = V.getOperand(i: 0);
            break;
          default:
            llvm_unreachable("Unexpected vector element");
        }
      }
    }
    return DAG.getBuildVector(VT: VecTy, DL: dl, Ops: Elems);
  }

  // From here on, the result is a vector of i1 (a predicate).
  assert(VecTy.getVectorElementType() == MVT::i1);
  unsigned HwLen = Subtarget.getVectorLength();
  assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0);

  SDValue Op0 = Op.getOperand(i: 0);

  // If the operands are HVX types (i.e. not scalar predicates), then
  // defer the concatenation, and create QCAT instead.
  if (Subtarget.isHVXVectorType(VecTy: ty(Op: Op0), IncludeBool: true)) {
    if (NumOp == 2)
      return DAG.getNode(Opcode: HexagonISD::QCAT, DL: dl, VT: VecTy, N1: Op0, N2: Op.getOperand(i: 1));

    // More than 2 operands: concatenate each half recursively, then QCAT
    // the two halves.
    ArrayRef<SDUse> U(Op.getNode()->ops());
    SmallVector<SDValue, 4> SV(U);
    ArrayRef<SDValue> Ops(SV);

    MVT HalfTy = typeSplit(VecTy).first;
    SDValue V0 = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: HalfTy,
                             Ops: Ops.take_front(N: NumOp/2));
    SDValue V1 = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: HalfTy,
                             Ops: Ops.take_back(N: NumOp/2));
    return DAG.getNode(Opcode: HexagonISD::QCAT, DL: dl, VT: VecTy, N1: V0, N2: V1);
  }

  // Count how many bytes (in a vector register) each bit in VecTy
  // corresponds to.
  unsigned BitBytes = HwLen / VecTy.getVectorNumElements();

  // Make sure that createHvxPrefixPred will only ever need to expand
  // the predicate, i.e. bytes-per-bit in the input is not greater than
  // the target bytes-per-bit in the result.
  SDValue Combined = combineConcatOfScalarPreds(Op, BitBytes, DAG);
  SmallVector<SDValue,8> Prefixes;
  for (SDValue V : Combined.getNode()->op_values()) {
    // Each operand becomes a byte-vector prefix (zero-filled past its end).
    SDValue P = createHvxPrefixPred(PredV: V, dl, BitBytes, ZeroFill: true, DAG);
    Prefixes.push_back(Elt: P);
  }

  // Accumulate the prefixes from last to first: rotate the accumulator by
  // the byte length of one input, then OR in the next prefix.
  unsigned InpLen = ty(Op: Combined.getOperand(i: 0)).getVectorNumElements();
  MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
  SDValue S = DAG.getConstant(Val: HwLen - InpLen*BitBytes, DL: dl, VT: MVT::i32);
  SDValue Res = getZero(dl, Ty: ByteTy, DAG);
  for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) {
    Res = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: Res, N2: S);
    Res = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ByteTy, N1: Res, N2: Prefixes[e-i-1]);
  }
  // Convert the accumulated byte vector back to a vector predicate.
  return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: VecTy, Operand: Res);
}
1906
1907SDValue
1908HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
1909 const {
1910 // Change the type of the extracted element to i32.
1911 SDValue VecV = Op.getOperand(i: 0);
1912 MVT ElemTy = ty(Op: VecV).getVectorElementType();
1913 const SDLoc &dl(Op);
1914 SDValue IdxV = Op.getOperand(i: 1);
1915 if (ElemTy == MVT::i1)
1916 return extractHvxElementPred(VecV, IdxV, dl, ResTy: ty(Op), DAG);
1917
1918 return extractHvxElementReg(VecV, IdxV, dl, ResTy: ty(Op), DAG);
1919}
1920
1921SDValue
1922HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
1923 const {
1924 const SDLoc &dl(Op);
1925 MVT VecTy = ty(Op);
1926 SDValue VecV = Op.getOperand(i: 0);
1927 SDValue ValV = Op.getOperand(i: 1);
1928 SDValue IdxV = Op.getOperand(i: 2);
1929 MVT ElemTy = ty(Op: VecV).getVectorElementType();
1930 if (ElemTy == MVT::i1)
1931 return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);
1932
1933 if (ElemTy == MVT::f16 || ElemTy == MVT::bf16) {
1934 SDValue T0 = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: dl,
1935 VT: tyVector(Ty: VecTy, ElemTy: MVT::i16),
1936 N1: DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: MVT::i16), V: VecV),
1937 N2: DAG.getBitcast(VT: MVT::i16, V: ValV), N3: IdxV);
1938 return DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy), V: T0);
1939 }
1940
1941 return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
1942}
1943
1944SDValue
1945HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
1946 const {
1947 SDValue SrcV = Op.getOperand(i: 0);
1948 MVT SrcTy = ty(Op: SrcV);
1949 MVT DstTy = ty(Op);
1950 SDValue IdxV = Op.getOperand(i: 1);
1951 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1952 assert(Idx % DstTy.getVectorNumElements() == 0);
1953 (void)Idx;
1954 const SDLoc &dl(Op);
1955
1956 MVT ElemTy = SrcTy.getVectorElementType();
1957 if (ElemTy == MVT::i1)
1958 return extractHvxSubvectorPred(VecV: SrcV, IdxV, dl, ResTy: DstTy, DAG);
1959
1960 return extractHvxSubvectorReg(OrigOp: Op, VecV: SrcV, IdxV, dl, ResTy: DstTy, DAG);
1961}
1962
1963SDValue
1964HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG)
1965 const {
1966 // Idx does not need to be a constant.
1967 SDValue VecV = Op.getOperand(i: 0);
1968 SDValue ValV = Op.getOperand(i: 1);
1969 SDValue IdxV = Op.getOperand(i: 2);
1970
1971 const SDLoc &dl(Op);
1972 MVT VecTy = ty(Op: VecV);
1973 MVT ElemTy = VecTy.getVectorElementType();
1974 if (ElemTy == MVT::i1)
1975 return insertHvxSubvectorPred(VecV, SubV: ValV, IdxV, dl, DAG);
1976
1977 return insertHvxSubvectorReg(VecV, SubV: ValV, IdxV, dl, DAG);
1978}
1979
1980SDValue
1981HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const {
1982 // Lower any-extends of boolean vectors to sign-extends, since they
1983 // translate directly to Q2V. Zero-extending could also be done equally
1984 // fast, but Q2V is used/recognized in more places.
1985 // For all other vectors, use zero-extend.
1986 MVT ResTy = ty(Op);
1987 SDValue InpV = Op.getOperand(i: 0);
1988 MVT ElemTy = ty(Op: InpV).getVectorElementType();
1989 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(VecTy: ResTy))
1990 return LowerHvxSignExt(Op, DAG);
1991 return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SDLoc(Op), VT: ResTy, Operand: InpV);
1992}
1993
1994SDValue
1995HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const {
1996 MVT ResTy = ty(Op);
1997 SDValue InpV = Op.getOperand(i: 0);
1998 MVT ElemTy = ty(Op: InpV).getVectorElementType();
1999 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(VecTy: ResTy))
2000 return extendHvxVectorPred(VecV: InpV, dl: SDLoc(Op), ResTy: ty(Op), ZeroExt: false, DAG);
2001 return Op;
2002}
2003
2004SDValue
2005HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const {
2006 MVT ResTy = ty(Op);
2007 SDValue InpV = Op.getOperand(i: 0);
2008 MVT ElemTy = ty(Op: InpV).getVectorElementType();
2009 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(VecTy: ResTy))
2010 return extendHvxVectorPred(VecV: InpV, dl: SDLoc(Op), ResTy: ty(Op), ZeroExt: true, DAG);
2011 return Op;
2012}
2013
2014SDValue
2015HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
2016 // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
2017 // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
2018 const SDLoc &dl(Op);
2019 MVT ResTy = ty(Op);
2020 SDValue InpV = Op.getOperand(i: 0);
2021 assert(ResTy == ty(InpV));
2022
2023 // Calculate the vectors of 1 and bitwidth(x).
2024 MVT ElemTy = ty(Op: InpV).getVectorElementType();
2025 unsigned ElemWidth = ElemTy.getSizeInBits();
2026
2027 SDValue Vec1 = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ResTy,
2028 Operand: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
2029 SDValue VecW = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ResTy,
2030 Operand: DAG.getConstant(Val: ElemWidth, DL: dl, VT: MVT::i32));
2031 SDValue VecN1 = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ResTy,
2032 Operand: DAG.getAllOnesConstant(DL: dl, VT: MVT::i32));
2033
2034 // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
2035 // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
2036 // it separately in custom combine or selection).
2037 SDValue A = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ResTy,
2038 Ops: {DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {InpV, VecN1}),
2039 DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {InpV, Vec1})});
2040 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy,
2041 Ops: {VecW, DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT: ResTy, Operand: A)});
2042}
2043
2044SDValue
2045HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
2046 const SDLoc &dl(Op);
2047 MVT ResTy = ty(Op);
2048 assert(ResTy.getVectorElementType() == MVT::i32);
2049
2050 SDValue Vs = Op.getOperand(i: 0);
2051 SDValue Vt = Op.getOperand(i: 1);
2052
2053 SDVTList ResTys = DAG.getVTList(VT1: ResTy, VT2: ResTy);
2054 unsigned Opc = Op.getOpcode();
2055
2056 // On HVX v62+ producing the full product is cheap, so legalize MULH to LOHI.
2057 if (Opc == ISD::MULHU)
2058 return DAG.getNode(Opcode: HexagonISD::UMUL_LOHI, DL: dl, VTList: ResTys, Ops: {Vs, Vt}).getValue(R: 1);
2059 if (Opc == ISD::MULHS)
2060 return DAG.getNode(Opcode: HexagonISD::SMUL_LOHI, DL: dl, VTList: ResTys, Ops: {Vs, Vt}).getValue(R: 1);
2061
2062#ifndef NDEBUG
2063 Op.dump(&DAG);
2064#endif
2065 llvm_unreachable("Unexpected mulh operation");
2066}
2067
SDValue
HexagonTargetLowering::LowerHvxMulLoHi(SDValue Op, SelectionDAG &DAG) const {
  // Lower the two-result SMUL_LOHI/UMUL_LOHI/USMUL_LOHI nodes, emitting
  // only the part(s) whose results are actually used.
  const SDLoc &dl(Op);
  unsigned Opc = Op.getOpcode();
  SDValue Vu = Op.getOperand(i: 0);
  SDValue Vv = Op.getOperand(i: 1);

  // If the HI part is not used, convert it to a regular MUL.
  if (auto HiVal = Op.getValue(R: 1); HiVal.use_empty()) {
    // Need to preserve the types and the number of values.
    SDValue Hi = DAG.getUNDEF(VT: ty(Op: HiVal));
    SDValue Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: ty(Op), Ops: {Vu, Vv});
    return DAG.getMergeValues(Ops: {Lo, Hi}, dl);
  }

  // Signedness of each operand: for USMUL_LOHI only the second operand
  // is treated as signed.
  bool SignedVu = Opc == HexagonISD::SMUL_LOHI;
  bool SignedVv = Opc == HexagonISD::SMUL_LOHI || Opc == HexagonISD::USMUL_LOHI;

  // Legal on HVX v62+, but lower it here because patterns can't handle multi-
  // valued nodes.
  if (Subtarget.useHVXV62Ops())
    return emitHvxMulLoHiV62(A: Vu, SignedA: SignedVu, B: Vv, SignedB: SignedVv, dl, DAG);

  if (Opc == HexagonISD::SMUL_LOHI) {
    // Direct MULHS expansion is cheaper than doing the whole SMUL_LOHI,
    // for other signedness LOHI is cheaper.
    if (auto LoVal = Op.getValue(R: 0); LoVal.use_empty()) {
      SDValue Hi = emitHvxMulHsV60(A: Vu, B: Vv, dl, DAG);
      SDValue Lo = DAG.getUNDEF(VT: ty(Op: LoVal));
      return DAG.getMergeValues(Ops: {Lo, Hi}, dl);
    }
  }

  // Both results are needed (or v60 non-signed case): full expansion.
  return emitHvxMulLoHiV60(A: Vu, SignedA: SignedVu, B: Vv, SignedB: SignedVv, dl, DAG);
}
2103
SDValue
HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
  // Lower bitcasts that involve HVX predicate (vNi1) types, which have no
  // direct register-to-register cast:
  //  - HVX bool -> scalar integer: compress the predicate bits into words
  //    and extract/combine them into the scalar result.
  //  - Scalar integer (or small vector) -> HVX bool: replicate the bits
  //    across vector lanes, isolate one bit per lane, convert to predicate.
  // Bitcasts not matching any case here are returned unchanged.
  SDValue Val = Op.getOperand(i: 0);
  MVT ResTy = ty(Op);
  MVT ValTy = ty(Op: Val);
  const SDLoc &dl(Op);

  // === Case: HVX predicate -> scalar integer ===
  if (isHvxBoolTy(Ty: ValTy) && ResTy.isScalarInteger()) {
    unsigned HwLen = Subtarget.getVectorLength();
    // Pack the predicate bits into a vector of i32 words.
    MVT WordTy = MVT::getVectorVT(VT: MVT::i32, NumElements: HwLen/4);
    SDValue VQ = compressHvxPred(VecQ: Val, dl, ResTy: WordTy, DAG);
    unsigned BitWidth = ResTy.getSizeInBits();

    if (BitWidth < 64) {
      // All result bits fit in the first extracted word; truncate for
      // results narrower than 32 bits.
      SDValue W0 = extractHvxElementReg(VecV: VQ, IdxV: DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32),
                                        dl, ResTy: MVT::i32, DAG);
      if (BitWidth == 32)
        return W0;
      assert(BitWidth < 32u);
      return DAG.getZExtOrTrunc(Op: W0, DL: dl, VT: ResTy);
    }

    // The result is >= 64 bits. The only options are 64 or 128.
    assert(BitWidth == 64 || BitWidth == 128);
    // Extract one 32-bit word per 32 result bits.
    SmallVector<SDValue,4> Words;
    for (unsigned i = 0; i != BitWidth/32; ++i) {
      SDValue W = extractHvxElementReg(
          VecV: VQ, IdxV: DAG.getConstant(Val: i, DL: dl, VT: MVT::i32), dl, ResTy: MVT::i32, DAG);
      Words.push_back(Elt: W);
    }
    // Pair adjacent words into i64 values (low word first).
    SmallVector<SDValue,2> Combines;
    assert(Words.size() % 2 == 0);
    for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
      SDValue C = getCombine(Hi: Words[i+1], Lo: Words[i], dl, ResTy: MVT::i64, DAG);
      Combines.push_back(Elt: C);
    }

    if (BitWidth == 64)
      return Combines[0];

    // 128-bit result: pair the two i64 halves.
    return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: ResTy, Ops: Combines);
  }

  // Handle bitcast from i32, v2i16, and v4i8 to v32i1.
  // Splat the input into a 32-element i32 vector, then AND each element
  // with a unique bitmask to isolate individual bits.
  auto bitcastI32ToV32I1 = [&](SDValue Val32) {
    assert(Val32.getValueType().getSizeInBits() == 32 &&
           "Input must be 32 bits");
    MVT VecTy = MVT::getVectorVT(VT: MVT::i32, NumElements: 32);
    SDValue Splat = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: VecTy, Operand: Val32);
    // Lane i keeps only bit i of the input.
    SmallVector<SDValue, 32> Mask;
    for (unsigned i = 0; i < 32; ++i)
      Mask.push_back(Elt: DAG.getConstant(Val: 1ull << i, DL: dl, VT: MVT::i32));

    SDValue MaskVec = DAG.getBuildVector(VT: VecTy, DL: dl, Ops: Mask);
    SDValue Anded = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: VecTy, N1: Splat, N2: MaskVec);
    // Non-zero lanes become true predicate lanes.
    return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: MVT::v32i1, Operand: Anded);
  };
  // === Case: v32i1 ===
  if (ResTy == MVT::v32i1 &&
      (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) &&
      Subtarget.useHVX128BOps()) {
    // Normalize small vector inputs to a 32-bit scalar first.
    SDValue Val32 = Val;
    if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8)
      Val32 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: Val);
    return bitcastI32ToV32I1(Val32);
  }
  // === Case: v64i1 ===
  if (ResTy == MVT::v64i1 && ValTy == MVT::i64 && Subtarget.useHVX128BOps()) {
    // Split i64 into lo/hi 32-bit halves.
    SDValue Lo = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32, Operand: Val);
    SDValue HiShifted = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i64, N1: Val,
                                    N2: DAG.getConstant(Val: 32, DL: dl, VT: MVT::i64));
    SDValue Hi = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32, Operand: HiShifted);

    // Reuse the same 32-bit logic twice.
    SDValue LoRes = bitcastI32ToV32I1(Lo);
    SDValue HiRes = bitcastI32ToV32I1(Hi);

    // Concatenate into a v64i1 predicate.
    return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: MVT::v64i1, N1: LoRes, N2: HiRes);
  }

  // === Case: full-width scalar -> HVX predicate ===
  if (isHvxBoolTy(Ty: ResTy) && ValTy.isScalarInteger()) {
    // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
    unsigned BitWidth = ValTy.getSizeInBits();
    unsigned HwLen = Subtarget.getVectorLength();
    assert(BitWidth == HwLen);

    // View the scalar as a vector of bytes.
    MVT ValAsVecTy = MVT::getVectorVT(VT: MVT::i8, NumElements: BitWidth / 8);
    SDValue ValAsVec = DAG.getBitcast(VT: ValAsVecTy, V: Val);
    // Splat each byte of Val 8 times.
    // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8]
    // where b0, b1,..., b15 are least to most significant bytes of I.
    SmallVector<SDValue, 128> Bytes;
    // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,...
    // These are bytes with the LSB rotated left with respect to their index.
    SmallVector<SDValue, 128> Tmp;
    for (unsigned I = 0; I != HwLen / 8; ++I) {
      SDValue Idx = DAG.getConstant(Val: I, DL: dl, VT: MVT::i32);
      SDValue Byte =
          DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: MVT::i8, N1: ValAsVec, N2: Idx);
      for (unsigned J = 0; J != 8; ++J) {
        Bytes.push_back(Elt: Byte);
        Tmp.push_back(Elt: DAG.getConstant(Val: 1ull << J, DL: dl, VT: MVT::i8));
      }
    }

    MVT ConstantVecTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
    SDValue ConstantVec = DAG.getBuildVector(VT: ConstantVecTy, DL: dl, Ops: Tmp);
    SDValue I2V = buildHvxVectorReg(Values: Bytes, dl, VecTy: ConstantVecTy, DAG);

    // Each Byte in the I2V will be set iff corresponding bit is set in Val.
    I2V = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ConstantVecTy, Ops: {I2V, ConstantVec});
    return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ResTy, Operand: I2V);
  }

  // No HVX predicate involved: leave the bitcast as-is.
  return Op;
}
2224
SDValue HexagonTargetLowering::LowerHvxStore(SDValue Op,
                                             SelectionDAG &DAG) const {
  // Lower a store of an HVX predicate (vNi1) value by compressing the
  // predicate into 32-bit words and storing those as scalars. Returns an
  // empty SDValue when this lowering does not apply (non-predicate value,
  // insufficient alignment, or an unhandled predicate width).
  const SDLoc &dl(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Val: Op.getNode());
  SDValue Val = SN->getValue();
  MVT ValTy = ty(Op: Val);

  // Check if this is a store of an HVX bool vector (predicate)
  if (!isHvxBoolTy(Ty: ValTy))
    return SDValue();

  // One predicate lane per bit: the element count is the bit count.
  unsigned NumBits = ValTy.getVectorNumElements();
  MachineMemOperand *MMO = SN->getMemOperand();

  // Check alignment requirements based on predicate size
  unsigned RequiredAlign = (NumBits == 32) ? 4 : 8;
  if (MMO->getBaseAlign().value() % RequiredAlign != 0)
    return SDValue();

  unsigned HwLen = Subtarget.getVectorLength();
  MVT WordTy = MVT::getVectorVT(VT: MVT::i32, NumElements: HwLen / 4);

  // Compress the predicate into a vector register
  SDValue VQ = compressHvxPred(VecQ: Val, dl, ResTy: WordTy, DAG);

  // Extract words from the compressed vector
  SmallVector<SDValue, 4> Words;
  for (unsigned i = 0; i != NumBits / 32; ++i) {
    SDValue W = extractHvxElementReg(VecV: VQ, IdxV: DAG.getConstant(Val: i, DL: dl, VT: MVT::i32), dl,
                                     ResTy: MVT::i32, DAG);
    Words.push_back(Elt: W);
  }

  SDValue Chain = SN->getChain();
  SDValue BasePtr = SN->getBasePtr();
  MachinePointerInfo PtrInfo = MMO->getPointerInfo();

  // v32i1: single 32-bit store.
  if (NumBits == 32)
    return DAG.getStore(Chain, dl, Val: Words[0], Ptr: BasePtr, PtrInfo,
                        Alignment: MMO->getBaseAlign());

  // v64i1: combine the two words and do one 64-bit store.
  if (NumBits == 64) {
    SDValue W64 = getCombine(Hi: Words[1], Lo: Words[0], dl, ResTy: MVT::i64, DAG);
    return DAG.getStore(Chain, dl, Val: W64, Ptr: BasePtr, PtrInfo, Alignment: MMO->getBaseAlign());
  }

  // v128i1: two chained 64-bit stores, the second at byte offset 8.
  if (NumBits == 128) {
    SDValue Lo64 = getCombine(Hi: Words[1], Lo: Words[0], dl, ResTy: MVT::i64, DAG);
    SDValue Hi64 = getCombine(Hi: Words[3], Lo: Words[2], dl, ResTy: MVT::i64, DAG);

    Chain =
        DAG.getStore(Chain, dl, Val: Lo64, Ptr: BasePtr, PtrInfo, Alignment: MMO->getBaseAlign());

    SDValue Offset8 = DAG.getConstant(Val: 8, DL: dl, VT: MVT::i32);
    SDValue Ptr8 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i32, N1: BasePtr, N2: Offset8);
    return DAG.getStore(Chain, dl, Val: Hi64, Ptr: Ptr8, PtrInfo: PtrInfo.getWithOffset(O: 8),
                        Alignment: Align(8));
  }

  // Unhandled predicate width.
  return SDValue();
}
2286
SDValue HexagonTargetLowering::LowerHvxLoad(SDValue Op,
                                            SelectionDAG &DAG) const {
  // Lower a load of an HVX predicate (vNi1) value by loading the underlying
  // scalar bits and bitcasting them back to a predicate. Returns an empty
  // SDValue when this lowering does not apply (non-predicate result,
  // insufficient alignment, or an unhandled predicate width).
  const SDLoc &dl(Op);
  LoadSDNode *LN = cast<LoadSDNode>(Val: Op.getNode());
  MVT ResTy = ty(Op);

  // Check if this is a load of an HVX bool vector (predicate)
  if (!isHvxBoolTy(Ty: ResTy))
    return SDValue();

  // One predicate lane per bit: the element count is the bit count.
  unsigned NumBits = ResTy.getVectorNumElements();
  MachineMemOperand *MMO = LN->getMemOperand();

  unsigned RequiredAlign = (NumBits == 32) ? 4 : 8;
  if (MMO->getBaseAlign().value() % RequiredAlign != 0)
    return SDValue();

  SDValue Chain = LN->getChain();
  SDValue BasePtr = LN->getBasePtr();
  MachinePointerInfo PtrInfo = MMO->getPointerInfo();

  // v32i1: load one i32 and bitcast. The merged values are {result, chain}.
  if (NumBits == 32) {
    SDValue W32 =
        DAG.getLoad(VT: MVT::i32, dl, Chain, Ptr: BasePtr, PtrInfo, Alignment: MMO->getBaseAlign());
    SDValue Pred = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v32i1, Operand: W32);
    SDValue Ops[] = {Pred, W32.getValue(R: 1)};
    return DAG.getMergeValues(Ops, dl);
  }

  // v64i1: load one i64 and bitcast.
  if (NumBits == 64) {
    SDValue W64 =
        DAG.getLoad(VT: MVT::i64, dl, Chain, Ptr: BasePtr, PtrInfo, Alignment: MMO->getBaseAlign());
    SDValue Pred = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v64i1, Operand: W64);
    SDValue Ops[] = {Pred, W64.getValue(R: 1)};
    return DAG.getMergeValues(Ops, dl);
  }

  // v128i1: two chained i64 loads (second at byte offset 8), each bitcast
  // to v64i1, then concatenated.
  if (NumBits == 128) {
    SDValue Lo64 =
        DAG.getLoad(VT: MVT::i64, dl, Chain, Ptr: BasePtr, PtrInfo, Alignment: MMO->getBaseAlign());
    Chain = Lo64.getValue(R: 1);

    SDValue Offset8 = DAG.getConstant(Val: 8, DL: dl, VT: MVT::i32);
    SDValue Ptr8 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i32, N1: BasePtr, N2: Offset8);
    SDValue Hi64 = DAG.getLoad(VT: MVT::i64, dl, Chain, Ptr: Ptr8,
                               PtrInfo: PtrInfo.getWithOffset(O: 8), Alignment: Align(8));

    SDValue LoPred = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v64i1, Operand: Lo64);
    SDValue HiPred = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v64i1, Operand: Hi64);
    SDValue Pred =
        DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: MVT::v128i1, N1: LoPred, N2: HiPred);

    // The second load is chained after the first, so its output chain
    // covers both.
    SDValue Ops[] = {Pred, Hi64.getValue(R: 1)};
    return DAG.getMergeValues(Ops, dl);
  }

  // Unhandled predicate width.
  return SDValue();
}
2345
2346SDValue
2347HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
2348 // Sign- and zero-extends are legal.
2349 assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
2350 return DAG.getNode(Opcode: ISD::ZERO_EXTEND_VECTOR_INREG, DL: SDLoc(Op), VT: ty(Op),
2351 Operand: Op.getOperand(i: 0));
2352}
2353
2354SDValue
2355HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const {
2356 MVT ResTy = ty(Op);
2357 if (ResTy.getVectorElementType() != MVT::i1)
2358 return Op;
2359
2360 const SDLoc &dl(Op);
2361 unsigned HwLen = Subtarget.getVectorLength();
2362 unsigned VecLen = ResTy.getVectorNumElements();
2363 assert(HwLen % VecLen == 0);
2364 unsigned ElemSize = HwLen / VecLen;
2365
2366 MVT VecTy = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: ElemSize * 8), NumElements: VecLen);
2367 SDValue S =
2368 DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: VecTy, N1: Op.getOperand(i: 0),
2369 N2: DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: VecTy, Operand: Op.getOperand(i: 1)),
2370 N3: DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: VecTy, Operand: Op.getOperand(i: 2)));
2371 return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ResTy, Operand: S);
2372}
2373
2374SDValue
2375HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
2376 if (SDValue S = getVectorShiftByInt(Op, DAG))
2377 return S;
2378 return Op;
2379}
2380
SDValue
HexagonTargetLowering::LowerHvxFunnelShift(SDValue Op,
                                           SelectionDAG &DAG) const {
  // Lower vector funnel shifts (FSHL/FSHR). When the shift amount is a
  // splatted scalar (and the element type profits from it), expand into a
  // pair of regular shifts; otherwise emit the target's modulo funnel-shift
  // nodes (MFSHL/MFSHR) with the amount masked to the element width.
  unsigned Opc = Op.getOpcode();
  assert(Opc == ISD::FSHL || Opc == ISD::FSHR);

  // Make sure the shift amount is within the range of the bitwidth
  // of the element type.
  SDValue A = Op.getOperand(i: 0);
  SDValue B = Op.getOperand(i: 1);
  SDValue S = Op.getOperand(i: 2);

  MVT InpTy = ty(Op: A);
  MVT ElemTy = InpTy.getVectorElementType();

  const SDLoc &dl(Op);
  unsigned ElemWidth = ElemTy.getSizeInBits();
  bool IsLeft = Opc == ISD::FSHL;

  // The expansion into regular shifts produces worse code for i8 and for
  // right shift of i32 on v65+.
  bool UseShifts = ElemTy != MVT::i8;
  if (Subtarget.useHVXV65Ops() && ElemTy == MVT::i32)
    UseShifts = false;

  if (SDValue SplatV = getSplatValue(Op: S, DAG); SplatV && UseShifts) {
    // If this is a funnel shift by a scalar, lower it into regular shifts.
    // ModS = S mod ElemWidth, NegS = ElemWidth - ModS.
    SDValue Mask = DAG.getConstant(Val: ElemWidth - 1, DL: dl, VT: MVT::i32);
    SDValue ModS =
        DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32,
                    Ops: {DAG.getZExtOrTrunc(Op: SplatV, DL: dl, VT: MVT::i32), Mask});
    SDValue NegS =
        DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32,
                    Ops: {DAG.getConstant(Val: ElemWidth, DL: dl, VT: MVT::i32), ModS});
    SDValue IsZero =
        DAG.getSetCC(DL: dl, VT: MVT::i1, LHS: ModS, RHS: getZero(dl, Ty: MVT::i32, DAG), Cond: ISD::SETEQ);
    // FSHL A, B => A << | B >>n
    // FSHR A, B => A <<n | B >>
    SDValue Part1 =
        DAG.getNode(Opcode: HexagonISD::VASL, DL: dl, VT: InpTy, Ops: {A, IsLeft ? ModS : NegS});
    SDValue Part2 =
        DAG.getNode(Opcode: HexagonISD::VLSR, DL: dl, VT: InpTy, Ops: {B, IsLeft ? NegS : ModS});
    SDValue Or = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: InpTy, Ops: {Part1, Part2});
    // If the shift amount was 0, pick A or B, depending on the direction.
    // The opposite shift will also be by 0, so the "Or" will be incorrect.
    return DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: InpTy, Ops: {IsZero, (IsLeft ? A : B), Or});
  }

  // Non-splat (or non-profitable) amount: mask each lane's shift amount to
  // ElemWidth-1 and use the modulo funnel-shift node.
  SDValue Mask = DAG.getSplatBuildVector(
      VT: InpTy, DL: dl, Op: DAG.getConstant(Val: ElemWidth - 1, DL: dl, VT: ElemTy));

  unsigned MOpc = Opc == ISD::FSHL ? HexagonISD::MFSHL : HexagonISD::MFSHR;
  return DAG.getNode(Opcode: MOpc, DL: dl, VT: ty(Op),
                     Ops: {A, B, DAG.getNode(Opcode: ISD::AND, DL: dl, VT: InpTy, Ops: {S, Mask})});
}
2436
2437SDValue
2438HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
2439 const SDLoc &dl(Op);
2440 unsigned IntNo = Op.getConstantOperandVal(i: 0);
2441 SmallVector<SDValue> Ops(Op->ops());
2442
2443 auto Swap = [&](SDValue P) {
2444 return DAG.getMergeValues(Ops: {P.getValue(R: 1), P.getValue(R: 0)}, dl);
2445 };
2446
2447 switch (IntNo) {
2448 case Intrinsic::hexagon_V6_pred_typecast:
2449 case Intrinsic::hexagon_V6_pred_typecast_128B: {
2450 MVT ResTy = ty(Op), InpTy = ty(Op: Ops[1]);
2451 if (isHvxBoolTy(Ty: ResTy) && isHvxBoolTy(Ty: InpTy)) {
2452 if (ResTy == InpTy)
2453 return Ops[1];
2454 return DAG.getNode(Opcode: HexagonISD::TYPECAST, DL: dl, VT: ResTy, Operand: Ops[1]);
2455 }
2456 break;
2457 }
2458 case Intrinsic::hexagon_V6_vmpyss_parts:
2459 case Intrinsic::hexagon_V6_vmpyss_parts_128B:
2460 return Swap(DAG.getNode(Opcode: HexagonISD::SMUL_LOHI, DL: dl, VTList: Op->getVTList(),
2461 Ops: {Ops[1], Ops[2]}));
2462 case Intrinsic::hexagon_V6_vmpyuu_parts:
2463 case Intrinsic::hexagon_V6_vmpyuu_parts_128B:
2464 return Swap(DAG.getNode(Opcode: HexagonISD::UMUL_LOHI, DL: dl, VTList: Op->getVTList(),
2465 Ops: {Ops[1], Ops[2]}));
2466 case Intrinsic::hexagon_V6_vmpyus_parts:
2467 case Intrinsic::hexagon_V6_vmpyus_parts_128B: {
2468 return Swap(DAG.getNode(Opcode: HexagonISD::USMUL_LOHI, DL: dl, VTList: Op->getVTList(),
2469 Ops: {Ops[1], Ops[2]}));
2470 }
2471 } // switch
2472
2473 return Op;
2474}
2475
SDValue
HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const {
  // Lower masked loads/stores (MLOAD/MSTORE) of HVX vectors.
  //  - MLOAD: emit a regular load and blend with the pass-through value.
  //  - MSTORE: emit the predicated store instruction; unaligned stores are
  //    split into two predicated stores of rotated data/mask.
  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MaskN = cast<MaskedLoadStoreSDNode>(Val: Op.getNode());
  SDValue Mask = MaskN->getMask();
  SDValue Chain = MaskN->getChain();
  SDValue Base = MaskN->getBasePtr();
  auto *MemOp = MF.getMachineMemOperand(MMO: MaskN->getMemOperand(), Offset: 0, Size: HwLen);

  unsigned Opc = Op->getOpcode();
  assert(Opc == ISD::MLOAD || Opc == ISD::MSTORE);

  if (Opc == ISD::MLOAD) {
    // Full (unmasked) load, then select masked-off lanes from the
    // pass-through value. If the pass-through is undef, the load alone
    // suffices.
    MVT ValTy = ty(Op);
    SDValue Load = DAG.getLoad(VT: ValTy, dl, Chain, Ptr: Base, MMO: MemOp);
    SDValue Thru = cast<MaskedLoadSDNode>(Val: MaskN)->getPassThru();
    if (isUndef(Op: Thru))
      return Load;
    SDValue VSel = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ValTy, N1: Mask, N2: Load, N3: Thru);
    return DAG.getMergeValues(Ops: {VSel, Load.getValue(R: 1)}, dl);
  }

  // MSTORE
  // HVX only has aligned masked stores.

  // TODO: Fold negations of the mask into the store.
  unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai;
  SDValue Value = cast<MaskedStoreSDNode>(Val: MaskN)->getValue();
  SDValue Offset0 = DAG.getTargetConstant(Val: 0, DL: dl, VT: ty(Op: Base));

  // Aligned case: a single predicated store.
  if (MaskN->getAlign().value() % HwLen == 0) {
    SDValue Store = getInstr(MachineOpc: StoreOpc, dl, Ty: MVT::Other,
                             Ops: {Mask, Base, Offset0, Value, Chain}, DAG);
    DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Store.getNode()), NewMemRefs: {MemOp});
    return Store;
  }

  // Unaligned case.
  // Rotate a vector so that its parts line up with the two aligned vectors
  // that the store spans; returns the {lo, hi} halves of the rotation.
  auto StoreAlign = [&](SDValue V, SDValue A) {
    SDValue Z = getZero(dl, Ty: ty(Op: V), DAG);
    // TODO: use funnel shifts?
    // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the
    // upper half.
    SDValue LoV = getInstr(MachineOpc: Hexagon::V6_vlalignb, dl, Ty: ty(Op: V), Ops: {V, Z, A}, DAG);
    SDValue HiV = getInstr(MachineOpc: Hexagon::V6_vlalignb, dl, Ty: ty(Op: V), Ops: {Z, V, A}, DAG);
    return std::make_pair(x&: LoV, y&: HiV);
  };

  // Rotate both the mask (via its byte-vector image) and the value, then
  // emit two predicated stores: one at Base, one at Base+HwLen.
  MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
  MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen);
  SDValue MaskV = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: Mask);
  VectorPair Tmp = StoreAlign(MaskV, Base);
  VectorPair MaskU = {DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: BoolTy, Operand: Tmp.first),
                      DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: BoolTy, Operand: Tmp.second)};
  VectorPair ValueU = StoreAlign(Value, Base);

  SDValue Offset1 = DAG.getTargetConstant(Val: HwLen, DL: dl, VT: MVT::i32);
  SDValue StoreLo =
      getInstr(MachineOpc: StoreOpc, dl, Ty: MVT::Other,
               Ops: {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG);
  SDValue StoreHi =
      getInstr(MachineOpc: StoreOpc, dl, Ty: MVT::Other,
               Ops: {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG);
  DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: StoreLo.getNode()), NewMemRefs: {MemOp});
  DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: StoreHi.getNode()), NewMemRefs: {MemOp});
  // Join the two independent stores into a single chain result.
  return DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: {StoreLo, StoreHi});
}
2545
SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op,
                                                SelectionDAG &DAG) const {
  // Lower FP_EXTEND of v64bf16/v64f16 to v64f32 on QFloat-capable HVX.
  // This conversion only applies to QFloat. IEEE extension from f16 to f32
  // is legal (done via a pattern).
  assert(Subtarget.useHVXQFloatOps());

  assert(Op->getOpcode() == ISD::FP_EXTEND);

  MVT VecTy = ty(Op);
  MVT ArgTy = ty(Op: Op.getOperand(i: 0));
  const SDLoc &dl(Op);

  if (ArgTy == MVT::v64bf16) {
    // bf16 is the upper half of f32, so extension is a zero-fill of the
    // low 16 bits of each lane.
    MVT HalfTy = typeSplit(VecTy).first;
    SDValue BF16Vec = Op.getOperand(i: 0);
    // xor(x, x) produces an all-zero vector.
    SDValue Zeroes =
        getInstr(MachineOpc: Hexagon::V6_vxor, dl, Ty: HalfTy, Ops: {BF16Vec, BF16Vec}, DAG);
    // Interleave zero vector with the bf16 vector, with zeroes in the lower
    // half of each 32 bit lane, effectively extending the bf16 values to fp32
    // values.
    SDValue ShuffVec =
        getInstr(MachineOpc: Hexagon::V6_vshufoeh, dl, Ty: VecTy, Ops: {BF16Vec, Zeroes}, DAG);
    // Re-shuffle the pair into the element order expected of the result.
    VectorPair VecPair = opSplit(Vec: ShuffVec, dl, DAG);
    SDValue Result = getInstr(MachineOpc: Hexagon::V6_vshuffvdd, dl, Ty: VecTy,
                              Ops: {VecPair.second, VecPair.first,
                                   DAG.getSignedConstant(Val: -4, DL: dl, VT: MVT::i32)},
                              DAG);
    return Result;
  }

  assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16);

  SDValue F16Vec = Op.getOperand(i: 0);

  // Multiply by 1.0 (in f16) to widen the values into qf32 format.
  APFloat FloatVal = APFloat(1.0f);
  bool Ignored;
  FloatVal.convert(ToSemantics: APFloat::IEEEhalf(), RM: APFloat::rmNearestTiesToEven, losesInfo: &Ignored);
  SDValue Fp16Ones = DAG.getConstantFP(Val: FloatVal, DL: dl, VT: ArgTy);
  SDValue VmpyVec =
      getInstr(MachineOpc: Hexagon::V6_vmpy_qf32_hf, dl, Ty: VecTy, Ops: {F16Vec, Fp16Ones}, DAG);

  // Convert each qf32 half to IEEE f32.
  MVT HalfTy = typeSplit(VecTy).first;
  VectorPair Pair = opSplit(Vec: VmpyVec, dl, DAG);
  SDValue LoVec =
      getInstr(MachineOpc: Hexagon::V6_vconv_sf_qf32, dl, Ty: HalfTy, Ops: {Pair.first}, DAG);
  SDValue HiVec =
      getInstr(MachineOpc: Hexagon::V6_vconv_sf_qf32, dl, Ty: HalfTy, Ops: {Pair.second}, DAG);

  // Interleave the halves back into the original element order.
  SDValue ShuffVec =
      getInstr(MachineOpc: Hexagon::V6_vshuffvdd, dl, Ty: VecTy,
               Ops: {HiVec, LoVec, DAG.getSignedConstant(Val: -4, DL: dl, VT: MVT::i32)}, DAG);

  return ShuffVec;
}
2600
2601SDValue
2602HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2603 // Catch invalid conversion ops (just in case).
2604 assert(Op.getOpcode() == ISD::FP_TO_SINT ||
2605 Op.getOpcode() == ISD::FP_TO_UINT);
2606
2607 MVT ResTy = ty(Op);
2608 MVT FpTy = ty(Op: Op.getOperand(i: 0)).getVectorElementType();
2609 MVT IntTy = ResTy.getVectorElementType();
2610
2611 if (Subtarget.useHVXIEEEFPOps()) {
2612 // There are only conversions from f16.
2613 if (FpTy == MVT::f16) {
2614 // Other int types aren't legal in HVX, so we shouldn't see them here.
2615 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2616 // Conversions to i8 and i16 are legal.
2617 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2618 return Op;
2619 }
2620 }
2621
2622 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2623 return EqualizeFpIntConversion(Op, DAG);
2624
2625 return ExpandHvxFpToInt(Op, DAG);
2626}
2627
2628// For vector type v32i1 uint_to_fp/sint_to_fp to v32f32:
2629// R1 = #1, R2 holds the v32i1 param
2630// V1 = vsplat(R1)
2631// V2 = vsplat(R2)
2632// Q0 = vand(V1,R1)
2633// V0.w=prefixsum(Q0)
2634// V0.w=vsub(V0.w,V1.w)
2635// V2.w = vlsr(V2.w,V0.w)
2636// V2 = vand(V2,V1)
2637// V2.sf = V2.w
SDValue HexagonTargetLowering::LowerHvxPred32ToFp(SDValue PredOp,
                                                  SelectionDAG &DAG) const {
  // Convert a v32i1 predicate to v32f32 using the instruction sequence
  // documented above the function: each lane shifts the 32-bit scalar
  // image of the predicate right by its own lane index, masks off all but
  // the low bit, and converts the resulting 0/1 word to float.
  MVT ResTy = ty(Op: PredOp);
  const SDLoc &dl(PredOp);

  // Splat the constant 1 into a full vector (V1 in the sequence above).
  SDValue Const = DAG.getTargetConstant(Val: 0x1, DL: dl, VT: MVT::i32);
  SDNode *RegConst = DAG.getMachineNode(Opcode: Hexagon::A2_tfrsi, dl, VT: MVT::i32, Op1: Const);
  SDNode *SplatConst = DAG.getMachineNode(Opcode: Hexagon::V6_lvsplatw, dl, VT: MVT::v32i32,
                                          Op1: SDValue(RegConst, 0));
  // Form a predicate from the splat (Q0 = vand(V1,R1)).
  SDNode *PredTransfer =
      DAG.getMachineNode(Opcode: Hexagon::V6_vandvrt, dl, VT: MVT::v32i1,
                         Op1: SDValue(SplatConst, 0), Op2: SDValue(RegConst, 0));
  // Prefix sum over that predicate; after the vsub below each lane holds
  // its own index (0..31).
  SDNode *PrefixSum = DAG.getMachineNode(Opcode: Hexagon::V6_vprefixqw, dl, VT: MVT::v32i32,
                                         Op1: SDValue(PredTransfer, 0));
  // Splat the 32-bit scalar image of the predicate across all lanes.
  SDNode *SplatParam = DAG.getMachineNode(
      Opcode: Hexagon::V6_lvsplatw, dl, VT: MVT::v32i32,
      Op1: DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: PredOp.getOperand(i: 0)));
  SDNode *Vsub =
      DAG.getMachineNode(Opcode: Hexagon::V6_vsubw, dl, VT: MVT::v32i32,
                         Op1: SDValue(PrefixSum, 0), Op2: SDValue(SplatConst, 0));
  // Shift each lane's copy right by the lane index ...
  SDNode *IndexShift =
      DAG.getMachineNode(Opcode: Hexagon::V6_vlsrwv, dl, VT: MVT::v32i32,
                         Op1: SDValue(SplatParam, 0), Op2: SDValue(Vsub, 0));
  // ... and keep only the low bit, leaving 0 or 1 per lane.
  SDNode *MaskOff =
      DAG.getMachineNode(Opcode: Hexagon::V6_vand, dl, VT: MVT::v32i32,
                         Op1: SDValue(IndexShift, 0), Op2: SDValue(SplatConst, 0));
  // Convert the integer 0/1 lanes to single-precision float.
  SDNode *Convert = DAG.getMachineNode(Opcode: Hexagon::V6_vconv_sf_w, dl, VT: ResTy,
                                       Op1: SDValue(MaskOff, 0));
  return SDValue(Convert, 0);
}
2669
// For vector type v64i1 uint_to_fp to v64f16:
2671// i64 R32 = bitcast v64i1 R3:2 (R3:2 holds v64i1)
2672// R3 = subreg_high (R32)
2673// R2 = subreg_low (R32)
2674// R1 = #1
2675// V1 = vsplat(R1)
2676// V2 = vsplat(R2)
2677// V3 = vsplat(R3)
2678// Q0 = vand(V1,R1)
2679// V0.w=prefixsum(Q0)
2680// V0.w=vsub(V0.w,V1.w)
2681// V2.w = vlsr(V2.w,V0.w)
2682// V3.w = vlsr(V3.w,V0.w)
2683// V2 = vand(V2,V1)
2684// V3 = vand(V3,V1)
2685// V2.h = vpacke(V3.w,V2.w)
2686// V2.hf = V2.h
SDValue HexagonTargetLowering::LowerHvxPred64ToFp(SDValue PredOp,
                                                  SelectionDAG &DAG) const {
  // Convert a v64i1 predicate to v64f16 using the instruction sequence
  // documented above the function: the 64-bit scalar image of the
  // predicate is split into two 32-bit halves, each lane extracts its own
  // bit from the relevant half, the halves are packed into v64i16, and the
  // 0/1 values are converted to half-precision float.
  MVT ResTy = ty(Op: PredOp);
  const SDLoc &dl(PredOp);

  SDValue Inp = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i64, Operand: PredOp.getOperand(i: 0));
  // Get the hi and lo regs
  SDValue HiReg =
      DAG.getTargetExtractSubreg(SRIdx: Hexagon::isub_hi, DL: dl, VT: MVT::i32, Operand: Inp);
  SDValue LoReg =
      DAG.getTargetExtractSubreg(SRIdx: Hexagon::isub_lo, DL: dl, VT: MVT::i32, Operand: Inp);
  // Get constant #1 and splat into vector V1
  SDValue Const = DAG.getTargetConstant(Val: 0x1, DL: dl, VT: MVT::i32);
  SDNode *RegConst = DAG.getMachineNode(Opcode: Hexagon::A2_tfrsi, dl, VT: MVT::i32, Op1: Const);
  SDNode *SplatConst = DAG.getMachineNode(Opcode: Hexagon::V6_lvsplatw, dl, VT: MVT::v32i32,
                                          Op1: SDValue(RegConst, 0));
  // Splat the hi and lo args
  SDNode *SplatHi =
      DAG.getMachineNode(Opcode: Hexagon::V6_lvsplatw, dl, VT: MVT::v32i32,
                         Op1: DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: HiReg));
  SDNode *SplatLo =
      DAG.getMachineNode(Opcode: Hexagon::V6_lvsplatw, dl, VT: MVT::v32i32,
                         Op1: DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: LoReg));
  // vand between splatted const and const
  SDNode *PredTransfer =
      DAG.getMachineNode(Opcode: Hexagon::V6_vandvrt, dl, VT: MVT::v32i1,
                         Op1: SDValue(SplatConst, 0), Op2: SDValue(RegConst, 0));
  // Get the prefixsum; after the vsub below each lane holds its own index.
  SDNode *PrefixSum = DAG.getMachineNode(Opcode: Hexagon::V6_vprefixqw, dl, VT: MVT::v32i32,
                                         Op1: SDValue(PredTransfer, 0));
  // Get the vsub
  SDNode *Vsub =
      DAG.getMachineNode(Opcode: Hexagon::V6_vsubw, dl, VT: MVT::v32i32,
                         Op1: SDValue(PrefixSum, 0), Op2: SDValue(SplatConst, 0));
  // Get vlsr for hi and lo: shift each lane by its index so that the lane's
  // own bit lands in the LSB.
  SDNode *IndexShift_hi =
      DAG.getMachineNode(Opcode: Hexagon::V6_vlsrwv, dl, VT: MVT::v32i32,
                         Op1: SDValue(SplatHi, 0), Op2: SDValue(Vsub, 0));
  SDNode *IndexShift_lo =
      DAG.getMachineNode(Opcode: Hexagon::V6_vlsrwv, dl, VT: MVT::v32i32,
                         Op1: SDValue(SplatLo, 0), Op2: SDValue(Vsub, 0));
  // Get vand of hi and lo: keep only the LSB, leaving 0 or 1 per lane.
  SDNode *MaskOff_hi =
      DAG.getMachineNode(Opcode: Hexagon::V6_vand, dl, VT: MVT::v32i32,
                         Op1: SDValue(IndexShift_hi, 0), Op2: SDValue(SplatConst, 0));
  SDNode *MaskOff_lo =
      DAG.getMachineNode(Opcode: Hexagon::V6_vand, dl, VT: MVT::v32i32,
                         Op1: SDValue(IndexShift_lo, 0), Op2: SDValue(SplatConst, 0));
  // Pack them: take the even halfwords of hi:lo, producing 64 i16 lanes.
  SDNode *Pack =
      DAG.getMachineNode(Opcode: Hexagon::V6_vpackeh, dl, VT: MVT::v64i16,
                         Op1: SDValue(MaskOff_hi, 0), Op2: SDValue(MaskOff_lo, 0));
  // Convert the integer 0/1 lanes to half-precision float.
  SDNode *Convert =
      DAG.getMachineNode(Opcode: Hexagon::V6_vconv_hf_h, dl, VT: ResTy, Op1: SDValue(Pack, 0));
  return SDValue(Convert, 0);
}
2744
2745SDValue
2746HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
2747 // Catch invalid conversion ops (just in case).
2748 assert(Op.getOpcode() == ISD::SINT_TO_FP ||
2749 Op.getOpcode() == ISD::UINT_TO_FP);
2750
2751 MVT ResTy = ty(Op);
2752 MVT IntTy = ty(Op: Op.getOperand(i: 0)).getVectorElementType();
2753 MVT FpTy = ResTy.getVectorElementType();
2754
2755 if (Op.getOpcode() == ISD::UINT_TO_FP || Op.getOpcode() == ISD::SINT_TO_FP) {
2756 if (ResTy == MVT::v32f32 && ty(Op: Op.getOperand(i: 0)) == MVT::v32i1)
2757 return LowerHvxPred32ToFp(PredOp: Op, DAG);
2758 if (ResTy == MVT::v64f16 && ty(Op: Op.getOperand(i: 0)) == MVT::v64i1)
2759 return LowerHvxPred64ToFp(PredOp: Op, DAG);
2760 }
2761
2762 if (Subtarget.useHVXIEEEFPOps()) {
2763 // There are only conversions to f16.
2764 if (FpTy == MVT::f16) {
2765 // Other int types aren't legal in HVX, so we shouldn't see them here.
2766 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2767 // i8, i16 -> f16 is legal.
2768 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2769 return Op;
2770 }
2771 }
2772
2773 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2774 return EqualizeFpIntConversion(Op, DAG);
2775
2776 return ExpandHvxIntToFp(Op, DAG);
2777}
2778
HexagonTargetLowering::TypePair
HexagonTargetLowering::typeExtendToWider(MVT Ty0, MVT Ty1) const {
  // Compare the widths of elements of the two types, and extend the narrower
  // type to match the width of the wider type. For vector types, apply this
  // to the element type. Returns the pair of (possibly) adjusted types.
  assert(Ty0.isVector() == Ty1.isVector());

  MVT ElemTy0 = Ty0.getScalarType();
  MVT ElemTy1 = Ty1.getScalarType();

  unsigned Width0 = ElemTy0.getSizeInBits();
  unsigned Width1 = ElemTy1.getSizeInBits();
  unsigned MaxWidth = std::max(a: Width0, b: Width1);

  // Build a scalar of the requested width, preserving int vs FP kind.
  auto getScalarWithWidth = [](MVT ScalarTy, unsigned Width) {
    if (ScalarTy.isInteger())
      return MVT::getIntegerVT(BitWidth: Width);
    assert(ScalarTy.isFloatingPoint());
    return MVT::getFloatingPointVT(BitWidth: Width);
  };

  MVT WideETy0 = getScalarWithWidth(ElemTy0, MaxWidth);
  MVT WideETy1 = getScalarWithWidth(ElemTy1, MaxWidth);

  if (!Ty0.isVector()) {
    // Both types are scalars.
    return {WideETy0, WideETy1};
  }

  // Vector types: keep the element counts, which must agree.
  unsigned NumElem = Ty0.getVectorNumElements();
  assert(NumElem == Ty1.getVectorNumElements());

  return {MVT::getVectorVT(VT: WideETy0, NumElements: NumElem),
          MVT::getVectorVT(VT: WideETy1, NumElements: NumElem)};
}
2815
2816HexagonTargetLowering::TypePair
2817HexagonTargetLowering::typeWidenToWider(MVT Ty0, MVT Ty1) const {
2818 // Compare the numbers of elements of two vector types, and widen the
2819 // narrower one to match the number of elements in the wider one.
2820 assert(Ty0.isVector() && Ty1.isVector());
2821
2822 unsigned Len0 = Ty0.getVectorNumElements();
2823 unsigned Len1 = Ty1.getVectorNumElements();
2824 if (Len0 == Len1)
2825 return {Ty0, Ty1};
2826
2827 unsigned MaxLen = std::max(a: Len0, b: Len1);
2828 return {MVT::getVectorVT(VT: Ty0.getVectorElementType(), NumElements: MaxLen),
2829 MVT::getVectorVT(VT: Ty1.getVectorElementType(), NumElements: MaxLen)};
2830}
2831
2832MVT
2833HexagonTargetLowering::typeLegalize(MVT Ty, SelectionDAG &DAG) const {
2834 EVT LegalTy = getTypeToTransformTo(Context&: *DAG.getContext(), VT: Ty);
2835 assert(LegalTy.isSimple());
2836 return LegalTy.getSimpleVT();
2837}
2838
2839MVT
2840HexagonTargetLowering::typeWidenToHvx(MVT Ty) const {
2841 unsigned HwWidth = 8 * Subtarget.getVectorLength();
2842 assert(Ty.getSizeInBits() <= HwWidth);
2843 if (Ty.getSizeInBits() == HwWidth)
2844 return Ty;
2845
2846 MVT ElemTy = Ty.getScalarType();
2847 return MVT::getVectorVT(VT: ElemTy, NumElements: HwWidth / ElemTy.getSizeInBits());
2848}
2849
HexagonTargetLowering::VectorPair
HexagonTargetLowering::emitHvxAddWithOverflow(SDValue A, SDValue B,
      const SDLoc &dl, bool Signed, SelectionDAG &DAG) const {
  // Compute A+B, return {A+B, O}, where O = vector predicate indicating
  // whether an overflow has occurred. A and B must have the same vector
  // type; the overflow predicate has one i1 lane per element.
  MVT ResTy = ty(Op: A);
  assert(ResTy == ty(B));
  MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: ResTy.getVectorNumElements());

  if (!Signed) {
    // V62+ has V6_vaddcarry, but it requires input predicate, so it doesn't
    // save any instructions.
    // Unsigned overflow iff the sum wrapped around, i.e. A+B < A.
    SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ResTy, Ops: {A, B});
    SDValue Ovf = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Add, RHS: A, Cond: ISD::SETULT);
    return {Add, Ovf};
  }

  // Signed overflow has happened, if:
  // (A, B have the same sign) and (A+B has a different sign from either)
  // i.e. (~A xor B) & ((A+B) xor B), then check the sign bit
  SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ResTy, Ops: {A, B});
  // ~A via xor with all-ones.
  SDValue NotA =
      DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {A, DAG.getAllOnesConstant(DL: dl, VT: ResTy)});
  SDValue Xor0 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {NotA, B});
  SDValue Xor1 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {Add, B});
  SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ResTy, Ops: {Xor0, Xor1});
  // Sign bit set <=> value < 0 (signed compare against zero).
  SDValue MSB =
      DAG.getSetCC(DL: dl, VT: PredTy, LHS: And, RHS: getZero(dl, Ty: ResTy, DAG), Cond: ISD::SETLT);
  return {Add, MSB};
}
2880
HexagonTargetLowering::VectorPair
HexagonTargetLowering::emitHvxShiftRightRnd(SDValue Val, unsigned Amt,
      bool Signed, SelectionDAG &DAG) const {
  // Shift Val right by Amt bits, round the result to the nearest integer,
  // tie-break by rounding halves to even integer.
  // Returns {rounded result, overflow predicate from the rounding add}.

  const SDLoc &dl(Val);
  MVT ValTy = ty(Op: Val);

  // This should also work for signed integers.
  //
  // uint tmp0 = inp + ((1 << (Amt-1)) - 1);
  // bool ovf = (inp > tmp0);
  // uint rup = inp & (1 << Amt);  // bit Amt is the LSB of the result,
  //                               // used to round ties to even
  //
  // uint tmp1 = inp >> (Amt-1);    // tmp1 == tmp2 iff
  // uint tmp2 = tmp0 >> (Amt-1);   // the Amt-1 lower bits were all 0
  // uint tmp3 = tmp2 + rup;
  // uint frac = (tmp1 != tmp2) ? tmp2 >> 1 : tmp3 >> 1;
  unsigned ElemWidth = ValTy.getVectorElementType().getSizeInBits();
  MVT ElemTy = MVT::getIntegerVT(BitWidth: ElemWidth);
  MVT IntTy = tyVector(Ty: ValTy, ElemTy);
  MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: IntTy.getVectorNumElements());
  unsigned ShRight = Signed ? ISD::SRA : ISD::SRL;

  SDValue Inp = DAG.getBitcast(VT: IntTy, V: Val);
  SDValue LowBits = DAG.getConstant(Val: (1ull << (Amt - 1)) - 1, DL: dl, VT: IntTy);

  // Rup = 1 if bit Amt of the input is set, else 0 (zext of the compare).
  SDValue AmtP1 = DAG.getConstant(Val: 1ull << Amt, DL: dl, VT: IntTy);
  SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntTy, Ops: {Inp, AmtP1});
  SDValue Zero = getZero(dl, Ty: IntTy, DAG);
  SDValue Bit = DAG.getSetCC(DL: dl, VT: PredTy, LHS: And, RHS: Zero, Cond: ISD::SETNE);
  SDValue Rup = DAG.getZExtOrTrunc(Op: Bit, DL: dl, VT: IntTy);
  auto [Tmp0, Ovf] = emitHvxAddWithOverflow(A: Inp, B: LowBits, dl, Signed, DAG);

  SDValue AmtM1 = DAG.getConstant(Val: Amt - 1, DL: dl, VT: IntTy);
  SDValue Tmp1 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, N1: Inp, N2: AmtM1);
  SDValue Tmp2 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, N1: Tmp0, N2: AmtM1);
  SDValue Tmp3 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: IntTy, N1: Tmp2, N2: Rup);

  // If tmp1 == tmp2 the shifted-out bits were a tie: use the round-to-even
  // value (tmp3 >> 1), otherwise the plain rounded value (tmp2 >> 1).
  SDValue Eq = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Tmp1, RHS: Tmp2, Cond: ISD::SETEQ);
  SDValue One = DAG.getConstant(Val: 1, DL: dl, VT: IntTy);
  SDValue Tmp4 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, Ops: {Tmp2, One});
  SDValue Tmp5 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, Ops: {Tmp3, One});
  SDValue Mux = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: IntTy, Ops: {Eq, Tmp5, Tmp4});
  return {Mux, Ovf};
}
2928
SDValue
HexagonTargetLowering::emitHvxMulHsV60(SDValue A, SDValue B, const SDLoc &dl,
                                       SelectionDAG &DAG) const {
  // Return the high 32 bits of the lane-wise signed 32x32-bit products of
  // A and B (vector mulhs), expanded into the halfword multiply/accumulate
  // instructions available on HVX V60.
  MVT VecTy = ty(Op: A);
  MVT PairTy = typeJoin(Tys: {VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  SDValue S16 = DAG.getConstant(Val: 16, DL: dl, VT: MVT::i32);

  // mulhs(A,B) =
  // = [(Hi(A)*2^16 + Lo(A)) *s (Hi(B)*2^16 + Lo(B))] >> 32
  // = [Hi(A)*2^16 *s Hi(B)*2^16 + Hi(A) *su Lo(B)*2^16
  //    + Lo(A) *us (Hi(B)*2^16 + Lo(B))] >> 32
  // = [Hi(A) *s Hi(B)*2^32 + Hi(A) *su Lo(B)*2^16 + Lo(A) *us B] >> 32
  // The low half of Lo(A)*Lo(B) will be discarded (it's not added to
  // anything, so it cannot produce any carry over to higher bits),
  // so everything in [] can be shifted by 16 without loss of precision.
  // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + Lo(A)*B >> 16] >> 16
  // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + V6_vmpyewuh(A,B)] >> 16
  // The final additions need to make sure to properly maintain any carry-
  // out bits.
  //
  //   Hi(B) Lo(B)
  //   Hi(A) Lo(A)
  //  --------------
  //   Lo(B)*Lo(A)  | T0 = V6_vmpyewuh(B,A) does this,
  //   Hi(B)*Lo(A)  | + dropping the low 16 bits
  //   Hi(A)*Lo(B)  | T2
  //   Hi(B)*Hi(A)

  SDValue T0 = getInstr(MachineOpc: Hexagon::V6_vmpyewuh, dl, Ty: VecTy, Ops: {B, A}, DAG);
  // T1 = get Hi(A) into low halves.
  SDValue T1 = getInstr(MachineOpc: Hexagon::V6_vasrw, dl, Ty: VecTy, Ops: {A, S16}, DAG);
  // P0 = interleaved T1.h*B.uh (full precision product)
  SDValue P0 = getInstr(MachineOpc: Hexagon::V6_vmpyhus, dl, Ty: PairTy, Ops: {T1, B}, DAG);
  // T2 = T1.even(h) * B.even(uh), i.e. Hi(A)*Lo(B)
  SDValue T2 = LoHalf(V: P0, DAG);
  // We need to add T0+T2, recording the carry-out, which will be 1<<16
  // added to the final sum.
  // P1 = interleaved even/odd 32-bit (unsigned) sums of 16-bit halves
  SDValue P1 = getInstr(MachineOpc: Hexagon::V6_vadduhw, dl, Ty: PairTy, Ops: {T0, T2}, DAG);
  // P2 = interleaved even/odd 32-bit (signed) sums of 16-bit halves
  SDValue P2 = getInstr(MachineOpc: Hexagon::V6_vaddhw, dl, Ty: PairTy, Ops: {T0, T2}, DAG);
  // T3 = full-precision(T0+T2) >> 16
  // The low halves are added-unsigned, the high ones are added-signed.
  SDValue T3 = getInstr(MachineOpc: Hexagon::V6_vasrw_acc, dl, Ty: VecTy,
                        Ops: {HiHalf(V: P2, DAG), LoHalf(V: P1, DAG), S16}, DAG);
  // T4 = get Hi(B) into low halves (mirrors T1 for A).
  SDValue T4 = getInstr(MachineOpc: Hexagon::V6_vasrw, dl, Ty: VecTy, Ops: {B, S16}, DAG);
  // P3 = interleaved Hi(B)*Hi(A) (full precision),
  // which is now Lo(T1)*Lo(T4), so we want to keep the even product.
  SDValue P3 = getInstr(MachineOpc: Hexagon::V6_vmpyhv, dl, Ty: PairTy, Ops: {T1, T4}, DAG);
  SDValue T5 = LoHalf(V: P3, DAG);
  // Add the Hi(A)*Hi(B) term to the accumulated partial sums:
  SDValue T6 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: VecTy, Ops: {T3, T5});
  return T6;
}
2985
SDValue
HexagonTargetLowering::emitHvxMulLoHiV60(SDValue A, bool SignedA, SDValue B,
                                         bool SignedB, const SDLoc &dl,
                                         SelectionDAG &DAG) const {
  // Compute the full 64-bit lane-wise products of the i32 vectors A and B
  // on HVX V60, with the signedness of each operand given by SignedA and
  // SignedB. Returns a merged value: result 0 is the low 32 bits of each
  // product, result 1 the high 32 bits.
  MVT VecTy = ty(Op: A);
  MVT PairTy = typeJoin(Tys: {VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  SDValue S16 = DAG.getConstant(Val: 16, DL: dl, VT: MVT::i32);

  if (SignedA && !SignedB) {
    // Make A:unsigned, B:signed.
    std::swap(a&: A, b&: B);
    std::swap(a&: SignedA, b&: SignedB);
  }

  // Do halfword-wise multiplications for unsigned*unsigned product, then
  // add corrections for signed and unsigned*signed.

  SDValue Lo, Hi;

  // P0:lo = (uu) products of low halves of A and B,
  // P0:hi = (uu) products of high halves.
  SDValue P0 = getInstr(MachineOpc: Hexagon::V6_vmpyuhv, dl, Ty: PairTy, Ops: {A, B}, DAG);

  // Swap low/high halves in B
  SDValue T0 = getInstr(MachineOpc: Hexagon::V6_lvsplatw, dl, Ty: VecTy,
                        Ops: {DAG.getConstant(Val: 0x02020202, DL: dl, VT: MVT::i32)}, DAG);
  SDValue T1 = getInstr(MachineOpc: Hexagon::V6_vdelta, dl, Ty: VecTy, Ops: {B, T0}, DAG);
  // P1 = products of even/odd halfwords.
  // P1:lo = (uu) products of even(A.uh) * odd(B.uh)
  // P1:hi = (uu) products of odd(A.uh) * even(B.uh)
  SDValue P1 = getInstr(MachineOpc: Hexagon::V6_vmpyuhv, dl, Ty: PairTy, Ops: {A, T1}, DAG);

  // P2:lo = low halves of P1:lo + P1:hi,
  // P2:hi = high halves of P1:lo + P1:hi.
  SDValue P2 = getInstr(MachineOpc: Hexagon::V6_vadduhw, dl, Ty: PairTy,
                        Ops: {HiHalf(V: P1, DAG), LoHalf(V: P1, DAG)}, DAG);
  // Still need to add the high halves of P0:lo to P2:lo
  SDValue T2 =
      getInstr(MachineOpc: Hexagon::V6_vlsrw, dl, Ty: VecTy, Ops: {LoHalf(V: P0, DAG), S16}, DAG);
  SDValue T3 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: VecTy, Ops: {LoHalf(V: P2, DAG), T2});

  // The high halves of T3 will contribute to the HI part of LOHI.
  SDValue T4 = getInstr(MachineOpc: Hexagon::V6_vasrw_acc, dl, Ty: VecTy,
                        Ops: {HiHalf(V: P2, DAG), T3, S16}, DAG);

  // The low halves of P2 need to be added to high halves of the LO part.
  Lo = getInstr(MachineOpc: Hexagon::V6_vaslw_acc, dl, Ty: VecTy,
                Ops: {LoHalf(V: P0, DAG), LoHalf(V: P2, DAG), S16}, DAG);
  Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: VecTy, Ops: {HiHalf(V: P0, DAG), T4});

  if (SignedA) {
    assert(SignedB && "Signed A and unsigned B should have been inverted");

    // Correction for signed*signed starting from the unsigned product:
    // subtract (B if A < 0) + (A if B < 0) from the high word.
    MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: VecTy.getVectorNumElements());
    SDValue Zero = getZero(dl, Ty: VecTy, DAG);
    SDValue Q0 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: A, RHS: Zero, Cond: ISD::SETLT);
    SDValue Q1 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: B, RHS: Zero, Cond: ISD::SETLT);
    SDValue X0 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: VecTy, Ops: {Q0, B, Zero});
    SDValue X1 = getInstr(MachineOpc: Hexagon::V6_vaddwq, dl, Ty: VecTy, Ops: {Q1, X0, A}, DAG);
    Hi = getInstr(MachineOpc: Hexagon::V6_vsubw, dl, Ty: VecTy, Ops: {Hi, X1}, DAG);
  } else if (SignedB) {
    // Same correction as for mulhus:
    // mulhus(A.uw,B.w) = mulhu(A.uw,B.uw) - (A.w if B < 0)
    MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: VecTy.getVectorNumElements());
    SDValue Zero = getZero(dl, Ty: VecTy, DAG);
    SDValue Q1 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: B, RHS: Zero, Cond: ISD::SETLT);
    Hi = getInstr(MachineOpc: Hexagon::V6_vsubwq, dl, Ty: VecTy, Ops: {Q1, Hi, A}, DAG);
  } else {
    assert(!SignedA && !SignedB);
  }

  return DAG.getMergeValues(Ops: {Lo, Hi}, dl);
}
3061
SDValue
HexagonTargetLowering::emitHvxMulLoHiV62(SDValue A, bool SignedA,
                                         SDValue B, bool SignedB,
                                         const SDLoc &dl,
                                         SelectionDAG &DAG) const {
  // Compute the full 64-bit lane-wise products of the i32 vectors A and B
  // using the V62 64-bit multiply instructions, with the signedness of each
  // operand given by SignedA and SignedB. Returns a merged value: result 0
  // is the low 32 bits of each product, result 1 the high 32 bits.
  MVT VecTy = ty(Op: A);
  MVT PairTy = typeJoin(Tys: {VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  if (SignedA && !SignedB) {
    // Make A:unsigned, B:signed.
    std::swap(a&: A, b&: B);
    std::swap(a&: SignedA, b&: SignedB);
  }

  // Do S*S first, then make corrections for U*S or U*U if needed.
  SDValue P0 = getInstr(MachineOpc: Hexagon::V6_vmpyewuh_64, dl, Ty: PairTy, Ops: {A, B}, DAG);
  SDValue P1 =
      getInstr(MachineOpc: Hexagon::V6_vmpyowh_64_acc, dl, Ty: PairTy, Ops: {P0, A, B}, DAG);
  SDValue Lo = LoHalf(V: P1, DAG);
  SDValue Hi = HiHalf(V: P1, DAG);

  if (!SignedB) {
    assert(!SignedA && "Signed A and unsigned B should have been inverted");
    SDValue Zero = getZero(dl, Ty: VecTy, DAG);
    MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: VecTy.getVectorNumElements());

    // Mulhu(X, Y) = Mulhs(X, Y) + (X, if Y < 0) + (Y, if X < 0).
    // def: Pat<(VecI32 (mulhu HVI32:$A, HVI32:$B)),
    //          (V6_vaddw (HiHalf (Muls64O $A, $B)),
    //                    (V6_vaddwq (V6_vgtw (V6_vd0), $B),
    //                               (V6_vandvqv (V6_vgtw (V6_vd0), $A), $B),
    //                               $A))>;
    SDValue Q0 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: A, RHS: Zero, Cond: ISD::SETLT);
    SDValue Q1 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: B, RHS: Zero, Cond: ISD::SETLT);
    SDValue T0 = getInstr(MachineOpc: Hexagon::V6_vandvqv, dl, Ty: VecTy, Ops: {Q0, B}, DAG);
    SDValue T1 = getInstr(MachineOpc: Hexagon::V6_vaddwq, dl, Ty: VecTy, Ops: {Q1, T0, A}, DAG);
    Hi = getInstr(MachineOpc: Hexagon::V6_vaddw, dl, Ty: VecTy, Ops: {Hi, T1}, DAG);
  } else if (!SignedA) {
    SDValue Zero = getZero(dl, Ty: VecTy, DAG);
    MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: VecTy.getVectorNumElements());

    // Mulhus(unsigned X, signed Y) = Mulhs(X, Y) + (Y, if X < 0).
    // def: Pat<(VecI32 (HexagonMULHUS HVI32:$A, HVI32:$B)),
    //          (V6_vaddwq (V6_vgtw (V6_vd0), $A),
    //                     (HiHalf (Muls64O $A, $B)),
    //                     $B)>;
    SDValue Q0 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: A, RHS: Zero, Cond: ISD::SETLT);
    Hi = getInstr(MachineOpc: Hexagon::V6_vaddwq, dl, Ty: VecTy, Ops: {Q0, Hi, B}, DAG);
  }

  return DAG.getMergeValues(Ops: {Lo, Hi}, dl);
}
3115
3116SDValue
3117HexagonTargetLowering::EqualizeFpIntConversion(SDValue Op, SelectionDAG &DAG)
3118 const {
3119 // Rewrite conversion between integer and floating-point in such a way that
3120 // the integer type is extended/narrowed to match the bitwidth of the
3121 // floating-point type, combined with additional integer-integer extensions
3122 // or narrowings to match the original input/result types.
3123 // E.g. f32 -> i8 ==> f32 -> i32 -> i8
3124 //
3125 // The input/result types are not required to be legal, but if they are
3126 // legal, this function should not introduce illegal types.
3127
3128 unsigned Opc = Op.getOpcode();
3129 assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT ||
3130 Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP);
3131
3132 SDValue Inp = Op.getOperand(i: 0);
3133 MVT InpTy = ty(Op: Inp);
3134 MVT ResTy = ty(Op);
3135
3136 if (InpTy == ResTy)
3137 return Op;
3138
3139 const SDLoc &dl(Op);
3140 bool Signed = Opc == ISD::FP_TO_SINT || Opc == ISD::SINT_TO_FP;
3141
3142 auto [WInpTy, WResTy] = typeExtendToWider(Ty0: InpTy, Ty1: ResTy);
3143 SDValue WInp = resizeToWidth(VecV: Inp, ResTy: WInpTy, Signed, dl, DAG);
3144 SDValue Conv = DAG.getNode(Opcode: Opc, DL: dl, VT: WResTy, Operand: WInp);
3145 SDValue Res = resizeToWidth(VecV: Conv, ResTy, Signed, dl, DAG);
3146 return Res;
3147}
3148
SDValue
HexagonTargetLowering::ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
  // Expand a vector FP->integer conversion by manipulating the bits of the
  // IEEE representation directly (scalar model in the comment below), with
  // optional fast paths using the HVX conversion instructions.
  unsigned Opc = Op.getOpcode();
  assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT);

  const SDLoc &dl(Op);
  SDValue Op0 = Op.getOperand(i: 0);
  MVT InpTy = ty(Op: Op0);
  MVT ResTy = ty(Op);
  assert(InpTy.changeTypeToInteger() == ResTy);

  // At this point this is an experiment under a flag.
  // In arch before V81 the rounding mode is towards nearest value.
  // The C/C++ standard requires rounding towards zero:
  // C (C99 and later): ISO/IEC 9899:2018 (C18), section 6.3.1.4 — "When a
  // finite value of real floating type is converted to an integer type, the
  // fractional part is discarded (i.e., the value is truncated toward zero)."
  // C++: ISO/IEC 14882:2020 (C++20), section 7.3.7 — "A prvalue of a
  // floating-point type can be converted to a prvalue of an integer type. The
  // conversion truncates; that is, the fractional part is discarded."
  if (InpTy == MVT::v64f16) {
    if (Subtarget.useHVXV81Ops()) {
      // This is c/c++ compliant
      SDValue ConvVec =
          getInstr(MachineOpc: Hexagon::V6_vconv_h_hf_rnd, dl, Ty: ResTy, Ops: {Op0}, DAG);
      return ConvVec;
    } else if (EnableFpFastConvert) {
      // Vd32.h=Vu32.hf same as Q6_Vh_equals_Vhf
      SDValue ConvVec = getInstr(MachineOpc: Hexagon::V6_vconv_h_hf, dl, Ty: ResTy, Ops: {Op0}, DAG);
      return ConvVec;
    }
  } else if (EnableFpFastConvert && InpTy == MVT::v32f32) {
    // Vd32.w=Vu32.sf same as Q6_Vw_equals_Vsf
    SDValue ConvVec = getInstr(MachineOpc: Hexagon::V6_vconv_w_sf, dl, Ty: ResTy, Ops: {Op0}, DAG);
    return ConvVec;
  }

  // Scalar model of the generic expansion below (for f32 -> i32):
  //
  // int32_t conv_f32_to_i32(uint32_t inp) {
  //   // s | exp8 | frac23
  //
  //   int neg = (int32_t)inp < 0;
  //
  //   // "expm1" is the actual exponent minus 1: instead of "bias", subtract
  //   // "bias+1". When the encoded exp is "all-1" (i.e. inf/nan), this will
  //   // produce a large positive "expm1", which will result in max u/int.
  //   // In all IEEE formats, bias is the largest positive number that can be
  //   // represented in bias-width bits (i.e. 011..1).
  //   int32_t expm1 = (inp << 1) - 0x80000000;
  //   expm1 >>= 24;
  //
  //   // Always insert the "implicit 1". Subnormal numbers will become 0
  //   // regardless.
  //   uint32_t frac = (inp << 8) | 0x80000000;
  //
  //   // "frac" is the fraction part represented as Q1.31. If it was
  //   // interpreted as uint32_t, it would be the fraction part multiplied
  //   // by 2^31.
  //
  //   // Calculate the amount of right shift, since shifting further to the
  //   // left would lose significant bits. Limit it to 32, because we want
  //   // shifts by 32+ to produce 0, whereas V6_vlsrwv treats the shift
  //   // amount as a 6-bit signed value (so 33 is same as -31, i.e. shift
  //   // left by 31). "rsh" can be negative.
  //   int32_t rsh = min(31 - (expm1 + 1), 32);
  //
  //   frac >>= rsh;   // rsh == 32 will produce 0
  //
  //   // Everything up to this point is the same for conversion to signed
  //   // unsigned integer.
  //
  //   if (neg)                 // Only for signed int
  //     frac = -frac;          //
  //   if (rsh <= 0 && neg)     //   bound = neg ? 0x80000000 : 0x7fffffff
  //     frac = 0x80000000;     //   frac = rsh <= 0 ? bound : frac
  //   if (rsh <= 0 && !neg)    //
  //     frac = 0x7fffffff;     //
  //
  //   if (neg)                 // Only for unsigned int
  //     frac = 0;              //
  //   if (rsh < 0 && !neg)     //   frac = rsh < 0 ? 0x7fffffff : frac;
  //     frac = 0x7fffffff;     //   frac = neg ? 0 : frac;
  //
  //   return frac;
  // }

  MVT PredTy = MVT::getVectorVT(VT: MVT::i1, EC: ResTy.getVectorElementCount());

  // The same model in terms of HVX instructions:
  //
  // Zero = V6_vd0();
  // Neg = V6_vgtw(Zero, Inp);
  // One = V6_lvsplatw(1);
  // M80 = V6_lvsplatw(0x80000000);
  // Exp00 = V6_vaslwv(Inp, One);
  // Exp01 = V6_vsubw(Exp00, M80);
  // ExpM1 = V6_vasrw(Exp01, 24);
  // Frc00 = V6_vaslw(Inp, 8);
  // Frc01 = V6_vor(Frc00, M80);
  // Rsh00 = V6_vsubw(V6_lvsplatw(30), ExpM1);
  // Rsh01 = V6_vminw(Rsh00, V6_lvsplatw(32));
  // Frc02 = V6_vlsrwv(Frc01, Rsh01);

  // if signed int:
  // Bnd = V6_vmux(Neg, M80, V6_lvsplatw(0x7fffffff))
  // Pos = V6_vgtw(Rsh01, Zero);
  // Frc13 = V6_vsubw(Zero, Frc02);
  // Frc14 = V6_vmux(Neg, Frc13, Frc02);
  // Int = V6_vmux(Pos, Frc14, Bnd);
  //
  // if unsigned int:
  // Rsn = V6_vgtw(Zero, Rsh01)
  // Frc23 = V6_vmux(Rsn, V6_lvsplatw(0x7fffffff), Frc02)
  // Int = V6_vmux(Neg, Zero, Frc23)

  // The constants below are derived from the element's IEEE layout, so the
  // expansion works for any supported float element type, not only f32.
  auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(Ty: InpTy);
  unsigned ElemWidth = 1 + ExpWidth + FracWidth;
  assert((1ull << (ExpWidth - 1)) == (1 + ExpBias));

  SDValue Inp = DAG.getBitcast(VT: ResTy, V: Op0);
  SDValue Zero = getZero(dl, Ty: ResTy, DAG);
  SDValue Neg = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Inp, RHS: Zero, Cond: ISD::SETLT);
  SDValue M80 = DAG.getConstant(Val: 1ull << (ElemWidth - 1), DL: dl, VT: ResTy);
  SDValue M7F = DAG.getConstant(Val: (1ull << (ElemWidth - 1)) - 1, DL: dl, VT: ResTy);
  SDValue One = DAG.getConstant(Val: 1, DL: dl, VT: ResTy);
  SDValue Exp00 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: ResTy, Ops: {Inp, One});
  SDValue Exp01 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {Exp00, M80});
  SDValue MNE = DAG.getConstant(Val: ElemWidth - ExpWidth, DL: dl, VT: ResTy);
  SDValue ExpM1 = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: ResTy, Ops: {Exp01, MNE});

  SDValue ExpW = DAG.getConstant(Val: ExpWidth, DL: dl, VT: ResTy);
  SDValue Frc00 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: ResTy, Ops: {Inp, ExpW});
  SDValue Frc01 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ResTy, Ops: {Frc00, M80});

  SDValue MN2 = DAG.getConstant(Val: ElemWidth - 2, DL: dl, VT: ResTy);
  SDValue Rsh00 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {MN2, ExpM1});
  SDValue MW = DAG.getConstant(Val: ElemWidth, DL: dl, VT: ResTy);
  SDValue Rsh01 = DAG.getNode(Opcode: ISD::SMIN, DL: dl, VT: ResTy, Ops: {Rsh00, MW});
  SDValue Frc02 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ResTy, Ops: {Frc01, Rsh01});

  SDValue Int;

  if (Opc == ISD::FP_TO_SINT) {
    SDValue Bnd = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, Ops: {Neg, M80, M7F});
    SDValue Pos = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Rsh01, RHS: Zero, Cond: ISD::SETGT);
    SDValue Frc13 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {Zero, Frc02});
    SDValue Frc14 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, Ops: {Neg, Frc13, Frc02});
    Int = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, Ops: {Pos, Frc14, Bnd});
  } else {
    assert(Opc == ISD::FP_TO_UINT);
    SDValue Rsn = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Rsh01, RHS: Zero, Cond: ISD::SETLT);
    SDValue Frc23 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, N1: Rsn, N2: M7F, N3: Frc02);
    Int = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, N1: Neg, N2: Zero, N3: Frc23);
  }

  return Int;
}
3303
SDValue
HexagonTargetLowering::ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
  // Expand a vector integer->FP conversion by constructing the IEEE
  // representation (sign, exponent, fraction) with integer arithmetic;
  // the scalar model is in the comment below.
  unsigned Opc = Op.getOpcode();
  assert(Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP);

  const SDLoc &dl(Op);
  SDValue Op0 = Op.getOperand(i: 0);
  MVT InpTy = ty(Op: Op0);
  MVT ResTy = ty(Op);
  assert(ResTy.changeTypeToInteger() == InpTy);

  // Scalar model (for i32 -> f32):
  //
  // uint32_t vnoc1_rnd(int32_t w) {
  //   int32_t iszero = w == 0;
  //   int32_t isneg = w < 0;
  //   uint32_t u = __builtin_HEXAGON_A2_abs(w);
  //
  //   uint32_t norm_left = __builtin_HEXAGON_S2_cl0(u) + 1;
  //   uint32_t frac0 = (uint64_t)u << norm_left;
  //
  //   // Rounding:
  //   uint32_t frac1 = frac0 + ((1 << 8) - 1);
  //   uint32_t renorm = (frac0 > frac1);
  //   uint32_t rup = (int)(frac0 << 22) < 0;
  //
  //   uint32_t frac2 = frac0 >> 8;
  //   uint32_t frac3 = frac1 >> 8;
  //   uint32_t frac = (frac2 != frac3) ? frac3 >> 1 : (frac3 + rup) >> 1;
  //
  //   int32_t exp = 32 - norm_left + renorm + 127;
  //   exp <<= 23;
  //
  //   uint32_t sign = 0x80000000 * isneg;
  //   uint32_t f = sign | exp | frac;
  //   return iszero ? 0 : f;
  // }

  MVT PredTy = MVT::getVectorVT(VT: MVT::i1, EC: InpTy.getVectorElementCount());
  bool Signed = Opc == ISD::SINT_TO_FP;

  // The constants below are derived from the element's IEEE layout, so the
  // expansion works for any supported float element type, not only f32.
  auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(Ty: ResTy);
  unsigned ElemWidth = 1 + ExpWidth + FracWidth;

  // Normalize |Op0| so its leading 1 is shifted out (CTLZ + 1 positions),
  // leaving the fraction left-aligned.
  SDValue Zero = getZero(dl, Ty: InpTy, DAG);
  SDValue One = DAG.getConstant(Val: 1, DL: dl, VT: InpTy);
  SDValue IsZero = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Op0, RHS: Zero, Cond: ISD::SETEQ);
  SDValue Abs = Signed ? DAG.getNode(Opcode: ISD::ABS, DL: dl, VT: InpTy, Operand: Op0) : Op0;
  SDValue Clz = DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT: InpTy, Operand: Abs);
  SDValue NLeft = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: InpTy, Ops: {Clz, One});
  SDValue Frac0 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: InpTy, Ops: {Abs, NLeft});

  // Round the fraction into FracWidth bits; Ovf signals that rounding
  // carried out, requiring the exponent to be bumped (renormalization).
  auto [Frac, Ovf] = emitHvxShiftRightRnd(Val: Frac0, Amt: ExpWidth + 1, Signed: false, DAG);
  if (Signed) {
    // Merge in the sign bit for negative inputs.
    SDValue IsNeg = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Op0, RHS: Zero, Cond: ISD::SETLT);
    SDValue M80 = DAG.getConstant(Val: 1ull << (ElemWidth - 1), DL: dl, VT: InpTy);
    SDValue Sign = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: InpTy, Ops: {IsNeg, M80, Zero});
    Frac = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: InpTy, Ops: {Sign, Frac});
  }

  // exp = ElemWidth - norm_left + renorm + bias, placed into the exponent
  // field by shifting left by FracWidth.
  SDValue Rnrm = DAG.getZExtOrTrunc(Op: Ovf, DL: dl, VT: InpTy);
  SDValue Exp0 = DAG.getConstant(Val: ElemWidth + ExpBias, DL: dl, VT: InpTy);
  SDValue Exp1 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: InpTy, Ops: {Rnrm, Exp0});
  SDValue Exp2 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: InpTy, Ops: {Exp1, NLeft});
  SDValue Exp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: InpTy,
                             Ops: {Exp2, DAG.getConstant(Val: FracWidth, DL: dl, VT: InpTy)});
  SDValue Flt0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: InpTy, Ops: {Frac, Exp3});
  // Zero input must map to +0.0, not to the value assembled above.
  SDValue Flt1 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: InpTy, Ops: {IsZero, Zero, Flt0});
  SDValue Flt = DAG.getBitcast(VT: ResTy, V: Flt1);

  return Flt;
}
3374
3375SDValue
3376HexagonTargetLowering::CreateTLWrapper(SDValue Op, SelectionDAG &DAG) const {
3377 unsigned Opc = Op.getOpcode();
3378 unsigned TLOpc;
3379 switch (Opc) {
3380 case ISD::ANY_EXTEND:
3381 case ISD::SIGN_EXTEND:
3382 case ISD::ZERO_EXTEND:
3383 TLOpc = HexagonISD::TL_EXTEND;
3384 break;
3385 case ISD::TRUNCATE:
3386 TLOpc = HexagonISD::TL_TRUNCATE;
3387 break;
3388#ifndef NDEBUG
3389 Op.dump(&DAG);
3390#endif
3391 llvm_unreachable("Unexpected operator");
3392 }
3393
3394 const SDLoc &dl(Op);
3395 return DAG.getNode(Opcode: TLOpc, DL: dl, VT: ty(Op), N1: Op.getOperand(i: 0),
3396 N2: DAG.getUNDEF(VT: MVT::i128), // illegal type
3397 N3: DAG.getConstant(Val: Opc, DL: dl, VT: MVT::i32));
3398}
3399
3400SDValue
3401HexagonTargetLowering::RemoveTLWrapper(SDValue Op, SelectionDAG &DAG) const {
3402 assert(Op.getOpcode() == HexagonISD::TL_EXTEND ||
3403 Op.getOpcode() == HexagonISD::TL_TRUNCATE);
3404 unsigned Opc = Op.getConstantOperandVal(i: 2);
3405 return DAG.getNode(Opcode: Opc, DL: SDLoc(Op), VT: ty(Op), Operand: Op.getOperand(i: 0));
3406}
3407
HexagonTargetLowering::VectorPair
HexagonTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const {
  // Split an operation on a wide vector type into the same operation
  // applied to the two halves of each vector operand (non-vector operands
  // are duplicated into both halves). Returns the {low, high} result pair.
  assert(!Op.isMachineOpcode());
  SmallVector<SDValue, 2> OpsL, OpsH;
  const SDLoc &dl(Op);

  // For a VT operand (e.g. the type operand of SIGN_EXTEND_INREG), split
  // the vector type it carries instead of the operand value.
  auto SplitVTNode = [&DAG, this](const VTSDNode *N) {
    MVT Ty = typeSplit(VecTy: N->getVT().getSimpleVT()).first;
    SDValue TV = DAG.getValueType(Ty);
    return std::make_pair(x&: TV, y&: TV);
  };

  for (SDValue A : Op.getNode()->ops()) {
    auto [Lo, Hi] =
        ty(Op: A).isVector() ? opSplit(Vec: A, dl, DAG) : std::make_pair(x&: A, y&: A);
    // Special case for type operand.
    switch (Op.getOpcode()) {
    case ISD::SIGN_EXTEND_INREG:
    case HexagonISD::SSAT:
    case HexagonISD::USAT:
      if (const auto *N = dyn_cast<const VTSDNode>(Val: A.getNode()))
        std::tie(args&: Lo, args&: Hi) = SplitVTNode(N);
      break;
    }
    OpsL.push_back(Elt: Lo);
    OpsH.push_back(Elt: Hi);
  }

  MVT ResTy = ty(Op);
  MVT HalfTy = typeSplit(VecTy: ResTy).first;
  SDValue L = DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT: HalfTy, Ops: OpsL);
  SDValue H = DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT: HalfTy, Ops: OpsH);
  return {L, H};
}
3442
SDValue
HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
  // Split a memory operation (load/store/masked load/masked store) on an
  // HVX vector pair into two operations on single vectors, with the second
  // address offset by one vector length (HwLen bytes). Returns Op unchanged
  // if it is not a pair-typed memory operation.
  auto *MemN = cast<MemSDNode>(Val: Op.getNode());

  if (!MemN->getMemoryVT().isSimple())
    return Op;

  MVT MemTy = MemN->getMemoryVT().getSimpleVT();
  if (!isHvxPairTy(Ty: MemTy))
    return Op;

  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT SingleTy = typeSplit(VecTy: MemTy).first;
  SDValue Chain = MemN->getChain();
  SDValue Base0 = MemN->getBasePtr();
  SDValue Base1 =
      DAG.getMemBasePlusOffset(Base: Base0, Offset: TypeSize::getFixed(ExactSize: HwLen), DL: dl);
  unsigned MemOpc = MemN->getOpcode();

  MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
  if (MachineMemOperand *MMO = MemN->getMemOperand()) {
    MachineFunction &MF = DAG.getMachineFunction();
    // For masked ops the number of bytes actually accessed depends on the
    // mask, so it is reported as unknown.
    uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
                           ? (uint64_t)MemoryLocation::UnknownSize
                           : HwLen;
    MOp0 = MF.getMachineMemOperand(MMO, Offset: 0, Size: MemSize);
    MOp1 = MF.getMachineMemOperand(MMO, Offset: HwLen, Size: MemSize);
  }

  if (MemOpc == ISD::LOAD) {
    assert(cast<LoadSDNode>(Op)->isUnindexed());
    SDValue Load0 = DAG.getLoad(VT: SingleTy, dl, Chain, Ptr: Base0, MMO: MOp0);
    SDValue Load1 = DAG.getLoad(VT: SingleTy, dl, Chain, Ptr: Base1, MMO: MOp1);
    // Reassemble the pair value and merge the two output chains.
    return DAG.getMergeValues(
        Ops: { DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: MemTy, N1: Load0, N2: Load1),
          DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other,
                      N1: Load0.getValue(R: 1), N2: Load1.getValue(R: 1)) }, dl);
  }
  if (MemOpc == ISD::STORE) {
    assert(cast<StoreSDNode>(Op)->isUnindexed());
    VectorPair Vals = opSplit(Vec: cast<StoreSDNode>(Val&: Op)->getValue(), dl, DAG);
    SDValue Store0 = DAG.getStore(Chain, dl, Val: Vals.first, Ptr: Base0, MMO: MOp0);
    SDValue Store1 = DAG.getStore(Chain, dl, Val: Vals.second, Ptr: Base1, MMO: MOp1);
    return DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Store0, N2: Store1);
  }

  assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE);

  // Masked ops: split the mask (and pass-through/stored value) as well.
  auto MaskN = cast<MaskedLoadStoreSDNode>(Val&: Op);
  assert(MaskN->isUnindexed());
  VectorPair Masks = opSplit(Vec: MaskN->getMask(), dl, DAG);
  SDValue Offset = DAG.getUNDEF(VT: MVT::i32);

  if (MemOpc == ISD::MLOAD) {
    VectorPair Thru =
        opSplit(Vec: cast<MaskedLoadSDNode>(Val&: Op)->getPassThru(), dl, DAG);
    SDValue MLoad0 =
        DAG.getMaskedLoad(VT: SingleTy, dl, Chain, Base: Base0, Offset, Mask: Masks.first,
                          Src0: Thru.first, MemVT: SingleTy, MMO: MOp0, AM: ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, IsExpanding: false);
    SDValue MLoad1 =
        DAG.getMaskedLoad(VT: SingleTy, dl, Chain, Base: Base1, Offset, Mask: Masks.second,
                          Src0: Thru.second, MemVT: SingleTy, MMO: MOp1, AM: ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, IsExpanding: false);
    return DAG.getMergeValues(
        Ops: { DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: MemTy, N1: MLoad0, N2: MLoad1),
          DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other,
                      N1: MLoad0.getValue(R: 1), N2: MLoad1.getValue(R: 1)) }, dl);
  }
  if (MemOpc == ISD::MSTORE) {
    VectorPair Vals = opSplit(Vec: cast<MaskedStoreSDNode>(Val&: Op)->getValue(), dl, DAG);
    SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Val: Vals.first, Base: Base0, Offset,
                                         Mask: Masks.first, MemVT: SingleTy, MMO: MOp0,
                                         AM: ISD::UNINDEXED, IsTruncating: false, IsCompressing: false);
    SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Val: Vals.second, Base: Base1, Offset,
                                         Mask: Masks.second, MemVT: SingleTy, MMO: MOp1,
                                         AM: ISD::UNINDEXED, IsTruncating: false, IsCompressing: false);
    return DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: MStore0, N2: MStore1);
  }

  std::string Name = "Unexpected operation: " + Op->getOperationName(G: &DAG);
  llvm_unreachable(Name.c_str());
}
3527
SDValue
HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
  // Widen a load of a short (sub-HVX) vector into a masked load of a full
  // HVX vector, with the mask covering only the original ResLen bytes.
  const SDLoc &dl(Op);
  auto *LoadN = cast<LoadSDNode>(Val: Op.getNode());
  assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
  assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
         "Not widening loads of i1 yet");

  SDValue Chain = LoadN->getChain();
  SDValue Base = LoadN->getBasePtr();
  SDValue Offset = DAG.getUNDEF(VT: MVT::i32);

  MVT ResTy = ty(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  unsigned ResLen = ResTy.getStoreSize();
  assert(ResLen < HwLen && "vsetq(v1) prerequisite");

  // Mask = first ResLen byte lanes set (V6_pred_scalar2).
  MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen);
  SDValue Mask = getInstr(MachineOpc: Hexagon::V6_pred_scalar2, dl, Ty: BoolTy,
                          Ops: {DAG.getConstant(Val: ResLen, DL: dl, VT: MVT::i32)}, DAG);

  // Load a full vector of bytes, then recast to the requested element type.
  MVT LoadTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MemOp = MF.getMachineMemOperand(MMO: LoadN->getMemOperand(), Offset: 0, Size: HwLen);

  SDValue Load = DAG.getMaskedLoad(VT: LoadTy, dl, Chain, Base, Offset, Mask,
                                   Src0: DAG.getUNDEF(VT: LoadTy), MemVT: LoadTy, MMO: MemOp,
                                   AM: ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding: false);
  SDValue Value = opCastElem(Vec: Load, ElemTy: ResTy.getVectorElementType(), DAG);
  return DAG.getMergeValues(Ops: {Value, Load.getValue(R: 1)}, dl);
}
3559
SDValue
HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
  // Widen a store of a short (sub-HVX) vector into a masked store of a full
  // HVX vector, with the mask covering only the original value's bytes.
  const SDLoc &dl(Op);
  auto *StoreN = cast<StoreSDNode>(Val: Op.getNode());
  assert(StoreN->isUnindexed() && "Not widening indexed stores yet");
  assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 &&
         "Not widening stores of i1 yet");

  SDValue Chain = StoreN->getChain();
  SDValue Base = StoreN->getBasePtr();
  SDValue Offset = DAG.getUNDEF(VT: MVT::i32);

  // View the value as bytes, then pad with undef halves until it fills a
  // full HVX vector.
  SDValue Value = opCastElem(Vec: StoreN->getValue(), ElemTy: MVT::i8, DAG);
  MVT ValueTy = ty(Op: Value);
  unsigned ValueLen = ValueTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(isPowerOf2_32(ValueLen));

  for (unsigned Len = ValueLen; Len < HwLen; ) {
    Value = opJoin(Ops: {Value, DAG.getUNDEF(VT: ty(Op: Value))}, dl, DAG);
    Len = ty(Op: Value).getVectorNumElements(); // This is Len *= 2
  }
  assert(ty(Value).getVectorNumElements() == HwLen); // Paranoia

  // Mask = first ValueLen byte lanes set (V6_pred_scalar2), so only the
  // original bytes are written.
  assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
  MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen);
  SDValue Mask = getInstr(MachineOpc: Hexagon::V6_pred_scalar2, dl, Ty: BoolTy,
                          Ops: {DAG.getConstant(Val: ValueLen, DL: dl, VT: MVT::i32)}, DAG);
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MemOp = MF.getMachineMemOperand(MMO: StoreN->getMemOperand(), Offset: 0, Size: HwLen);
  return DAG.getMaskedStore(Chain, dl, Val: Value, Base, Offset, Mask, MemVT: ty(Op: Value),
                            MMO: MemOp, AM: ISD::UNINDEXED, IsTruncating: false, IsCompressing: false);
}
3593
SDValue
HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
  // Widen a setcc on short vector operands: pad both operands with undef
  // up to a full HVX vector, compare at the wide type, then extract the
  // subvector corresponding to the legalized result type.
  const SDLoc &dl(Op);
  SDValue Op0 = Op.getOperand(i: 0), Op1 = Op.getOperand(i: 1);
  MVT ElemTy = ty(Op: Op0).getVectorElementType();
  unsigned HwLen = Subtarget.getVectorLength();

  // Number of elements that exactly fill one HVX vector (8*HwLen bits).
  unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits();
  assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen);
  MVT WideOpTy = MVT::getVectorVT(VT: ElemTy, NumElements: WideOpLen);
  if (!Subtarget.isHVXVectorType(VecTy: WideOpTy, IncludeBool: true))
    return SDValue();

  SDValue WideOp0 = appendUndef(Val: Op0, ResTy: WideOpTy, DAG);
  SDValue WideOp1 = appendUndef(Val: Op1, ResTy: WideOpTy, DAG);
  EVT ResTy =
      getSetCCResultType(DAG.getDataLayout(), C&: *DAG.getContext(), VT: WideOpTy);
  SDValue SetCC = DAG.getNode(Opcode: ISD::SETCC, DL: dl, VT: ResTy,
                              Ops: {WideOp0, WideOp1, Op.getOperand(i: 2)});

  // Trim the wide comparison result to the type the legalizer expects.
  EVT RetTy = typeLegalize(Ty: ty(Op), DAG);
  return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL: dl, VT: RetTy,
                     Ops: {SetCC, getZero(dl, Ty: MVT::i32, DAG)});
}
3618
SDValue
HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
  // Main dispatch for custom HVX lowering. Operations on HVX vector pairs
  // are split into two single-vector operations where legal; everything
  // else is routed to a dedicated LowerHvx* routine.
  unsigned Opc = Op.getOpcode();
  // A "pair op" produces or consumes a vector-pair (double HVX) type.
  bool IsPairOp = isHvxPairTy(Ty: ty(Op)) ||
                  llvm::any_of(Range: Op.getNode()->ops(), P: [this] (SDValue V) {
                    return isHvxPairTy(Ty: ty(Op: V));
                  });

  if (IsPairOp) {
    switch (Opc) {
      default:
        break;
      case ISD::LOAD:
      case ISD::STORE:
      case ISD::MLOAD:
      case ISD::MSTORE:
        return SplitHvxMemOp(Op, DAG);
      case ISD::SINT_TO_FP:
      case ISD::UINT_TO_FP:
      case ISD::FP_TO_SINT:
      case ISD::FP_TO_UINT:
        // Conversions are only splittable when input and result have the
        // same total width (so both halves split consistently).
        if (ty(Op).getSizeInBits() == ty(Op: Op.getOperand(i: 0)).getSizeInBits())
          return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc(Op), DAG);
        break;
      case ISD::ABS:
      case ISD::CTPOP:
      case ISD::CTLZ:
      case ISD::CTTZ:
      case ISD::MUL:
      case ISD::FADD:
      case ISD::FSUB:
      case ISD::FMUL:
      case ISD::FMINIMUMNUM:
      case ISD::FMAXIMUMNUM:
      case ISD::MULHS:
      case ISD::MULHU:
      case ISD::AND:
      case ISD::OR:
      case ISD::XOR:
      case ISD::SRA:
      case ISD::SHL:
      case ISD::SRL:
      case ISD::FSHL:
      case ISD::FSHR:
      case ISD::SMIN:
      case ISD::SMAX:
      case ISD::UMIN:
      case ISD::UMAX:
      case ISD::SETCC:
      case ISD::VSELECT:
      case ISD::SIGN_EXTEND_INREG:
      case ISD::SPLAT_VECTOR:
        // Elementwise operations: split, operate, rejoin.
        return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc(Op), DAG);
      case ISD::SIGN_EXTEND:
      case ISD::ZERO_EXTEND:
        // In general, sign- and zero-extends can't be split and still
        // be legal. The only exception is extending bool vectors.
        if (ty(Op: Op.getOperand(i: 0)).getVectorElementType() == MVT::i1)
          return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc(Op), DAG);
        break;
    }
  }

  switch (Opc) {
    default:
      break;
    // clang-format off
    case ISD::BUILD_VECTOR:            return LowerHvxBuildVector(Op, DAG);
    case ISD::SPLAT_VECTOR:            return LowerHvxSplatVector(Op, DAG);
    case ISD::CONCAT_VECTORS:          return LowerHvxConcatVectors(Op, DAG);
    case ISD::INSERT_SUBVECTOR:        return LowerHvxInsertSubvector(Op, DAG);
    case ISD::INSERT_VECTOR_ELT:       return LowerHvxInsertElement(Op, DAG);
    case ISD::EXTRACT_SUBVECTOR:       return LowerHvxExtractSubvector(Op, DAG);
    case ISD::EXTRACT_VECTOR_ELT:      return LowerHvxExtractElement(Op, DAG);
    case ISD::BITCAST:                 return LowerHvxBitcast(Op, DAG);
    case ISD::ANY_EXTEND:              return LowerHvxAnyExt(Op, DAG);
    case ISD::SIGN_EXTEND:             return LowerHvxSignExt(Op, DAG);
    case ISD::ZERO_EXTEND:             return LowerHvxZeroExt(Op, DAG);
    case ISD::CTTZ:                    return LowerHvxCttz(Op, DAG);
    case ISD::SELECT:                  return LowerHvxSelect(Op, DAG);
    case ISD::SRA:
    case ISD::SHL:
    case ISD::SRL:                     return LowerHvxShift(Op, DAG);
    case ISD::FSHL:
    case ISD::FSHR:                    return LowerHvxFunnelShift(Op, DAG);
    case ISD::MULHS:
    case ISD::MULHU:                   return LowerHvxMulh(Op, DAG);
    case ISD::SMUL_LOHI:
    case ISD::UMUL_LOHI:               return LowerHvxMulLoHi(Op, DAG);
    case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
    case ISD::SETCC:
    case ISD::INTRINSIC_VOID:          return Op;
    case ISD::INTRINSIC_WO_CHAIN:      return LowerHvxIntrinsic(Op, DAG);
    case ISD::MLOAD:
    case ISD::MSTORE:                  return LowerHvxMaskedOp(Op, DAG);
    // Unaligned loads will be handled by the default lowering.
    case ISD::LOAD:                    return LowerHvxLoad(Op, DAG);
    case ISD::STORE:                   return LowerHvxStore(Op, DAG);
    case ISD::FP_EXTEND:               return LowerHvxFpExtend(Op, DAG);
    case ISD::FP_TO_SINT:
    case ISD::FP_TO_UINT:              return LowerHvxFpToInt(Op, DAG);
    case ISD::SINT_TO_FP:
    case ISD::UINT_TO_FP:              return LowerHvxIntToFp(Op, DAG);

    // Special nodes:
    case HexagonISD::SMUL_LOHI:
    case HexagonISD::UMUL_LOHI:
    case HexagonISD::USMUL_LOHI:       return LowerHvxMulLoHi(Op, DAG);

    case ISD::PARTIAL_REDUCE_SMLA:
    case ISD::PARTIAL_REDUCE_UMLA:
    case ISD::PARTIAL_REDUCE_SUMLA:
      return LowerHvxPartialReduceMLA(Op, DAG);
    // clang-format on
  }
#ifndef NDEBUG
  Op.dumpr(&DAG);
#endif
  llvm_unreachable("Unhandled HVX operation");
}
3739
3740SDValue
3741HexagonTargetLowering::ExpandHvxResizeIntoSteps(SDValue Op, SelectionDAG &DAG)
3742 const {
3743 // Rewrite the extension/truncation/saturation op into steps where each
3744 // step changes the type widths by a factor of 2.
3745 // E.g. i8 -> i16 remains unchanged, but i8 -> i32 ==> i8 -> i16 -> i32.
3746 //
3747 // Some of the vector types in Op may not be legal.
3748
3749 unsigned Opc = Op.getOpcode();
3750 switch (Opc) {
3751 case HexagonISD::SSAT:
3752 case HexagonISD::USAT:
3753 case HexagonISD::TL_EXTEND:
3754 case HexagonISD::TL_TRUNCATE:
3755 break;
3756 case ISD::ANY_EXTEND:
3757 case ISD::ZERO_EXTEND:
3758 case ISD::SIGN_EXTEND:
3759 case ISD::TRUNCATE:
3760 llvm_unreachable("ISD:: ops will be auto-folded");
3761 break;
3762#ifndef NDEBUG
3763 Op.dump(&DAG);
3764#endif
3765 llvm_unreachable("Unexpected operation");
3766 }
3767
3768 SDValue Inp = Op.getOperand(i: 0);
3769 MVT InpTy = ty(Op: Inp);
3770 MVT ResTy = ty(Op);
3771
3772 unsigned InpWidth = InpTy.getVectorElementType().getSizeInBits();
3773 unsigned ResWidth = ResTy.getVectorElementType().getSizeInBits();
3774 assert(InpWidth != ResWidth);
3775
3776 if (InpWidth == 2 * ResWidth || ResWidth == 2 * InpWidth)
3777 return Op;
3778
3779 const SDLoc &dl(Op);
3780 unsigned NumElems = InpTy.getVectorNumElements();
3781 assert(NumElems == ResTy.getVectorNumElements());
3782
3783 auto repeatOp = [&](unsigned NewWidth, SDValue Arg) {
3784 MVT Ty = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: NewWidth), NumElements: NumElems);
3785 switch (Opc) {
3786 case HexagonISD::SSAT:
3787 case HexagonISD::USAT:
3788 return DAG.getNode(Opcode: Opc, DL: dl, VT: Ty, Ops: {Arg, DAG.getValueType(Ty)});
3789 case HexagonISD::TL_EXTEND:
3790 case HexagonISD::TL_TRUNCATE:
3791 return DAG.getNode(Opcode: Opc, DL: dl, VT: Ty, Ops: {Arg, Op.getOperand(i: 1), Op.getOperand(i: 2)});
3792 default:
3793 llvm_unreachable("Unexpected opcode");
3794 }
3795 };
3796
3797 SDValue S = Inp;
3798 if (InpWidth < ResWidth) {
3799 assert(ResWidth % InpWidth == 0 && isPowerOf2_32(ResWidth / InpWidth));
3800 while (InpWidth * 2 <= ResWidth)
3801 S = repeatOp(InpWidth *= 2, S);
3802 } else {
3803 // InpWidth > ResWidth
3804 assert(InpWidth % ResWidth == 0 && isPowerOf2_32(InpWidth / ResWidth));
3805 while (InpWidth / 2 >= ResWidth)
3806 S = repeatOp(InpWidth /= 2, S);
3807 }
3808 return S;
3809}
3810
SDValue
HexagonTargetLowering::LegalizeHvxResize(SDValue Op, SelectionDAG &DAG) const {
  // Legalize SSAT/USAT/TL_EXTEND/TL_TRUNCATE whose input or result type
  // is not a legal HVX type, either by widening to HVX or by splitting.
  SDValue Inp0 = Op.getOperand(i: 0);
  MVT InpTy = ty(Op: Inp0);
  MVT ResTy = ty(Op);
  unsigned InpWidth = InpTy.getSizeInBits();
  unsigned ResWidth = ResTy.getSizeInBits();
  unsigned Opc = Op.getOpcode();

  if (shouldWidenToHvx(Ty: InpTy, DAG) || shouldWidenToHvx(Ty: ResTy, DAG)) {
    // First, make sure that the narrower type is widened to HVX.
    // This may cause the result to be wider than what the legalizer
    // expects, so insert EXTRACT_SUBVECTOR to bring it back to the
    // desired type.
    auto [WInpTy, WResTy] =
        InpWidth < ResWidth ? typeWidenToWider(Ty0: typeWidenToHvx(Ty: InpTy), Ty1: ResTy)
                            : typeWidenToWider(Ty0: InpTy, Ty1: typeWidenToHvx(Ty: ResTy));
    SDValue W = appendUndef(Val: Inp0, ResTy: WInpTy, DAG);
    SDValue S;
    // TL_* nodes carry two extra operands that must be propagated;
    // SSAT/USAT take a value-type operand instead.
    if (Opc == HexagonISD::TL_EXTEND || Opc == HexagonISD::TL_TRUNCATE) {
      S = DAG.getNode(Opcode: Opc, DL: SDLoc(Op), VT: WResTy, N1: W, N2: Op.getOperand(i: 1),
                      N3: Op.getOperand(i: 2));
    } else {
      S = DAG.getNode(Opcode: Opc, DL: SDLoc(Op), VT: WResTy, N1: W, N2: DAG.getValueType(WResTy));
    }
    SDValue T = ExpandHvxResizeIntoSteps(Op: S, DAG);
    return extractSubvector(Vec: T, SubTy: typeLegalize(Ty: ResTy, DAG), SubIdx: 0, DAG);
  } else if (shouldSplitToHvx(Ty: InpWidth < ResWidth ? ResTy : InpTy, DAG)) {
    // For multi-step extends/truncates (e.g., i8->i32), expand into
    // single-step operations first. Splitting a multi-step TL_EXTEND
    // would halve the operand type to a sub-HVX size (e.g., v128i8 ->
    // v64i8), creating illegal types that cause issues in the type
    // legalizer's map tracking. Single-step operations (e.g., i16->i32)
    // are safe to split because their halved operand types remain legal.
    SDValue T = ExpandHvxResizeIntoSteps(Op, DAG);
    if (T != Op)
      return T;
    return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc(Op), DAG);
  } else {
    // Both types already legal: just strip the TL wrapper.
    assert(isTypeLegal(InpTy) && isTypeLegal(ResTy));
    return RemoveTLWrapper(Op, DAG);
  }
  llvm_unreachable("Unexpected situation");
}
3855
void
HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
      SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  // Custom operand-legalization hook for HVX. Pushing nothing into
  // Results tells the legalizer to proceed with default handling.
  unsigned Opc = N->getOpcode();
  SDValue Op(N, 0);
  SDValue Inp0;   // Optional first argument.
  if (N->getNumOperands() > 0)
    Inp0 = Op.getOperand(i: 0);

  switch (Opc) {
    case ISD::ANY_EXTEND:
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::TRUNCATE:
      // Wrap in TL_* nodes so the ISD forms are not auto-folded while the
      // types are still being legalized.
      if (Subtarget.isHVXElementType(Ty: ty(Op)) &&
          Subtarget.isHVXElementType(Ty: ty(Op: Inp0))) {
        Results.push_back(Elt: CreateTLWrapper(Op, DAG));
      }
      break;
    case ISD::SETCC:
      if (shouldWidenToHvx(Ty: ty(Op: Inp0), DAG)) {
        if (SDValue T = WidenHvxSetCC(Op, DAG))
          Results.push_back(Elt: T);
      }
      break;
    case ISD::STORE: {
      if (shouldWidenToHvx(Ty: ty(Op: cast<StoreSDNode>(Val: N)->getValue()), DAG)) {
        SDValue Store = WidenHvxStore(Op, DAG);
        Results.push_back(Elt: Store);
      }
      break;
    }
    case ISD::MLOAD:
      // Split a pair-typed masked load into two; the MERGE_VALUES carries
      // (value, chain).
      if (isHvxPairTy(Ty: ty(Op))) {
        SDValue S = SplitHvxMemOp(Op, DAG);
        assert(S->getOpcode() == ISD::MERGE_VALUES);
        Results.push_back(Elt: S.getOperand(i: 0));
        Results.push_back(Elt: S.getOperand(i: 1));
      }
      break;
    case ISD::MSTORE:
      if (isHvxPairTy(Ty: ty(Op: Op->getOperand(Num: 1)))) {    // Stored value
        SDValue S = SplitHvxMemOp(Op, DAG);
        Results.push_back(Elt: S);
      }
      break;
    case ISD::SINT_TO_FP:
    case ISD::UINT_TO_FP:
    case ISD::FP_TO_SINT:
    case ISD::FP_TO_UINT:
      // Make the int and fp sides of the conversion the same width first.
      if (ty(Op).getSizeInBits() != ty(Op: Inp0).getSizeInBits()) {
        SDValue T = EqualizeFpIntConversion(Op, DAG);
        Results.push_back(Elt: T);
      }
      break;
    case HexagonISD::SSAT:
    case HexagonISD::USAT:
    case HexagonISD::TL_EXTEND:
    case HexagonISD::TL_TRUNCATE:
      Results.push_back(Elt: LegalizeHvxResize(Op, DAG));
      break;
    default:
      break;
  }
}
3921
void
HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
      SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  // Custom result-legalization hook for HVX. An empty Results means the
  // legalizer falls back to its default handling.
  unsigned Opc = N->getOpcode();
  SDValue Op(N, 0);
  SDValue Inp0;   // Optional first argument.
  if (N->getNumOperands() > 0)
    Inp0 = Op.getOperand(i: 0);

  switch (Opc) {
    case ISD::ANY_EXTEND:
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::TRUNCATE:
      // Wrap in TL_* nodes so the ISD forms are not auto-folded while the
      // types are still being legalized.
      if (Subtarget.isHVXElementType(Ty: ty(Op)) &&
          Subtarget.isHVXElementType(Ty: ty(Op: Inp0))) {
        Results.push_back(Elt: CreateTLWrapper(Op, DAG));
      }
      break;
    case ISD::SETCC:
      if (shouldWidenToHvx(Ty: ty(Op), DAG)) {
        if (SDValue T = WidenHvxSetCC(Op, DAG))
          Results.push_back(Elt: T);
      }
      break;
    case ISD::LOAD: {
      // WidenHvxLoad returns a MERGE_VALUES of (value, chain).
      if (shouldWidenToHvx(Ty: ty(Op), DAG)) {
        SDValue Load = WidenHvxLoad(Op, DAG);
        assert(Load->getOpcode() == ISD::MERGE_VALUES);
        Results.push_back(Elt: Load.getOperand(i: 0));
        Results.push_back(Elt: Load.getOperand(i: 1));
      }
      break;
    }
    case ISD::BITCAST:
      // Bitcasts from HVX predicate types need custom expansion.
      if (isHvxBoolTy(Ty: ty(Op: Inp0))) {
        SDValue C = LowerHvxBitcast(Op, DAG);
        Results.push_back(Elt: C);
      }
      break;
    case ISD::FP_TO_SINT:
    case ISD::FP_TO_UINT:
      // Make the int and fp sides of the conversion the same width first.
      if (ty(Op).getSizeInBits() != ty(Op: Inp0).getSizeInBits()) {
        SDValue T = EqualizeFpIntConversion(Op, DAG);
        Results.push_back(Elt: T);
      }
      break;
    case HexagonISD::SSAT:
    case HexagonISD::USAT:
    case HexagonISD::TL_EXTEND:
    case HexagonISD::TL_TRUNCATE:
      Results.push_back(Elt: LegalizeHvxResize(Op, DAG));
      break;
    default:
      break;
  }
}
3979
3980SDValue
3981HexagonTargetLowering::combineTruncateBeforeLegal(SDValue Op,
3982 DAGCombinerInfo &DCI) const {
3983 // Simplify V:v2NiB --(bitcast)--> vNi2B --(truncate)--> vNiB
3984 // to extract-subvector (shuffle V, pick even, pick odd)
3985
3986 assert(Op.getOpcode() == ISD::TRUNCATE);
3987 SelectionDAG &DAG = DCI.DAG;
3988 const SDLoc &dl(Op);
3989
3990 if (Op.getOperand(i: 0).getOpcode() == ISD::BITCAST)
3991 return SDValue();
3992 SDValue Cast = Op.getOperand(i: 0);
3993 SDValue Src = Cast.getOperand(i: 0);
3994
3995 EVT TruncTy = Op.getValueType();
3996 EVT CastTy = Cast.getValueType();
3997 EVT SrcTy = Src.getValueType();
3998 if (SrcTy.isSimple())
3999 return SDValue();
4000 if (SrcTy.getVectorElementType() != TruncTy.getVectorElementType())
4001 return SDValue();
4002 unsigned SrcLen = SrcTy.getVectorNumElements();
4003 unsigned CastLen = CastTy.getVectorNumElements();
4004 if (2 * CastLen != SrcLen)
4005 return SDValue();
4006
4007 SmallVector<int, 128> Mask(SrcLen);
4008 for (int i = 0; i != static_cast<int>(CastLen); ++i) {
4009 Mask[i] = 2 * i;
4010 Mask[i + CastLen] = 2 * i + 1;
4011 }
4012 SDValue Deal =
4013 DAG.getVectorShuffle(VT: SrcTy, dl, N1: Src, N2: DAG.getUNDEF(VT: SrcTy), Mask);
4014 return opSplit(Vec: Deal, dl, DAG).first;
4015}
4016
SDValue
HexagonTargetLowering::combineConcatOfShuffles(SDValue Op,
                                               SelectionDAG &DAG) const {
  // Fold
  //   concat (shuffle x, y, m1), (shuffle x, y, m2)
  // into
  //   shuffle (concat x, y), undef, m3
  if (Op.getNumOperands() != 2)
    return SDValue();

  const SDLoc &dl(Op);
  SDValue V0 = Op.getOperand(i: 0);
  SDValue V1 = Op.getOperand(i: 1);

  if (V0.getOpcode() != ISD::VECTOR_SHUFFLE)
    return SDValue();
  if (V1.getOpcode() != ISD::VECTOR_SHUFFLE)
    return SDValue();

  // Collect the distinct shuffle inputs, preserving first-seen order.
  SetVector<SDValue> Order;
  Order.insert(X: V0.getOperand(i: 0));
  Order.insert(X: V0.getOperand(i: 1));
  Order.insert(X: V1.getOperand(i: 0));
  Order.insert(X: V1.getOperand(i: 1));

  // The fold needs both shuffles to draw from at most two vectors.
  if (Order.size() > 2)
    return SDValue();

  // In ISD::VECTOR_SHUFFLE, the types of each input and the type of the
  // result must be the same.
  EVT InpTy = V0.getValueType();
  assert(InpTy.isVector());
  unsigned InpLen = InpTy.getVectorNumElements();

  SmallVector<int, 128> LongMask;
  // Translate one shuffle's mask into indices into concat(C0, C1).
  auto AppendToMask = [&](SDValue Shuffle) {
    auto *SV = cast<ShuffleVectorSDNode>(Val: Shuffle.getNode());
    ArrayRef<int> Mask = SV->getMask();
    SDValue X = Shuffle.getOperand(i: 0);
    SDValue Y = Shuffle.getOperand(i: 1);
    for (int M : Mask) {
      if (M == -1) {
        LongMask.push_back(Elt: M);
        continue;
      }
      // Map the index to within its source vector, then rebase it to the
      // source's position in the concatenation.
      SDValue Src = static_cast<unsigned>(M) < InpLen ? X : Y;
      if (static_cast<unsigned>(M) >= InpLen)
        M -= InpLen;

      int OutOffset = Order[0] == Src ? 0 : InpLen;
      LongMask.push_back(Elt: M + OutOffset);
    }
  };

  AppendToMask(V0);
  AppendToMask(V1);

  SDValue C0 = Order.front();
  SDValue C1 = Order.back();  // Can be same as front
  EVT LongTy = InpTy.getDoubleNumVectorElementsVT(Context&: *DAG.getContext());

  SDValue Cat = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: LongTy, Ops: {C0, C1});
  return DAG.getVectorShuffle(VT: LongTy, dl, N1: Cat, N2: DAG.getUNDEF(VT: LongTy), Mask: LongMask);
}
4081
// Reassociate concat(p1, p2, ...) into
// concat(concat(p1, ...), concat(pi, ...), ...)
// where each inner concat produces a predicate where each bit corresponds
// to at most BitBytes bytes.
// Concatenating predicates decreases the number of bytes per each predicate
// bit.
SDValue
HexagonTargetLowering::combineConcatOfScalarPreds(SDValue Op, unsigned BitBytes,
                                                  SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  SmallVector<SDValue> Ops(Op->ops());
  MVT ResTy = ty(Op);
  MVT InpTy = ty(Op: Ops[0]);
  unsigned InpLen = InpTy.getVectorNumElements();  // Scalar predicate
  unsigned ResLen = ResTy.getVectorNumElements();  // HVX vector predicate
  assert(InpLen <= 8 && "Too long for scalar predicate");
  assert(ResLen > 8 && "Too short for HVX vector predicate");

  unsigned Bytes = 8 / InpLen;    // Bytes-per-bit in input

  // Already in the right form?
  if (Bytes <= BitBytes)
    return Op;

  ArrayRef<SDValue> Inputs(Ops);
  // Number of inputs consumed by each inner (v8i1) concatenation.
  unsigned SliceLen = Bytes / BitBytes;

  SmallVector<SDValue> Cats;
  // (8 / BitBytes) is the desired length of the result of the inner concat.
  for (unsigned i = 0; i != ResLen / (8 / BitBytes); ++i) {
    SDValue Cat = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: MVT::v8i1,
                              Ops: Inputs.slice(N: SliceLen * i, M: SliceLen));
    Cats.push_back(Elt: Cat);
  }

  return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: ResTy, Ops: Cats);
}
4119
4120SDValue HexagonTargetLowering::combineConcatVectorsBeforeLegal(
4121 SDValue Op, DAGCombinerInfo &DCI) const {
4122 MVT ResTy = ty(Op);
4123 MVT ElemTy = ResTy.getVectorElementType();
4124
4125 if (ElemTy != MVT::i1) {
4126 return combineConcatOfShuffles(Op, DAG&: DCI.DAG);
4127 }
4128 return SDValue();
4129}
4130
// Create the inner partial reduction MLA that can be efficiently lowered. This
// function is used by partial and full reductions.
SDValue HexagonTargetLowering::createExtendingPartialReduceMLA(
    unsigned Opcode, EVT AccEltType, unsigned AccNumElements, EVT InputType,
    const SDValue &A, const SDValue &B, unsigned &RemainingReductionRatio,
    const SDLoc &DL, SelectionDAG &DAG) const {
  const auto &Subtarget = DAG.getSubtarget<HexagonSubtarget>();
  if (!Subtarget.useHVXOps())
    return SDValue();

  EVT InputEltType = InputType.getVectorElementType();

  // Find if an optimized instruction for the sub-reduction is available.
  // Currently only the i8 -> i32 (4:1) combination is supported.
  unsigned NativeRatio;
  if (AccEltType == MVT::i32 && InputEltType == MVT::i8)
    NativeRatio = 4;
  else
    return SDValue();

  // We only handle the case when additional reduction will be needed, i.e.
  // input is longer by a larger factor than the result.
  ElementCount InputEC = InputType.getVectorElementCount();
  if (!InputEC.isKnownMultipleOf(RHS: AccNumElements * NativeRatio))
    return SDValue();

  unsigned InputNumElements = InputEC.getFixedValue();
  // Factor left for the caller to reduce after the native step.
  RemainingReductionRatio = InputNumElements / (AccNumElements * NativeRatio);
  if (RemainingReductionRatio == 1)
    return SDValue();

  // Create a reduction by the natively supported factor, accumulating
  // into a zero vector of the intermediate type.
  EVT IntermediateType = EVT::getVectorVT(Context&: *DAG.getContext(), VT: AccEltType,
                                          NumElements: InputNumElements / NativeRatio);

  SDValue Zero = DAG.getConstant(Val: 0, DL, VT: IntermediateType);
  return DAG.getNode(Opcode, DL, VT: IntermediateType, N1: Zero, N2: A, N3: B);
}
4168
4169static bool DetectExtendingMultiply(const SDValue &N, EVT ScalarType,
4170 unsigned &Opcode, SDValue &A, SDValue &B) {
4171 SDValue Mul = N;
4172 EVT AccType = Mul.getValueType(); // Vector input type after extension.
4173 if (ScalarType != AccType.getVectorElementType())
4174 return false;
4175 bool swap = false;
4176 if (Mul->getOpcode() != ISD::MUL)
4177 return false;
4178 A = Mul->getOperand(Num: 0);
4179 B = Mul->getOperand(Num: 1);
4180 if (A.getOpcode() == ISD::ZERO_EXTEND) {
4181 if (B.getOpcode() == ISD::ZERO_EXTEND)
4182 Opcode = ISD::PARTIAL_REDUCE_UMLA;
4183 else if (B.getOpcode() == ISD::SIGN_EXTEND) {
4184 swap = true;
4185 Opcode = ISD::PARTIAL_REDUCE_SUMLA;
4186 } else
4187 return false;
4188 } else if (A.getOpcode() == ISD::SIGN_EXTEND) {
4189 if (B.getOpcode() == ISD::ZERO_EXTEND)
4190 Opcode = ISD::PARTIAL_REDUCE_SUMLA;
4191 else if (B.getOpcode() == ISD::SIGN_EXTEND)
4192 Opcode = ISD::PARTIAL_REDUCE_SMLA;
4193 else
4194 return false;
4195 } else
4196 return false;
4197
4198 // Get multiplication arguments before extension.
4199 A = A->getOperand(Num: 0);
4200 B = B->getOperand(Num: 0);
4201 if (A.getValueType() != B.getValueType())
4202 return false;
4203
4204 if (swap)
4205 std::swap(a&: A, b&: B);
4206
4207 return true;
4208}
4209
SDValue HexagonTargetLowering::splitVecReduceAdd(SDNode *N,
                                                 SelectionDAG &DAG) const {
  // Rewrite vecreduce_add(mul(ext(A), ext(B))) into a partial MLA
  // reduction by the natively supported factor, followed by a plain
  // vecreduce_add of the (shorter) intermediate vector.
  if (!Subtarget.useHVXOps())
    return SDValue();

  EVT ScalarType = N->getValueType(ResNo: 0);
  unsigned Opcode;
  SDValue A, B;
  if (!DetectExtendingMultiply(N: N->getOperand(Num: 0), ScalarType, Opcode, A, B))
    return SDValue();

  SDLoc DL(N);
  unsigned RemainingReductionRatio;
  // AccNumElements is 1: the full reduction produces a single scalar.
  SDValue Partial =
      createExtendingPartialReduceMLA(Opcode, AccEltType: ScalarType, AccNumElements: 1, InputType: A.getValueType(),
                                      A, B, RemainingReductionRatio, DL, DAG);
  if (!Partial)
    return SDValue();

  // We could have inserted a trivial MLA and rely on the folding action,
  // similar to how vector_partial_reduce_add is lowered to an MLA in
  // SelectionDAGBuilder. However, we just replace the final result since we
  // have analyzed the input completely.
  return DAG.getNode(Opcode: ISD::VECREDUCE_ADD, DL, VT: ScalarType, Operand: Partial);
}
4235
// When possible, separate an MLA reduction with extended operands but
// unsupported reduction factor into an extending partial reduction that
// can be efficiently lowered, and a follow-up partial reduction.
// partial_reduce_mla(a, x, y) ->
//   partial_reduce_mla(a, partial_reduce_mla(0, x, y), 1)
SDValue
HexagonTargetLowering::splitExtendingPartialReduceMLA(SDNode *N,
                                                      SelectionDAG &DAG) const {
  if (!Subtarget.useHVXOps())
    return SDValue();

  SDValue Acc = N->getOperand(Num: 0);
  SDValue A = N->getOperand(Num: 1);
  SDValue B = N->getOperand(Num: 2);
  if (A.getValueType() != B.getValueType())
    return SDValue();

  // The types should be declared as custom, but do not split already legal
  // operation.
  EVT AccType = Acc.getValueType();
  EVT InputType = A.getValueType();
  if (getPartialReduceMLAAction(Opc: N->getOpcode(), AccVT: AccType, InputVT: InputType) != Custom)
    return SDValue();

  SDLoc DL(N);
  unsigned RemainingReductionRatio;
  // Inner MLA: reduce by the natively supported factor into a zero
  // accumulator of the intermediate type.
  SDValue Partial = createExtendingPartialReduceMLA(
      Opcode: N->getOpcode(), AccEltType: AccType.getVectorElementType(),
      AccNumElements: AccType.getVectorNumElements(), InputType, A, B, RemainingReductionRatio,
      DL, DAG);
  if (!Partial)
    return SDValue();
  assert(RemainingReductionRatio <= MaxExpandMLA);

  // Create the reduction for the remaining ratio. Multiplying by constant
  // one keeps the intermediate values; UMLA stays unsigned, everything
  // else (signed intermediate times unsigned one) uses SUMLA.
  EVT IntermediateType = Partial->getOperand(Num: 0).getValueType();
  SDValue One = DAG.getConstant(Val: 1, DL, VT: IntermediateType);
  return DAG.getNode(Opcode: N->getOpcode() == ISD::PARTIAL_REDUCE_UMLA
                         ? ISD::PARTIAL_REDUCE_UMLA
                         : ISD::PARTIAL_REDUCE_SUMLA,
                     DL, VT: AccType, N1: Acc, N2: Partial, N3: One);
}
4278
SDValue
HexagonTargetLowering::LowerHvxPartialReduceMLA(SDValue Op,
                                                SelectionDAG &DAG) const {
  // Lower a partial-reduce MLA whose vectors span multiple HVX registers
  // by slicing the accumulator and both inputs into single-HVX-vector
  // chunks, emitting one MLA per chunk, and concatenating the results.
  const SDLoc &DL(Op);
  SDValue Acc = Op.getOperand(i: 0);
  SDValue A = Op.getOperand(i: 1);
  SDValue B = Op.getOperand(i: 2);

  // Split the input vectors into units of one HVX vector length.
  unsigned HwVectorSizeInBits = Subtarget.getVectorLength() * 8;

  EVT AccType = Acc.getValueType();
  EVT AccEltType = AccType.getVectorElementType();
  // Accumulator elements per one HVX vector.
  unsigned AccSubvectorNumElements =
      HwVectorSizeInBits / AccEltType.getSizeInBits();
  EVT AccSubvectorType =
      EVT::getVectorVT(Context&: *DAG.getContext(), VT: AccEltType, NumElements: AccSubvectorNumElements);

  EVT InputType = A.getValueType();
  assert(InputType.getSizeInBits() % HwVectorSizeInBits == 0);
  EVT InputEltType = InputType.getVectorElementType();
  // Input elements per one HVX vector.
  unsigned InputSubvectorNumElements =
      HwVectorSizeInBits / InputEltType.getSizeInBits();
  EVT InputSubvectorType = EVT::getVectorVT(Context&: *DAG.getContext(), VT: InputEltType,
                                            NumElements: InputSubvectorNumElements);

  unsigned SubvectorNum = InputType.getFixedSizeInBits() / HwVectorSizeInBits;
  SmallVector<SDValue, MaxExpandMLA> Subvectors;

  // Emit one single-vector MLA per chunk.
  for (unsigned I = 0; I != SubvectorNum; ++I) {
    SDValue SubvectorAcc = DAG.getExtractSubvector(DL, VT: AccSubvectorType, Vec: Acc,
                                                   Idx: I * AccSubvectorNumElements);
    SDValue SubvectorA = DAG.getExtractSubvector(DL, VT: InputSubvectorType, Vec: A,
                                                 Idx: I * InputSubvectorNumElements);
    SDValue SubvectorB = DAG.getExtractSubvector(DL, VT: InputSubvectorType, Vec: B,
                                                 Idx: I * InputSubvectorNumElements);
    SDValue SubvectorMLA = DAG.getNode(Opcode: Op.getOpcode(), DL, VT: AccSubvectorType,
                                       N1: SubvectorAcc, N2: SubvectorA, N3: SubvectorB);
    Subvectors.push_back(Elt: SubvectorMLA);
  }

  return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: AccType, Ops: Subvectors);
}
4322
SDValue
HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
      const {
  // DAG combines specific to HVX nodes. The TRUNCATE/CONCAT_VECTORS
  // combines run in every phase; the HexagonISD combines only run after
  // operation legalization.
  const SDLoc &dl(N);
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op(N, 0);
  unsigned Opc = Op.getOpcode();

  SmallVector<SDValue, 4> Ops(N->ops());

  if (Opc == ISD::TRUNCATE)
    return combineTruncateBeforeLegal(Op, DCI);
  if (Opc == ISD::CONCAT_VECTORS)
    return combineConcatVectorsBeforeLegal(Op, DCI);

  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  switch (Opc) {
    case HexagonISD::V2Q:
      // V2Q of a constant splat folds to an all-false/all-true predicate.
      if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) {
        if (const auto *C = dyn_cast<ConstantSDNode>(Val: Ops[0].getOperand(i: 0)))
          return C->isZero() ? DAG.getNode(Opcode: HexagonISD::QFALSE, DL: dl, VT: ty(Op))
                             : DAG.getNode(Opcode: HexagonISD::QTRUE, DL: dl, VT: ty(Op));
      }
      break;
    case HexagonISD::Q2V:
      // Q2V of a known predicate folds to an all-ones/all-zeros vector.
      if (Ops[0].getOpcode() == HexagonISD::QTRUE)
        return DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ty(Op),
                           Operand: DAG.getAllOnesConstant(DL: dl, VT: MVT::i32));
      if (Ops[0].getOpcode() == HexagonISD::QFALSE)
        return getZero(dl, Ty: ty(Op), DAG);
      break;
    case HexagonISD::VINSERTW0:
      // Inserting an undef word is a no-op.
      if (isUndef(Op: Ops[1]))
        return Ops[0];
      break;
    case HexagonISD::VROR: {
      // Merge nested rotates: ror(ror(v, r1), r0) -> ror(v, r0+r1).
      if (Ops[0].getOpcode() == HexagonISD::VROR) {
        SDValue Vec = Ops[0].getOperand(i: 0);
        SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(i: 1);
        SDValue Rot = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ty(Op: Rot0), Ops: {Rot0, Rot1});
        return DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ty(Op), Ops: {Vec, Rot});
      }
      break;
    }
  }

  return SDValue();
}
4373
4374bool
4375HexagonTargetLowering::shouldSplitToHvx(MVT Ty, SelectionDAG &DAG) const {
4376 if (Subtarget.isHVXVectorType(VecTy: Ty, IncludeBool: true))
4377 return false;
4378 auto Action = getPreferredHvxVectorAction(VecTy: Ty);
4379 if (Action == TargetLoweringBase::TypeSplitVector)
4380 return Subtarget.isHVXVectorType(VecTy: typeLegalize(Ty, DAG), IncludeBool: true);
4381 return false;
4382}
4383
4384bool
4385HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const {
4386 if (Subtarget.isHVXVectorType(VecTy: Ty, IncludeBool: true))
4387 return false;
4388 auto Action = getPreferredHvxVectorAction(VecTy: Ty);
4389 if (Action == TargetLoweringBase::TypeWidenVector)
4390 return Subtarget.isHVXVectorType(VecTy: typeLegalize(Ty, DAG), IncludeBool: true);
4391 return false;
4392}
4393
4394bool
4395HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const {
4396 if (!Subtarget.useHVXOps())
4397 return false;
4398 // If the type of any result, or any operand type are HVX vector types,
4399 // this is an HVX operation.
4400 auto IsHvxTy = [this](EVT Ty) {
4401 return Ty.isSimple() && Subtarget.isHVXVectorType(VecTy: Ty.getSimpleVT(), IncludeBool: true);
4402 };
4403 auto IsHvxOp = [this](SDValue Op) {
4404 return Op.getValueType().isSimple() &&
4405 Subtarget.isHVXVectorType(VecTy: ty(Op), IncludeBool: true);
4406 };
4407 if (llvm::any_of(Range: N->values(), P: IsHvxTy) || llvm::any_of(Range: N->ops(), P: IsHvxOp))
4408 return true;
4409
4410 // Check if this could be an HVX operation after type widening.
4411 auto IsWidenedToHvx = [this, &DAG](SDValue Op) {
4412 if (!Op.getValueType().isSimple())
4413 return false;
4414 MVT ValTy = ty(Op);
4415 return ValTy.isVector() && shouldWidenToHvx(Ty: ValTy, DAG);
4416 };
4417
4418 for (int i = 0, e = N->getNumValues(); i != e; ++i) {
4419 if (IsWidenedToHvx(SDValue(N, i)))
4420 return true;
4421 }
4422 return llvm::any_of(Range: N->ops(), P: IsWidenedToHvx);
4423}
4424